gwenhywfar  4.99.8beta
text.c
Go to the documentation of this file.
1 /***************************************************************************
2  begin : Sat Jun 28 2003
3  copyright : (C) 2003 by Martin Preuss
4  email : martin@libchipcard.de
5 
6  ***************************************************************************
7  * *
8  * This library is free software; you can redistribute it and/or *
9  * modify it under the terms of the GNU Lesser General Public *
10  * License as published by the Free Software Foundation; either *
11  * version 2.1 of the License, or (at your option) any later version. *
12  * *
13  * This library is distributed in the hope that it will be useful, *
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
16  * Lesser General Public License for more details. *
17  * *
18  * You should have received a copy of the GNU Lesser General Public *
19  * License along with this library; if not, write to the Free Software *
20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21  * MA 02111-1307 USA *
22  * *
23  ***************************************************************************/
24 
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28 
29 #define DISABLE_DEBUGLOG
30 
31 
32 #include "text.h"
33 #include <stdlib.h>
34 #include <assert.h>
35 #include <string.h>
36 #include <errno.h>
37 #include <ctype.h>
38 #ifdef HAVE_LOCALE_H
39 # include <locale.h>
40 #endif
41 
42 #ifndef ICONV_CONST
43 # define ICONV_CONST
44 #endif
45 
46 #ifdef HAVE_ICONV_H
47 # include <iconv.h>
48 #endif
49 
50 
51 #include <gwenhywfar/gwenhywfarapi.h>
52 #include <gwenhywfar/debug.h>
53 #include <gwenhywfar/stringlist.h>
54 
55 
56 
57 typedef struct {
58  int character;
59  const char *replace;
61 
63  {'&', "&amp;"},
64  {'<', "&lt;"},
65  {'>', "&gt;"},
66  {'\'', "&apos;"},
67  {'\"', "&quot;"},
68  {0, 0}
69 };
70 
71 
72 
73 char *GWEN_Text_GetWord(const char *src,
74  const char *delims,
75  char *buffer,
76  unsigned int maxsize,
77  uint32_t flags,
78  const char **next) {
79  unsigned int size;
80  int lastWasBlank;
81  int lastBlankPos;
82  int insideQuotes;
83  int lastWasEscape;
84 
85  assert(maxsize);
86 
87  /* skip leading blanks, if wanted */
89  while(*src && (unsigned char)(*src)<33)
90  src++;
91  }
92 
93  /* get word */
94  size=0;
95  lastWasBlank=0;
96  lastBlankPos=-1;
97  lastWasEscape=0;
98  insideQuotes=0;
99 
100  if (*src=='"') {
101  insideQuotes=1;
102  if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
103  src++;
104  }
105 
106  while(*src && size<(maxsize-1)) {
107  if (lastWasEscape) {
108  buffer[size]=*src;
109  size++;
110  lastWasEscape=0;
111  lastWasBlank=0;
112  lastBlankPos=-1;
113  }
114  else {
115  if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
116  lastWasEscape=1;
117  lastWasBlank=0;
118  lastBlankPos=-1;
119  }
120  else {
121  if (!insideQuotes && strchr(delims, *src)!=0)
122  break;
123  if (*src=='"') {
124  if (insideQuotes) {
125  insideQuotes=0;
126  src++;
127  break;
128  }
129  else {
131  "Found a closing \" without an opening one "
132  "(consider using a backslash to escape)");
133  return 0;
134  }
135  }
136 
137 
138  if (insideQuotes ||
139  !lastWasBlank ||
140  (lastWasBlank &&
142  /* only copy if last char was NOT blank or
143  * last was blank but the caller does not want to have multiple
144  * blanks removed */
145  buffer[size]=*src;
146  size++;
147  }
148  /* remember next loop whether this char was a blank */
149  if (isspace((int)((unsigned char)*src)) && !lastWasEscape) {
150  lastWasBlank=1;
151  lastBlankPos=size;
152  }
153  else {
154  lastWasBlank=0;
155  lastBlankPos=-1;
156  }
157  } /* if this is not a backslash */
158  } /* !lastWasEscape */
159  /* advance source pointer */
160  src++;
161  } /* while */
162 
163  /* add trailing null to correctly terminate the buffer */
164  buffer[size]=0;
165 
166  if (insideQuotes) {
167  DBG_DEBUG(GWEN_LOGDOMAIN, "Missing \" after word");
168  return 0;
169  }
170  /* check whether the source string was correctly terminated */
171  if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
172  if (*src) {
173  if (strchr(delims, *src)==0) {
174  DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
175  return 0;
176  }
177  }
178  else {
179  if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
180  DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
181  return 0;
182  }
183  }
184  }
185 
186  /* remove trailing blanks, if wanted */
188  if (lastBlankPos!=-1)
189  buffer[lastBlankPos]=0;
190  }
191 
192  *next=src;
193  return buffer;
194 }
195 
196 
197 
198 int GWEN_Text_GetWordToBuffer(const char *src,
199  const char *delims,
200  GWEN_BUFFER *buf,
201  uint32_t flags,
202  const char **next) {
203  const char *savedSrc=src;
204  int lastWasBlank;
205  int lastBlankPos;
206  int insideQuotes;
207  int lastWasEscape;
208 
209  /* skip leading blanks, if wanted */
211  while(*src && (unsigned char)(*src)<33) {
212  if (strchr(delims, *src)) {
213  *next=src;
214  return 0;
215  }
216  src++;
217  }
218  }
219 
220  /* get word */
221  lastWasBlank=0;
222  lastBlankPos=-1;
223  lastWasEscape=0;
224  insideQuotes=0;
225 
226  if (*src=='"') {
227  insideQuotes=1;
228  if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
229  src++;
230  }
231 
232  while(*src) {
233  if (lastWasEscape) {
234  GWEN_Buffer_AppendByte(buf, *src);
235  lastWasEscape=0;
236  lastWasBlank=0;
237  lastBlankPos=-1;
238  }
239  else {
240  if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
241  lastWasEscape=1;
242  lastWasBlank=0;
243  lastBlankPos=-1;
244  }
245  else {
246  if (!insideQuotes && strchr(delims, *src)!=0)
247  break;
248  if (*src=='"') {
249  if (insideQuotes) {
250  insideQuotes=0;
251  src++;
252  break;
253  }
254  else {
256  "Found a closing \" without an opening one "
257  "(consider using a backslash to escape)");
258  return -1;
259  }
260  }
261 
262 
263  if (insideQuotes ||
264  !lastWasBlank ||
265  (lastWasBlank &&
267  /* only copy if last char was NOT blank or
268  * last was blank but the caller does not want to have multiple
269  * blanks removed */
270  GWEN_Buffer_AppendByte(buf, *src);
271  }
272  /* remember next loop whether this char was a blank */
273 
274  if (!lastWasEscape && *((unsigned char*)src)<33) {
275  lastWasBlank=1;
276  lastBlankPos=GWEN_Buffer_GetPos(buf);
277  }
278  else {
279  lastWasBlank=0;
280  lastBlankPos=-1;
281  }
282  } /* if this is not a backslash */
283  } /* !lastWasEscape */
284  /* advance source pointer */
285  src++;
286  } /* while */
287 
288  if (insideQuotes) {
289  DBG_ERROR(GWEN_LOGDOMAIN, "Missing \" after word (at %d: [%s])", (int)(src-savedSrc), savedSrc);
290  return -1;
291  }
292  /* check whether the source string was correctly terminated */
293  if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
294  if (*src) {
295  if (strchr(delims, *src)==0) {
296  DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
297  return -1;
298  }
299  }
300  else {
301  if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
302  DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
303  return -1;
304  }
305  }
306  }
307 
308  /* remove trailing blanks, if wanted */
310  if (lastBlankPos!=-1)
311  GWEN_Buffer_Crop(buf, 0, lastBlankPos);
312  }
313 
314  *next=src;
315  return 0;
316 }
317 
318 
319 
320 char *GWEN_Text_Escape(const char *src,
321  char *buffer,
322  unsigned int maxsize) {
323  unsigned int size;
324 
325  size=0;
326  while(*src) {
327  unsigned char x;
328 
329  x=(unsigned char)*src;
330  if (!(
331  (x>='A' && x<='Z') ||
332  (x>='a' && x<='z') ||
333  (x>='0' && x<='9'))) {
334  unsigned char c;
335 
336  if ((maxsize-1)<size+3) {
337  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
338  return 0;
339  }
340  buffer[size++]='%';
341  c=(((unsigned char)(*src))>>4)&0xf;
342  if (c>9)
343  c+=7;
344  c+='0';
345  buffer[size++]=c;
346  c=((unsigned char)(*src))&0xf;
347  if (c>9)
348  c+=7;
349  c+='0';
350  buffer[size++]=c;
351  }
352  else {
353  if (size<(maxsize-1))
354  buffer[size++]=*src;
355  else {
356  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
357  return 0;
358  }
359  }
360 
361  src++;
362  } /* while */
363 
364  buffer[size]=0;
365  return buffer;
366 }
367 
368 
369 
370 char *GWEN_Text_EscapeTolerant(const char *src,
371  char *buffer,
372  unsigned int maxsize) {
373  unsigned int size;
374 
375  size=0;
376  while(*src) {
377  unsigned char x;
378 
379  x=(unsigned char)*src;
380  if (!(
381  (x>='A' && x<='Z') ||
382  (x>='a' && x<='z') ||
383  (x>='0' && x<='9') ||
384  x==' ' ||
385  x=='.' ||
386  x==',' ||
387  x=='.' ||
388  x=='*' ||
389  x=='?'
390  )) {
391  unsigned char c;
392 
393  if ((maxsize-1)<size+3) {
394  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
395  return 0;
396  }
397  buffer[size++]='%';
398  c=(((unsigned char)(*src))>>4)&0xf;
399  if (c>9)
400  c+=7;
401  c+='0';
402  buffer[size++]=c;
403  c=((unsigned char)(*src))&0xf;
404  if (c>9)
405  c+=7;
406  c+='0';
407  buffer[size++]=c;
408  }
409  else {
410  if (size<(maxsize-1))
411  buffer[size++]=*src;
412  else {
413  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
414  return 0;
415  }
416  }
417 
418  src++;
419  } /* while */
420 
421  buffer[size]=0;
422  return buffer;
423 }
424 
425 
426 
427 char *GWEN_Text_UnescapeN(const char *src,
428  unsigned int srclen,
429  char *buffer,
430  unsigned int maxsize) {
431  unsigned int size;
432 
433  size=0;
434 
435  while(*src && srclen>0) {
436  unsigned char x;
437 
438  x=(unsigned char)*src;
439  if (
440  (x>='A' && x<='Z') ||
441  (x>='a' && x<='z') ||
442  (x>='0' && x<='9')) {
443  if (size<(maxsize-1))
444  buffer[size++]=*src;
445  else {
446  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
447  return 0;
448  }
449  }
450  else {
451  if (*src=='%') {
452  unsigned char d1, d2;
453  unsigned char c;
454 
455  if (srclen<3) {
456  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
457  return 0;
458  }
459  /* skip '%' */
460  src++;
461  if (!(*src) || !isxdigit((int)*src)) {
462  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
463  return 0;
464  }
465  /* read first digit */
466  d1=(unsigned char)(toupper(*src));
467 
468  /* get second digit */
469  src++;
470  if (!(*src) || !isxdigit((int)*src)) {
471  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
472  return 0;
473  }
474  d2=(unsigned char)(toupper(*src));
475  /* compute character */
476  d1-='0';
477  if (d1>9)
478  d1-=7;
479  c=(d1<<4)&0xf0;
480  d2-='0';
481  if (d2>9)
482  d2-=7;
483  c+=(d2&0xf);
484  /* store character */
485  if (size<(maxsize-1))
486  buffer[size++]=(char)c;
487  else {
488  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
489  return 0;
490  }
491  srclen-=2;
492  }
493  else {
494  DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
495  "characters in escaped string (\"%s\")",
496  src);
497  return 0;
498  }
499  }
500  srclen--;
501  src++;
502  } /* while */
503 
504  buffer[size]=0;
505  return buffer;
506 }
507 
508 
509 
/*
 * Convenience wrapper: unescape the whole NUL-terminated string src by
 * handing its full length to GWEN_Text_UnescapeN(). Returns whatever
 * that function returns.
 */
char *GWEN_Text_Unescape(const char *src,
                         char *buffer,
                         unsigned int maxsize) {
  const unsigned int wholeLength=strlen(src);

  return GWEN_Text_UnescapeN(src, wholeLength, buffer, maxsize);
}
518 
519 
520 
521 char *GWEN_Text_UnescapeTolerantN(const char *src,
522  unsigned int srclen,
523  char *buffer,
524  unsigned int maxsize) {
525  unsigned int size;
526 
527  size=0;
528 
529  while(*src && srclen>0) {
530  unsigned char x;
531 
532  x=(unsigned char)*src;
533  if (
534  (x>='A' && x<='Z') ||
535  (x>='a' && x<='z') ||
536  (x>='0' && x<='9') ||
537  x==' ' ||
538  x=='.' ||
539  x==',' ||
540  x=='.' ||
541  x=='*' ||
542  x=='?'
543  ) {
544  if (size<(maxsize-1))
545  buffer[size++]=*src;
546  else {
547  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
548  return 0;
549  }
550  }
551  else {
552  if (*src=='%') {
553  unsigned char d1, d2;
554  unsigned char c;
555 
556  if (srclen<3) {
557  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
558  return 0;
559  }
560  /* skip '%' */
561  src++;
562  if (!(*src) || !isxdigit((int)*src)) {
563  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
564  return 0;
565  }
566  /* read first digit */
567  d1=(unsigned char)(toupper(*src));
568 
569  /* get second digit */
570  src++;
571  if (!(*src) || !isxdigit((int)*src)) {
572  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
573  return 0;
574  }
575  d2=(unsigned char)(toupper(*src));
576  /* compute character */
577  d1-='0';
578  if (d1>9)
579  d1-=7;
580  c=(d1<<4)&0xf0;
581  d2-='0';
582  if (d2>9)
583  d2-=7;
584  c+=(d2&0xf);
585  /* store character */
586  if (size<(maxsize-1))
587  buffer[size++]=(char)c;
588  else {
589  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
590  return 0;
591  }
592  srclen-=2;
593  }
594  else {
595  DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
596  "characters in escaped string (\"%s\")",
597  src);
598  return 0;
599  }
600  }
601  srclen--;
602  src++;
603  } /* while */
604 
605  buffer[size]=0;
606  return buffer;
607 }
608 
609 
610 
/*
 * Convenience wrapper: unescape the whole NUL-terminated string src by
 * handing its full length to GWEN_Text_UnescapeTolerantN(). Returns
 * whatever that function returns.
 */
char *GWEN_Text_UnescapeTolerant(const char *src,
                                 char *buffer,
                                 unsigned int maxsize) {
  const unsigned int wholeLength=strlen(src);

  return GWEN_Text_UnescapeTolerantN(src, wholeLength, buffer, maxsize);
}
619 
620 
621 
622 char *GWEN_Text_ToHex(const char *src, unsigned l,
623  char *buffer, unsigned int maxsize) {
624  unsigned int pos;
625  unsigned int size;
626 
627  if ((l*2)+1 > maxsize) {
628  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
629  return 0;
630  }
631 
632  pos=0;
633  size=0;
634  while(pos<l) {
635  unsigned char c;
636 
637  c=(((unsigned char)(src[pos]))>>4)&0xf;
638  if (c>9)
639  c+=7;
640  c+='0';
641  buffer[size++]=c;
642  c=((unsigned char)(src[pos]))&0xf;
643  if (c>9)
644  c+=7;
645  c+='0';
646  buffer[size++]=c;
647  pos++;
648  }
649  buffer[size]=0;
650  return buffer;
651 }
652 
653 
654 
655 char *GWEN_Text_ToHexGrouped(const char *src,
656  unsigned l,
657  char *buffer,
658  unsigned maxsize,
659  unsigned int groupsize,
660  char delimiter,
661  int skipLeadingZeroes) {
662  unsigned int pos;
663  unsigned int size;
664  unsigned int j;
665 
666  j=0;
667 
668  pos=0;
669  size=0;
670  j=0;
671  while(pos<l) {
672  unsigned char c;
673  int skipThis;
674 
675  skipThis=0;
676  c=(((unsigned char)(src[pos]))>>4)&0xf;
677  if (skipLeadingZeroes) {
678  if (c==0)
679  skipThis=1;
680  else
681  skipLeadingZeroes=0;
682  }
683  if (c>9)
684  c+=7;
685  c+='0';
686  if (!skipThis) {
687  if (size+1>=maxsize) {
688  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
689  return 0;
690  }
691  buffer[size++]=c;
692  j++;
693  if (j==groupsize) {
694  if (size+1>=maxsize) {
695  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
696  return 0;
697  }
698  buffer[size++]=delimiter;
699  j=0;
700  }
701  }
702 
703  skipThis=0;
704  c=((unsigned char)(src[pos]))&0xf;
705  if (skipLeadingZeroes) {
706  if (c==0 && pos+1<l)
707  skipThis=1;
708  else
709  skipLeadingZeroes=0;
710  }
711  if (c>9)
712  c+=7;
713  c+='0';
714  if (size+1>=maxsize) {
715  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
716  return 0;
717  }
718  if (!skipThis) {
719  buffer[size++]=c;
720  j++;
721  if (j==groupsize) {
722  if (pos+1<l) {
723  if (size+1>=maxsize) {
724  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
725  return 0;
726  }
727  buffer[size++]=delimiter;
728  }
729  j=0;
730  }
731  }
732  pos++;
733  }
734  buffer[size]=0;
735  return buffer;
736 }
737 
738 
739 
740 int GWEN_Text_ToHexBuffer(const char *src, unsigned l,
741  GWEN_BUFFER *buf,
742  unsigned int groupsize,
743  char delimiter,
744  int skipLeadingZeroes) {
745  unsigned int pos = 0;
746  unsigned int j = 0;
747 
748  while(pos<l) {
749  unsigned char c;
750  int skipThis;
751 
752  skipThis=0;
753  c=(((unsigned char)(src[pos]))>>4)&0xf;
754  if (skipLeadingZeroes) {
755  if (c==0)
756  skipThis=1;
757  else
758  skipLeadingZeroes=0;
759  }
760  if (c>9)
761  c+=7;
762  c+='0';
763  if (!skipThis) {
764  if (GWEN_Buffer_AppendByte(buf, c)) {
765  DBG_INFO(GWEN_LOGDOMAIN, "here");
766  return -1;
767  }
768  j++;
769  if (groupsize && j==groupsize) {
770  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
771  DBG_INFO(GWEN_LOGDOMAIN, "here");
772  return -1;
773  }
774  j=0;
775  }
776  }
777 
778  skipThis=0;
779  c=((unsigned char)(src[pos]))&0xf;
780  if (skipLeadingZeroes) {
781  if (c==0 && pos+1<l)
782  skipThis=1;
783  else
784  skipLeadingZeroes=0;
785  }
786  if (c>9)
787  c+=7;
788  c+='0';
789  if (!skipThis) {
790  if (GWEN_Buffer_AppendByte(buf, c)) {
791  DBG_INFO(GWEN_LOGDOMAIN, "here");
792  return -1;
793  }
794  j++;
795  if (groupsize && j==groupsize) {
796  if (pos+1<l) {
797  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
798  DBG_INFO(GWEN_LOGDOMAIN, "here");
799  return -1;
800  }
801  }
802  j=0;
803  }
804  }
805  pos++;
806  }
807  return 0;
808 }
809 
810 
811 
812 int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize) {
813  unsigned int size = 0;
814 
815  while(*src) {
816  unsigned char d1, d2;
817  unsigned char c;
818 
819  /* read first digit */
820  if (!isxdigit((int)*src)) {
821  DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
822  return -1;
823  }
824  d1=(unsigned char)(toupper(*src));
825 
826  /* get second digit */
827  src++;
828  if (!(*src) || !isxdigit((int)*src)) {
829  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
830  return -1;
831  }
832  d2=(unsigned char)(toupper(*src));
833  src++;
834 
835  /* compute character */
836  d1-='0';
837  if (d1>9)
838  d1-=7;
839  c=(d1<<4)&0xf0;
840  d2-='0';
841  if (d2>9)
842  d2-=7;
843  c+=(d2&0xf);
844  /* store character */
845  if (size<(maxsize))
846  buffer[size++]=(char)c;
847  else {
848  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (maxsize=%d)", maxsize);
849  return -1;
850  }
851  } /* while */
852 
853  return size;
854 }
855 
856 
857 
858 int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf) {
859  while(*src) {
860  unsigned char d1, d2;
861  unsigned char c;
862 
863  /* read first digit */
864  if (isspace((int)*src)) {
865  src++;
866  }
867  else {
868  if (!isxdigit((int)*src)) {
869  DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
870  return -1;
871  }
872  d1=(unsigned char)(toupper(*src));
873 
874  /* get second digit */
875  src++;
876  if (!(*src) || !isxdigit((int)*src)) {
877  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
878  return -1;
879  }
880  d2=(unsigned char)(toupper(*src));
881  src++;
882 
883  /* compute character */
884  d1-='0';
885  if (d1>9)
886  d1-=7;
887  c=(d1<<4)&0xf0;
888  d2-='0';
889  if (d2>9)
890  d2-=7;
891  c+=(d2&0xf);
892  /* store character */
893  GWEN_Buffer_AppendByte(buf, (char)c);
894  }
895  } /* while */
896 
897  return 0;
898 }
899 
900 
901 
902 int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf) {
903  unsigned int l;
904  int fakeByte;
905 
906  l=strlen(src);
907  fakeByte=(l%2);
908  while(*src) {
909  unsigned char d1, d2;
910  unsigned char c;
911 
912  if (fakeByte) {
913  d1=0;
914  fakeByte=0;
915  }
916  else {
917  /* read first digit */
918  if (!isdigit((int)*src)) {
919  DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in bcd string");
920  return -1;
921  }
922  d1=(unsigned char)(*src);
923  src++;
924  }
925  /* get second digit */
926  if (!(*src) || !isxdigit((int)*src)) {
927  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete BCD byte (only 1 digit)");
928  return -1;
929  }
930  d2=(unsigned char)(*src);
931  src++;
932 
933  /* compute character */
934  d1-='0';
935  c=(d1<<4)&0xf0;
936  d2-='0';
937  c+=(d2&0xf);
938  /* store character */
939  GWEN_Buffer_AppendByte(buf, (char)c);
940  } /* while */
941 
942  return 0;
943 }
944 
945 
946 
947 int GWEN_Text_ToBcdBuffer(const char *src, unsigned l,
948  GWEN_BUFFER *buf,
949  unsigned int groupsize,
950  char delimiter,
951  int skipLeadingZeroes) {
952  unsigned int pos;
953  unsigned int j;
954 
955  j=0;
956 
957  pos=0;
958  j=0;
959  while(pos<l) {
960  unsigned char c;
961  int skipThis;
962 
963  skipThis=0;
964  c=(((unsigned char)(src[pos]))>>4)&0xf;
965  if (skipLeadingZeroes) {
966  if (c==0)
967  skipThis=1;
968  else
969  skipLeadingZeroes=0;
970  }
971  c+='0';
972  if (!skipThis) {
973  if (GWEN_Buffer_AppendByte(buf, c)) {
974  DBG_INFO(GWEN_LOGDOMAIN, "here");
975  return -1;
976  }
977  j++;
978  if (groupsize && j==groupsize) {
979  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
980  DBG_INFO(GWEN_LOGDOMAIN, "here");
981  return -1;
982  }
983  j=0;
984  }
985  }
986 
987  skipThis=0;
988  c=((unsigned char)(src[pos]))&0xf;
989  if (skipLeadingZeroes) {
990  if (c==0 && pos+1<l)
991  skipThis=1;
992  else
993  skipLeadingZeroes=0;
994  }
995  c+='0';
996  if (!skipThis) {
997  if (GWEN_Buffer_AppendByte(buf, c)) {
998  DBG_INFO(GWEN_LOGDOMAIN, "here");
999  return -1;
1000  }
1001  j++;
1002  if (groupsize && j==groupsize) {
1003  if (pos+1<l) {
1004  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
1005  DBG_INFO(GWEN_LOGDOMAIN, "here");
1006  return -1;
1007  }
1008  }
1009  j=0;
1010  }
1011  }
1012  pos++;
1013  }
1014  return 0;
1015 }
1016 
1017 
1018 
/*
 * Compare two strings, treating a NULL pointer and an empty string as
 * the same "no value".
 *
 * Returns 0 if both are missing/empty, 1 if only s1 is missing/empty,
 * -1 if only s2 is missing/empty. Otherwise returns the result of
 * strcasecmp() when ign is non-zero, or of strcmp() when ign is zero.
 */
int GWEN_Text_Compare(const char *s1, const char *s2, int ign) {
  const int has1=(s1!=0 && *s1!=0);
  const int has2=(s2!=0 && *s2!=0);

  if (!has1) {
    return has2 ? 1 : 0;
  }
  if (!has2) {
    return -1;
  }
  return ign ? strcasecmp(s1, s2) : strcmp(s1, s2);
}
1037 
1038 
1039 
/*
 * Case-insensitive substring search.
 *
 * Returns a pointer to the first position in haystack at which needle
 * occurs when both are compared through tolower(), or NULL if there is
 * none. An empty needle never matches (NULL is returned).
 */
const char *GWEN_Text_StrCaseStr(const char *haystack, const char *needle) {
  /* ctype functions need an unsigned char value; plain char may be
   * negative */
  const int first=tolower((unsigned char)*needle);

  for (; *haystack; haystack++) {
    const char *t;
    const char *s;

    if (tolower((unsigned char)*haystack)!=first)
      continue;

    /* first character matches, compare the rest of needle */
    t=haystack+1;
    s=needle+1;
    while(*t && *s &&
          tolower((unsigned char)*t)==tolower((unsigned char)*s)) {
      t++;
      s++;
    }
    if (*s==0)
      return haystack;
  }

  /* not found */
  return NULL;
}
1070 
1071 
1072 
1073 
/*
 * Compare word w (from *wpos) against pattern p (from *ppos) up to the
 * next '*' in the pattern or the end of either string.
 *
 * A '?' in the pattern accepts any single character. When sensecase is
 * zero both sides are upper-cased before comparison. Every position at
 * which the two characters are equal increments the match counter.
 *
 * On return *wpos, *ppos and *matches hold the positions and counter at
 * the point where the comparison stopped, whatever the result.
 *
 * Returns 1 if the segment matches (a '*' was reached, or both strings
 * ended together, or the word ended right before a '*'), 0 otherwise.
 */
int GWEN_Text__cmpSegment(const char *w, unsigned int *wpos,
                          const char *p, unsigned int *ppos,
                          int sensecase,
                          unsigned int *matches) {
  const unsigned wlength=strlen(w);
  const unsigned plength=strlen(p);
  unsigned int wi=*wpos;
  unsigned int pi=*ppos;
  unsigned int hits=*matches;
  int result=-1; /* -1: not decided inside the loop */

  while(wi<wlength && pi<plength) {
    /* go through unsigned char so toupper() never sees a negative value */
    int a=(unsigned char)w[wi];
    int b=(unsigned char)p[pi];

    if (b=='*') {
      result=1;
      break;
    }
    if (!sensecase) {
      a=toupper(a);
      b=toupper(b);
    }
    if (a==b)
      ++hits;
    else if (b!='?') {
      result=0;
      break;
    }
    ++wi;
    ++pi;
  }

  if (result<0) {
    if (wi==wlength && pi==plength)
      /* both at end */
      result=1;
    else if (wi>=wlength && pi<plength && p[pi]=='*')
      /* word ends, pattern continues with '*' */
      result=1;
    else
      result=0;
  }

  *wpos=wi;
  *ppos=pi;
  *matches=hits;
  return result;
}
1136 
1137 
1138 
/*
 * Try GWEN_Text__cmpSegment() at every word position from *wpos up to
 * the end of w, each time restarting from the pattern position and
 * match counter that were passed in.
 *
 * Returns 1 at the first position where the segment matches (the
 * outputs are then those written by GWEN_Text__cmpSegment()), 0 if no
 * position matches.
 */
int GWEN_Text__findSegment(const char *w, unsigned int *wpos,
                           const char *p, unsigned int *ppos,
                           int sensecase,
                           unsigned int *matches) {
  const unsigned wordLen=strlen(w);
  const unsigned int patternStart=*ppos;
  const unsigned int matchesAtStart=*matches;
  unsigned int candidate;

  for (candidate=*wpos; candidate<wordLen; candidate++) {
    /* reset the in/out values before every attempt */
    *ppos=patternStart;
    *wpos=candidate;
    *matches=matchesAtStart;
    if (GWEN_Text__cmpSegment(w, wpos, p, ppos, sensecase, matches))
      return 1;
  }
  return 0;
}
1160 
1161 
/*
 * Match word w against pattern p, where '*' and '?' in p are handled by
 * GWEN_Text__cmpSegment() / GWEN_Text__findSegment(). sensecase is
 * passed through to those helpers.
 *
 * Returns the accumulated match counter on success, -1 if the word does
 * not match the pattern.
 */
int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase) {
  unsigned int ppos=0;
  unsigned int wpos=0;
  unsigned int matches=0;
  const unsigned int plength=strlen(p);

  /* compare until first occurrence of '*' */
  if (!GWEN_Text__cmpSegment(w, &wpos, p, &ppos, sensecase, &matches))
    return -1;

  /* Each round: stop if the pattern is used up; otherwise step over the
   * '*' and stop if the pattern ends right behind it; otherwise look for
   * the next segment somewhere in the rest of the word. */
  while(ppos<plength && ++ppos<plength) {
    if (!GWEN_Text__findSegment(w, &wpos, p, &ppos, sensecase, &matches))
      return -1;
  }

  return matches;
}
1193 
1194 
1195 
1196 int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize,
1197  int fillchar) {
1198  char lbuffer[128];
1199  unsigned int i;
1200 
1201  sprintf(lbuffer,"%d", num);
1202  i=strlen(lbuffer);
1203  if (i>=bufsize) {
1204  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (%d>=%d)", i, bufsize);
1205  return -1;
1206  }
1207  if (fillchar>0) {
1208  /* fill right, but first copy chars */
1209  strcpy(buffer, lbuffer);
1210  while(i<bufsize-1)
1211  buffer[i++]=fillchar;
1212  buffer[i]=0;
1213  return bufsize;
1214  }
1215  else if (fillchar<0) {
1216  int j, k;
1217 
1218  fillchar=-fillchar;
1219  j=bufsize-1-i;
1220  for (k=0; k<j; k++)
1221  buffer[k]=fillchar;
1222  buffer[k]=0;
1223  strcat(buffer, lbuffer);
1224  return bufsize;
1225  }
1226  else {
1227  /* dont fill, just copy */
1228  strcpy(buffer, lbuffer);
1229  return i;
1230  }
1231 }
1232 
1233 
1234 
/*
 * Write a hex/text dump of l bytes of s to stderr, 16 bytes per row.
 *
 * Each row starts with insert blanks and the row offset in hex, then
 * the bytes as two-digit hex values, then the same bytes as text.
 * Bytes below 32 or above 127 are shown as '.'.
 */
void GWEN_Text_DumpString(const char *s, unsigned int l,
                          unsigned int insert) {
  unsigned int pos;
  unsigned int i;
  unsigned int k;

  for (k=0; k<insert; k++)
    fprintf(stderr, " ");
  /* l is unsigned, so print it with %u */
  fprintf(stderr, "String size is %u:\n", l);

  for (pos=0; pos<l; pos+=16) {
    /* end of this row, computed without overflowing pos+16 */
    const unsigned int rowEnd=(l-pos>16) ? pos+16 : l;

    for (k=0; k<insert; k++)
      fprintf(stderr, " ");
    fprintf(stderr, "%04x: ", pos);

    /* show hex dump */
    for (i=pos; i<rowEnd; i++)
      fprintf(stderr, "%02x ", (unsigned int)(unsigned char)s[i]);
    /* a hex cell is three characters wide, so pad short rows with three
     * blanks per missing byte to keep the text column aligned */
    for (i=rowEnd-pos; i<16; i++)
      fprintf(stderr, "   ");

    /* show text; go through unsigned char so the result does not depend
     * on whether plain char is signed */
    for (i=pos; i<rowEnd; i++) {
      const unsigned char c=(unsigned char)s[i];

      if (c<32 || c>127)
        fprintf(stderr, ".");
      else
        fprintf(stderr, "%c", c);
    }
    fprintf(stderr, "\n");
  }
}
1272 
1273 
1274 
1275 void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l,
1276  GWEN_BUFFER *mbuf,
1277  unsigned int insert) {
1278  unsigned int i;
1279  unsigned int j;
1280  unsigned int pos;
1281  unsigned k;
1282  char numbuf[32];
1283 
1284  pos=0;
1285  for (k=0; k<insert; k++)
1286  GWEN_Buffer_AppendByte(mbuf, ' ');
1287  GWEN_Buffer_AppendString(mbuf,"String size is ");
1288  snprintf(numbuf, sizeof(numbuf), "%d", l);
1289  GWEN_Buffer_AppendString(mbuf, numbuf);
1290  GWEN_Buffer_AppendByte(mbuf, '\n');
1291  while(pos<l) {
1292  for (k=0; k<insert; k++)
1293  GWEN_Buffer_AppendByte(mbuf, ' ');
1294  snprintf(numbuf, sizeof(numbuf),"%04x: ",pos);
1295  GWEN_Buffer_AppendString(mbuf, numbuf);
1296  j=pos+16;
1297  if (j>=l)
1298  j=l;
1299 
1300  /* show hex dump */
1301  for (i=pos; i<j; i++) {
1302  snprintf(numbuf, sizeof(numbuf),"%02x ", (unsigned char)s[i]);
1303  GWEN_Buffer_AppendString(mbuf, numbuf);
1304  }
1305  if (j-pos<16)
1306  for (i=0; i<16-(j-pos); i++)
1307  GWEN_Buffer_AppendString(mbuf, " ");
1308  /* show text */
1309  for (i=pos; i<j; i++) {
1310  if (s[i]<32)
1311  GWEN_Buffer_AppendByte(mbuf, '.');
1312  else
1313  GWEN_Buffer_AppendByte(mbuf, s[i]);
1314  }
1315  GWEN_Buffer_AppendByte(mbuf, '\n');
1316  pos+=16;
1317  }
1318 }
1319 
1320 
1321 
1322 
1323 
1324 
1325 
1326 int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf) {
1327  while(*src) {
1328  unsigned char x;
1329 
1330  x=(unsigned char)*src;
1331  if (!(
1332  (x>='A' && x<='Z') ||
1333  (x>='a' && x<='z') ||
1334  (x>='0' && x<='9'))) {
1335  unsigned char c;
1336 
1337  GWEN_Buffer_AppendByte(buf, '%');
1338  c=(((unsigned char)(*src))>>4)&0xf;
1339  if (c>9)
1340  c+=7;
1341  c+='0';
1342  GWEN_Buffer_AppendByte(buf, c);
1343  c=((unsigned char)(*src))&0xf;
1344  if (c>9)
1345  c+=7;
1346  c+='0';
1347  GWEN_Buffer_AppendByte(buf, c);
1348  }
1349  else
1350  GWEN_Buffer_AppendByte(buf, *src);
1351 
1352  src++;
1353  } /* while */
1354 
1355  return 0;
1356 }
1357 
1358 
1359 
1360 int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf) {
1361  while(*src) {
1362  unsigned char x;
1363 
1364  x=(unsigned char)*src;
1365  if (
1366  (x>='A' && x<='Z') ||
1367  (x>='a' && x<='z') ||
1368  (x>='0' && x<='9')) {
1369  GWEN_Buffer_AppendByte(buf, *src);
1370  }
1371  else {
1372  if (*src=='%') {
1373  unsigned char d1, d2;
1374  unsigned char c;
1375 
1376  /* skip '%' */
1377  src++;
1378  if (!(*src) || !isxdigit((int)*src)) {
1379  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
1380  return 0;
1381  }
1382  /* read first digit */
1383  d1=(unsigned char)(toupper(*src));
1384 
1385  /* get second digit */
1386  src++;
1387  if (!(*src) || !isxdigit((int)*src)) {
1388  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
1389  return 0;
1390  }
1391  d2=(unsigned char)(toupper(*src));
1392  /* compute character */
1393  d1-='0';
1394  if (d1>9)
1395  d1-=7;
1396  c=(d1<<4)&0xf0;
1397  d2-='0';
1398  if (d2>9)
1399  d2-=7;
1400  c+=(d2&0xf);
1401  /* store character */
1402  GWEN_Buffer_AppendByte(buf, (char)c);
1403  }
1404  else {
1405  DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
1406  "characters in escaped string (\"%s\")",
1407  src);
1408  return -1;
1409  }
1410  }
1411  src++;
1412  } /* while */
1413 
1414  return 0;
1415 }
1416 
1417 
1418 
1419 int GWEN_Text_EscapeToBufferTolerant(const char *src, GWEN_BUFFER *buf) {
1420  while(*src) {
1421  unsigned char x;
1422 
1423  x=(unsigned char)*src;
1424  if (!(
1425  (x>='A' && x<='Z') ||
1426  (x>='a' && x<='z') ||
1427  (x>='0' && x<='9') ||
1428  x==' ' ||
1429  x=='.' ||
1430  x==',' ||
1431  x=='.' ||
1432  x=='_' ||
1433  x=='-' ||
1434  x=='*' ||
1435  x=='?'
1436  )) {
1437  unsigned char c;
1438 
1439  GWEN_Buffer_AppendByte(buf, '%');
1440  c=(((unsigned char)(*src))>>4)&0xf;
1441  if (c>9)
1442  c+=7;
1443  c+='0';
1444  GWEN_Buffer_AppendByte(buf, c);
1445  c=((unsigned char)(*src))&0xf;
1446  if (c>9)
1447  c+=7;
1448  c+='0';
1449  GWEN_Buffer_AppendByte(buf, c);
1450  }
1451  else
1452  GWEN_Buffer_AppendByte(buf, *src);
1453 
1454  src++;
1455  } /* while */
1456 
1457  return 0;
1458 }
1459 
1460 
1461 
1463  while(*src) {
1464  //const char *srcBak=src;
1465 
1466  int charHandled=0;
1467  if (*src=='%') {
1468  if (strlen(src)>2) {
1469  unsigned char d1, d2;
1470  unsigned char c;
1471 
1472  if (isxdigit((int)src[1]) && isxdigit((int)src[2])) {
1473  /* skip '%' */
1474  src++;
1475  /* read first digit */
1476  d1=(unsigned char)(toupper(*src));
1477 
1478  /* get second digit */
1479  src++;
1480  d2=(unsigned char)(toupper(*src));
1481  /* compute character */
1482  d1-='0';
1483  if (d1>9)
1484  d1-=7;
1485  c=(d1<<4)&0xf0;
1486  d2-='0';
1487  if (d2>9)
1488  d2-=7;
1489  c+=(d2&0xf);
1490  /* store character */
1491  GWEN_Buffer_AppendByte(buf, (char)c);
1492  charHandled=1;
1493  }
1494  }
1495  }
1496  if (!charHandled)
1497  GWEN_Buffer_AppendByte(buf, *src);
1498  src++;
1499  } /* while */
1500 
1501  return 0;
1502 }
1503 
1504 
1505 
1507  while(GWEN_Buffer_GetBytesLeft(src)) {
1508  int z;
1509  unsigned char x;
1510 
1511  z=GWEN_Buffer_ReadByte(src);
1512  if (z==-1) {
1513  DBG_INFO(GWEN_LOGDOMAIN, "here");
1514  return -1;
1515  }
1516  x=(unsigned char)z;
1517  if (!(
1518  (x>='A' && x<='Z') ||
1519  (x>='a' && x<='z') ||
1520  (x>='0' && x<='9') ||
1521  x==' ' ||
1522  x=='.' ||
1523  x==',' ||
1524  x=='.' ||
1525  x=='*' ||
1526  x=='?'
1527  )) {
1528  unsigned char c;
1529 
1530  GWEN_Buffer_AppendByte(buf, '%');
1531  c=(((unsigned char)x)>>4)&0xf;
1532  if (c>9)
1533  c+=7;
1534  c+='0';
1535  GWEN_Buffer_AppendByte(buf, c);
1536  c=((unsigned char)x)&0xf;
1537  if (c>9)
1538  c+=7;
1539  c+='0';
1540  GWEN_Buffer_AppendByte(buf, c);
1541  }
1542  else
1543  GWEN_Buffer_AppendByte(buf, x);
1544  } /* while */
1545 
1546  return 0;
1547 }
1548 
1549 
1550 
1551 void GWEN_Text_LogString(const char *s, unsigned int l,
1552  const char *logDomain,
1553  GWEN_LOGGER_LEVEL lv) {
1554  GWEN_BUFFER *mbuf;
1555 
1556  mbuf=GWEN_Buffer_new(0, ((l*16)<1024)?1024:l*16, 0, 1);
1557  GWEN_Text_DumpString2Buffer(s, l, mbuf, 0);
1558  GWEN_Logger_Log(logDomain, lv, GWEN_Buffer_GetStart(mbuf));
1559  GWEN_Buffer_free(mbuf);
1560 }
1561 
1562 
1563 
1565  const char *p;
1566  char *dst;
1567  unsigned int size;
1568  unsigned int i;
1569  int lastWasBlank;
1570  char *lastBlankPos;
1571 
1572  dst=GWEN_Buffer_GetStart(buf);
1573  p=dst;
1574  size=GWEN_Buffer_GetUsedBytes(buf);
1575  lastWasBlank=0;
1576  lastBlankPos=0;
1577 
1578  for (i=0; i<size; i++) {
1579  /* remember next loop whether this char was a blank */
1580  if (isspace((int)*p)) {
1581  if (!lastWasBlank) {
1582  /* store only one blank */
1583  lastWasBlank=1;
1584  lastBlankPos=dst;
1585  *(dst++)=*p;
1586  }
1587  }
1588  else {
1589  lastWasBlank=0;
1590  lastBlankPos=0;
1591  *(dst++)=*p;
1592  }
1593  p++;
1594  }
1595 
1596  /* remove trailing blanks */
1597  if (lastBlankPos!=0)
1598  dst=lastBlankPos;
1599 
1600  size=dst-GWEN_Buffer_GetStart(buf);
1601  GWEN_Buffer_Crop(buf, 0, size);
1602 }
1603 
1604 
1605 
1606 int GWEN_Text_DoubleToBuffer(double num, GWEN_BUFFER *buf) {
1607  char numbuf[128];
1608  int rv;
1609 #ifdef HAVE_SETLOCALE
1610  const char *orig_locale = setlocale(LC_NUMERIC, NULL);
1611  char *currentLocale = strdup(orig_locale ? orig_locale : "C");
1612  setlocale(LC_NUMERIC,"C");
1613 #endif
1614 
1615  rv=snprintf(numbuf, sizeof(numbuf), "%f", num);
1616 
1617 #ifdef HAVE_SETLOCALE
1618  setlocale(LC_NUMERIC, currentLocale);
1619  free(currentLocale);
1620 #endif
1621 
1622  if (rv<1 || rv>=sizeof(numbuf))
1623  return -1;
1624  GWEN_Buffer_AppendString(buf, numbuf);
1625  return 0;
1626 }
1627 
1628 
1629 
/*
 * Parses a floating point number from the start of s (scanf format "%lf")
 * and stores it in *num.
 *
 * When HAVE_SETLOCALE is defined, the LC_NUMERIC locale is switched to "C"
 * for the duration of the conversion and restored afterwards, so parsing
 * does not depend on the current locale.
 *
 * Returns 0 if a number was read, -1 otherwise (including the case that the
 * current locale could not be saved). *num is only written on success.
 */
int GWEN_Text_StringToDouble(const char *s, double *num) {
  int rv;
#ifdef HAVE_SETLOCALE
  const char *origLocale;
  char *savedLocale;

  /* setlocale() may reuse its result storage, so keep a private copy */
  origLocale=setlocale(LC_NUMERIC, NULL);
  savedLocale=strdup(origLocale ? origLocale : "C");
  if (savedLocale==NULL) {
    /* without a copy the locale could not be restored afterwards, so do
     * not touch it at all */
    DBG_ERROR(GWEN_LOGDOMAIN, "Could not save current locale");
    return -1;
  }
  setlocale(LC_NUMERIC, "C");
#endif

  rv=sscanf(s, "%lf", num);

#ifdef HAVE_SETLOCALE
  setlocale(LC_NUMERIC, savedLocale);
  free(savedLocale);
#endif

  return (rv==1) ? 0 : -1;
}
1649 
1650 
1651 
/*
 * Computes a one-directional similarity score between s1 and s2 as a
 * percentage (0..100, truncated to a whole number).
 *
 * For every character of s1 the remainder of s2 is scanned for the first
 * character that "matches" it; s2 then continues behind that character.
 * A match scores
 *   2  if both characters are identical,
 *   2  if they differ only in case and ign is non-zero,
 *   1  if they differ only in case and ign is zero,
 *   1  if both are merely alphanumeric.
 * The sum of the scores is divided by the combined length of both strings.
 *
 * Two empty strings are reported as 100 (identical); the original code
 * divided by zero in that case.
 */
double GWEN_Text__CheckSimilarity(const char *s1, const char *s2, int ign) {
  const int caseOnlyScore=ign ? 2 : 1;
  int nboth;
  int nmatch;

  nboth=strlen(s1)+strlen(s2);
  if (nboth==0)
    return 100.0;

  nmatch=0;
  while(*s1 && *s2) {
    /* unsigned char keeps the <ctype.h> calls defined for bytes >= 0x80 */
    const unsigned char c1=(unsigned char)*s1;
    const char *t;
    int lmatch;

    /* find the next matching character in s2 */
    lmatch=0;
    for (t=s2; *t; t++) {
      const unsigned char c2=(unsigned char)*t;

      if (c1==c2)
        lmatch=2;
      else if (toupper(c1)==toupper(c2))
        lmatch=caseOnlyScore;
      else if (isalnum(c1) && isalnum(c2))
        lmatch=1;

      if (lmatch)
        break;
    }

    if (lmatch) {
      nmatch+=lmatch;
      s2=t+1;
    }

    s1++;
  } /* while */

  /* integer division on purpose: callers have always received whole
   * percentages */
  return (double)((nmatch*100)/nboth);
}
1723 
1724 
1725 
/*
 * Returns the similarity of s1 and s2 as a percentage.
 *
 * GWEN_Text__CheckSimilarity() is evaluated in both directions (s1 against
 * s2, then s2 against s1) and the larger of the two results is returned.
 * ign is passed through unchanged.
 */
double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign) {
  const double forward=GWEN_Text__CheckSimilarity(s1, s2, ign);
  const double backward=GWEN_Text__CheckSimilarity(s2, s1, ign);

  return (backward>forward) ? backward : forward;
}
1735 
1736 
1737 
1738 int GWEN_Text_CountUtf8Chars(const char *s, int len) {
1739  int count;
1740  int handled;
1741 
1742  if (len==0)
1743  len=strlen(s);
1744  count=0;
1745  handled=0;
1746  while(handled<len) {
1747  unsigned char c;
1748  int i;
1749 
1750  c=(unsigned char)*s;
1751  if ((c & 0xfe)==0xfc)
1752  i=5;
1753  else if ((c & 0xfc)==0xf8)
1754  i=4;
1755  else if ((c & 0xf8)==0xf0)
1756  i=3;
1757  else if ((c & 0xf0)==0xe0)
1758  i=2;
1759  else if ((c & 0xe0)==0xc0)
1760  i=1;
1761  else if (c & 0x80) {
1762  DBG_ERROR(GWEN_LOGDOMAIN, "Invalid UTF8 character at pos %d", handled);
1763  return -1;
1764  }
1765  else
1766  i=0;
1767  if (handled+i+1>len) {
1769  "Incomplete UTF8 sequence at pos %d", handled);
1770  return -1;
1771  }
1772  s++;
1773  if (i) {
1774  int j;
1775 
1776  for (j=0; j<i; j++) {
1777  if ((((unsigned char)*s) & 0xc0)!=0xc0) {
1779  "Invalid UTF8 sequence at pos %d (rel %d of %d)",
1780  handled, j, i);
1781  }
1782  s++;
1783  }
1784  }
1785  handled+=i+1;
1786  count++;
1787  } /* while */
1788 
1789  return count;
1790 }
1791 
1792 
1793 
1794 int GWEN_Text_UnescapeXmlToBuffer(const char *src, GWEN_BUFFER *buf) {
1795  char *pdst;
1796  uint32_t roomLeft;
1797  uint32_t bytesAdded;
1798 
1799 #define GWEN_TEXT__APPENDCHAR(chr) \
1800  if (roomLeft<2) { \
1801  if (bytesAdded) { \
1802  GWEN_Buffer_IncrementPos(buf, bytesAdded); \
1803  GWEN_Buffer_AdjustUsedBytes(buf); \
1804  } \
1805  GWEN_Buffer_AllocRoom(buf, 2); \
1806  pdst=GWEN_Buffer_GetPosPointer(buf); \
1807  roomLeft=GWEN_Buffer_GetMaxUnsegmentedWrite(buf); \
1808  bytesAdded=0; \
1809  } \
1810  *(pdst++)=(unsigned char)chr; \
1811  *pdst=0; \
1812  bytesAdded++; \
1813  roomLeft--
1814 
1815  pdst=GWEN_Buffer_GetPosPointer(buf);
1816  roomLeft=GWEN_Buffer_GetMaxUnsegmentedWrite(buf);
1817  bytesAdded=0;
1818 
1819  while(*src) {
1820  unsigned char x;
1821  int match;
1822 
1823  match=0;
1824  x=(unsigned char)*src;
1825  if (x=='&') {
1826  if (src[1]=='#') {
1827  unsigned char num=0;
1828 
1829  src++;
1830  src++;
1831  while(*src && isdigit((int)*src)) {
1832  num*=10;
1833  num+=(*src)-'0';
1834  src++;
1835  }
1836  src++;
1837  GWEN_TEXT__APPENDCHAR(num);
1838  }
1839  else if (strncmp(src+1, "szlig;", 6)==0) {
1840  GWEN_TEXT__APPENDCHAR(0xc3);
1841  GWEN_TEXT__APPENDCHAR(0x9f);
1842  src+=7;
1843  match=1;
1844  }
1845  else if (strncmp(src+1, "Auml;", 5)==0) {
1846  GWEN_TEXT__APPENDCHAR(0xc3);
1847  GWEN_TEXT__APPENDCHAR(0x84);
1848  src+=6;
1849  match=1;
1850  }
1851  else if (strncmp(src+1, "Ouml;", 5)==0) {
1852  GWEN_TEXT__APPENDCHAR(0xc3);
1853  GWEN_TEXT__APPENDCHAR(0x96);
1854  src+=6;
1855  match=1;
1856  }
1857  else if (strncmp(src+1, "Uuml;", 5)==0) {
1858  GWEN_TEXT__APPENDCHAR(0xc3);
1859  GWEN_TEXT__APPENDCHAR(0x9c);
1860  src+=6;
1861  match=1;
1862  }
1863  else if (strncmp(src+1, "auml;", 5)==0) {
1864  GWEN_TEXT__APPENDCHAR(0xc3);
1865  GWEN_TEXT__APPENDCHAR(0xa4);
1866  src+=6;
1867  match=1;
1868  }
1869  else if (strncmp(src+1, "ouml;", 5)==0) {
1870  GWEN_TEXT__APPENDCHAR(0xc3);
1871  GWEN_TEXT__APPENDCHAR(0xb6);
1872  src+=6;
1873  match=1;
1874  }
1875  else if (strncmp(src+1, "uuml;", 5)==0) {
1876  GWEN_TEXT__APPENDCHAR(0xc3);
1877  GWEN_TEXT__APPENDCHAR(0xbc);
1878  src+=6;
1879  match=1;
1880  }
1881  else {
1882  const GWEN_TEXT_ESCAPE_ENTRY *e;
1884  while(e->replace) {
1885  int l;
1886 
1887  l=strlen(e->replace);
1888  if (strncasecmp(src, e->replace, l)==0) {
1890  //GWEN_Buffer_AppendByte(buf, e->character);
1891  src+=l;
1892  match=1;
1893  break;
1894  }
1895  e++;
1896  } /* while */
1897  }
1898  }
1899  if (!match) {
1900  GWEN_TEXT__APPENDCHAR(*(src++));
1901  }
1902  } /* while */
1903 
1904  if (bytesAdded) {
1905  GWEN_Buffer_IncrementPos(buf, bytesAdded);
1907  }
1908 
1909  return 0;
1910 #undef GWEN_TEXT__APPENDCHAR
1911 }
1912 
1913 
1914 
1915 int GWEN_Text_EscapeXmlToBuffer(const char *src, GWEN_BUFFER *buf) {
1916  while(*src) {
1917  unsigned char x;
1918  const GWEN_TEXT_ESCAPE_ENTRY *e;
1919  int match;
1920 
1921  match=0;
1922  x=(unsigned char)*src;
1924  while(e->replace) {
1925  if (x==e->character) {
1927  match=1;
1928  break;
1929  }
1930  e++;
1931  } /* while */
1932 
1933  if (!match) {
1934  if (0 && x>127) { /* disabled */
1935  char numbuf[32];
1936 
1937  snprintf(numbuf, sizeof(numbuf), "&#%d;", x);
1938  GWEN_Buffer_AppendString(buf, numbuf);
1939  }
1940  else
1941  GWEN_Buffer_AppendByte(buf, *src);
1942  }
1943  src++;
1944  } /* while */
1945 
1946  return 0;
1947 }
1948 
1949 
1950 
1951 int GWEN_Text_ConvertCharset(const char *fromCharset,
1952  const char *toCharset,
1953  const char *text, int len,
1954  GWEN_BUFFER *tbuf) {
1955  if (len) {
1956  if (fromCharset && *fromCharset && toCharset && *toCharset &&
1957  strcasecmp(fromCharset, toCharset)!=0) {
1958 #ifndef HAVE_ICONV
1960  "iconv not available, can not convert from \"%s\" to \"%s\"",
1961  fromCharset, toCharset);
1962 #else
1963  iconv_t ic;
1964 
1965  ic=iconv_open(toCharset, fromCharset);
1966  if (ic==((iconv_t)-1)) {
1967  DBG_ERROR(GWEN_LOGDOMAIN, "Charset \"%s\" or \"%s\" not available",
1968  fromCharset, toCharset);
1969  }
1970  else {
1971  char *outbuf;
1972  char *pOutbuf;
1973  /* Some systems have iconv in libc, some have it in libiconv
1974  (OSF/1 and those with the standalone portable GNU libiconv
1975  installed). Check which one is available. The define
1976  ICONV_CONST will be "" or "const" accordingly. */
1977  ICONV_CONST char *pInbuf;
1978  size_t inLeft;
1979  size_t outLeft;
1980  size_t done;
1981  size_t space;
1982 
1983  /* convert */
1984  pInbuf=(char*)text;
1985 
1986  outLeft=len*2;
1987  space=outLeft;
1988  outbuf=(char*)malloc(outLeft);
1989  assert(outbuf);
1990 
1991  inLeft=len;
1992  pInbuf=(char*)text;
1993  pOutbuf=outbuf;
1994  done=iconv(ic, &pInbuf, &inLeft, &pOutbuf, &outLeft);
1995  if (done==(size_t)-1) {
1996  DBG_ERROR(GWEN_LOGDOMAIN, "Error in conversion: %s (%d)",
1997  strerror(errno), errno);
1998  free(outbuf);
1999  iconv_close(ic);
2000  return GWEN_ERROR_GENERIC;
2001  }
2002 
2003  GWEN_Buffer_AppendBytes(tbuf, outbuf, space-outLeft);
2004  free(outbuf);
2005  DBG_DEBUG(GWEN_LOGDOMAIN, "Conversion done.");
2006  iconv_close(ic);
2007  return 0;
2008  }
2009 #endif
2010  }
2011 
2012  GWEN_Buffer_AppendBytes(tbuf, text, len);
2013  }
2014  return 0;
2015 }
2016 
int GWEN_Text__cmpSegment(const char *w, unsigned int *wpos, const char *p, unsigned int *ppos, int sensecase, unsigned int *matches)
Definition: text.c:1074
uint32_t GWEN_Buffer_GetBytesLeft(GWEN_BUFFER *bf)
Definition: buffer.c:577
char * GWEN_Buffer_GetStart(const GWEN_BUFFER *bf)
Definition: buffer.c:223
void GWEN_Text_CondenseBuffer(GWEN_BUFFER *buf)
Definition: text.c:1564
#define GWEN_TEXT_FLAGS_DEL_TRAILING_BLANKS
Definition: text.h:45
const char * replace
Definition: text.c:59
Definition: text.c:57
uint32_t GWEN_Buffer_GetMaxUnsegmentedWrite(GWEN_BUFFER *bf)
Definition: buffer.c:569
uint32_t GWEN_Buffer_GetUsedBytes(const GWEN_BUFFER *bf)
Definition: buffer.c:266
void GWEN_Text_DumpString(const char *s, unsigned int l, unsigned int insert)
Definition: text.c:1235
GWEN_LOGGER_LEVEL
Definition: logger.h:64
#define NULL
Definition: binreloc.c:290
int GWEN_Text_EscapeToBufferTolerant(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1419
int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize)
Definition: text.c:812
int GWEN_Text_ConvertCharset(const char *fromCharset, const char *toCharset, const char *text, int len, GWEN_BUFFER *tbuf)
Definition: text.c:1951
int GWEN_Buffer_AdjustUsedBytes(GWEN_BUFFER *bf)
Definition: buffer.c:513
#define GWEN_LOGDOMAIN
Definition: logger.h:35
uint32_t GWEN_Buffer_GetPos(const GWEN_BUFFER *bf)
Definition: buffer.c:239
static const GWEN_TEXT_ESCAPE_ENTRY gwen_text__xml_escape_chars[]
Definition: text.c:62
GWEN_BUFFER * GWEN_Buffer_new(char *buffer, uint32_t size, uint32_t used, int take)
Definition: buffer.c:38
int GWEN_Text_UnescapeToBufferTolerant(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1462
char * GWEN_Buffer_GetPosPointer(const GWEN_BUFFER *bf)
Definition: buffer.c:588
char * GWEN_Text_GetWord(const char *src, const char *delims, char *buffer, unsigned int maxsize, uint32_t flags, const char **next)
Definition: text.c:73
int GWEN_Buffer_IncrementPos(GWEN_BUFFER *bf, uint32_t i)
Definition: buffer.c:495
double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign)
Definition: text.c:1726
int GWEN_Text_CountUtf8Chars(const char *s, int len)
Definition: text.c:1738
#define ICONV_CONST
Definition: text.c:43
void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l, GWEN_BUFFER *mbuf, unsigned int insert)
Definition: text.c:1275
#define GWEN_TEXT_FLAGS_NULL_IS_DELIMITER
Definition: text.h:48
int GWEN_Logger_Log(const char *logDomain, GWEN_LOGGER_LEVEL priority, const char *s)
Definition: logger.c:533
#define DBG_DEBUG(dbg_logger, format, args...)
Definition: debug.h:192
#define GWEN_TEXT_FLAGS_NEED_DELIMITER
Definition: text.h:47
int GWEN_Text_ToBcdBuffer(const char *src, unsigned l, GWEN_BUFFER *buf, unsigned int groupsize, char delimiter, int skipLeadingZeroes)
Definition: text.c:947
int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1326
const char * GWEN_Text_StrCaseStr(const char *haystack, const char *needle)
Definition: text.c:1040
char * GWEN_Text_Unescape(const char *src, char *buffer, unsigned int maxsize)
Definition: text.c:510
#define GWEN_ERROR_GENERIC
Definition: error.h:62
#define GWEN_TEXT_FLAGS_DEL_MULTIPLE_BLANKS
Definition: text.h:46
void GWEN_Text_LogString(const char *s, unsigned int l, const char *logDomain, GWEN_LOGGER_LEVEL lv)
Definition: text.c:1551
char * GWEN_Text_UnescapeTolerantN(const char *src, unsigned int srclen, char *buffer, unsigned int maxsize)
Definition: text.c:521
int GWEN_Buffer_AppendByte(GWEN_BUFFER *bf, char c)
Definition: buffer.c:380
int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:902
int GWEN_Text_UnescapeXmlToBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1794
double GWEN_Text__CheckSimilarity(const char *s1, const char *s2, int ign)
Definition: text.c:1652
int GWEN_Text_EscapeXmlToBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1915
void GWEN_Buffer_free(GWEN_BUFFER *bf)
Definition: buffer.c:83
struct GWEN_BUFFER GWEN_BUFFER
A dynamically resizeable text buffer.
Definition: buffer.h:41
int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1360
int GWEN_Text_Compare(const char *s1, const char *s2, int ign)
Definition: text.c:1019
int GWEN_Text__findSegment(const char *w, unsigned int *wpos, const char *p, unsigned int *ppos, int sensecase, unsigned int *matches)
Definition: text.c:1139
int GWEN_Buffer_Crop(GWEN_BUFFER *bf, uint32_t pos, uint32_t l)
Definition: buffer.c:973
int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize, int fillchar)
Definition: text.c:1196
#define DBG_ERROR(dbg_logger, format, args...)
Definition: debug.h:97
char * GWEN_Text_ToHex(const char *src, unsigned l, char *buffer, unsigned int maxsize)
Definition: text.c:622
int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase)
Definition: text.c:1162
char * GWEN_Text_ToHexGrouped(const char *src, unsigned l, char *buffer, unsigned maxsize, unsigned int groupsize, char delimiter, int skipLeadingZeroes)
Definition: text.c:655
int GWEN_Text_StringToDouble(const char *s, double *num)
Definition: text.c:1630
#define DBG_INFO(dbg_logger, format, args...)
Definition: debug.h:164
#define GWEN_TEXT_FLAGS_DEL_QUOTES
Definition: text.h:49
char * GWEN_Text_UnescapeTolerant(const char *src, char *buffer, unsigned int maxsize)
Definition: text.c:611
int GWEN_Text_GetWordToBuffer(const char *src, const char *delims, GWEN_BUFFER *buf, uint32_t flags, const char **next)
Definition: text.c:198
char * GWEN_Text_EscapeTolerant(const char *src, char *buffer, unsigned int maxsize)
Definition: text.c:370
int GWEN_Buffer_ReadByte(GWEN_BUFFER *bf)
Definition: buffer.c:477
char * GWEN_Text_Escape(const char *src, char *buffer, unsigned int maxsize)
Definition: text.c:320
#define GWEN_TEXT__APPENDCHAR(chr)
#define GWEN_TEXT_FLAGS_DEL_LEADING_BLANKS
Definition: text.h:44
int GWEN_Buffer_AppendBytes(GWEN_BUFFER *bf, const char *buffer, uint32_t size)
Definition: buffer.c:348
int character
Definition: text.c:58
#define GWEN_TEXT_FLAGS_CHECK_BACKSLASH
Definition: text.h:50
int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:858
int GWEN_Text_EscapeToBufferTolerant2(GWEN_BUFFER *src, GWEN_BUFFER *buf)
Definition: text.c:1506
int GWEN_Text_DoubleToBuffer(double num, GWEN_BUFFER *buf)
Definition: text.c:1606
int GWEN_Text_ToHexBuffer(const char *src, unsigned l, GWEN_BUFFER *buf, unsigned int groupsize, char delimiter, int skipLeadingZeroes)
Definition: text.c:740
int GWEN_Buffer_AppendString(GWEN_BUFFER *bf, const char *buffer)
Definition: buffer.c:1014
char * GWEN_Text_UnescapeN(const char *src, unsigned int srclen, char *buffer, unsigned int maxsize)
Definition: text.c:427