gwenhywfar  4.99.15beta
text.c
Go to the documentation of this file.
1 /***************************************************************************
2  begin : Sat Jun 28 2003
3  copyright : (C) 2003 by Martin Preuss
4  email : martin@libchipcard.de
5 
6  ***************************************************************************
7  * *
8  * This library is free software; you can redistribute it and/or *
9  * modify it under the terms of the GNU Lesser General Public *
10  * License as published by the Free Software Foundation; either *
11  * version 2.1 of the License, or (at your option) any later version. *
12  * *
13  * This library is distributed in the hope that it will be useful, *
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
16  * Lesser General Public License for more details. *
17  * *
18  * You should have received a copy of the GNU Lesser General Public *
19  * License along with this library; if not, write to the Free Software *
20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21  * MA 02111-1307 USA *
22  * *
23  ***************************************************************************/
24 
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28 
29 #define DISABLE_DEBUGLOG
30 
31 
32 #include "text.h"
33 #include <stdlib.h>
34 #include <assert.h>
35 #include <string.h>
36 #include <errno.h>
37 #include <ctype.h>
38 #ifdef HAVE_LOCALE_H
39 # include <locale.h>
40 #endif
41 
42 #ifndef ICONV_CONST
43 # define ICONV_CONST
44 #endif
45 
46 #ifdef HAVE_ICONV_H
47 # include <iconv.h>
48 #endif
49 
50 
51 #include <gwenhywfar/gwenhywfarapi.h>
52 #include <gwenhywfar/debug.h>
53 #include <gwenhywfar/stringlist.h>
54 
55 
56 
/* Table pairing a character with the XML entity string that replaces it.
 * The visible entries cover '&', '<', '>', '\'' and '"'; the table is
 * terminated by a {0, 0} entry.
 * NOTE(review): the embedded listing numbers jump 59 -> 61 -> 63, so the
 * lines that close the typedef and open the array definition are missing
 * from this extract -- restore them from the upstream file. */
57 typedef struct {
58  int character;
59  const char *replace;
61 
63  {'&', "&amp;"},
64  {'<', "&lt;"},
65  {'>', "&gt;"},
66  {'\'', "&apos;"},
67  {'\"', "&quot;"},
68  {0, 0}
69 };
70 
71 
72 
/*
 * Copies the next word of src into buffer and reports where parsing stopped.
 *
 * A word ends at the terminating NUL, at a character contained in delims
 * (only outside of double quotes), at a closing double quote, or when
 * maxsize-1 characters have been stored. The result is always
 * NUL-terminated (maxsize must be non-zero, see the assert).
 *
 * Parameters:
 *   src      NUL-terminated input text
 *   delims   NUL-terminated set of delimiter characters
 *   buffer   destination for the word
 *   maxsize  size of buffer in bytes, including the terminating NUL
 *   flags    GWEN_TEXT_FLAGS_* bits controlling quote removal, backslash
 *            escaping and delimiter requirements
 *   next     on success receives the position in src where parsing stopped
 *
 * Returns buffer on success, 0 on a quoting error or when a required
 * delimiter is missing (*next is not written in that case).
 *
 * NOTE(review): the embedded listing numbers skip 89, 131, 142 and 188
 * inside this function, so four source lines (presumably flag tests and the
 * start of a DBG_* call) are missing from this extract -- confirm against
 * the upstream file before editing the logic.
 */
73 char *GWEN_Text_GetWord(const char *src,
74  const char *delims,
75  char *buffer,
76  unsigned int maxsize,
77  uint32_t flags,
78  const char **next)
79 {
80  unsigned int size;
81  int lastWasBlank;
82  int lastBlankPos;
83  int insideQuotes;
84  int lastWasEscape;
85 
86  assert(maxsize);
87 
88  /* skip leading blanks, if wanted */
/* NOTE(review): the condition guarding this loop is not visible here;
 * everything below code 33 (control characters and space) counts as blank */
90  while (*src && (unsigned char)(*src)<33)
91  src++;
92  }
93 
94  /* get word */
95  size=0;
96  lastWasBlank=0;
97  lastBlankPos=-1;
98  lastWasEscape=0;
99  insideQuotes=0;
100 
/* a leading double quote switches to quoted mode; the quote itself is only
 * skipped when GWEN_TEXT_FLAGS_DEL_QUOTES is set */
101  if (*src=='"') {
102  insideQuotes=1;
103  if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
104  src++;
105  }
106 
107  while (*src && size<(maxsize-1)) {
108  if (lastWasEscape) {
/* the character following a backslash is copied verbatim */
109  buffer[size]=*src;
110  size++;
111  lastWasEscape=0;
112  lastWasBlank=0;
113  lastBlankPos=-1;
114  }
115  else {
116  if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
/* the backslash itself is dropped, only the escape state is remembered */
117  lastWasEscape=1;
118  lastWasBlank=0;
119  lastBlankPos=-1;
120  }
121  else {
/* delimiters only end the word outside of quotes */
122  if (!insideQuotes && strchr(delims, *src)!=0)
123  break;
124  if (*src=='"') {
125  if (insideQuotes) {
/* closing quote: consume it and finish the word */
126  insideQuotes=0;
127  src++;
128  break;
129  }
130  else {
132  "Found a closing \" without an opening one "
133  "(consider using a backslash to escape)");
134  return 0;
135  }
136  }
137 
138 
139  if (insideQuotes ||
140  !lastWasBlank ||
141  (lastWasBlank &&
143  /* only copy if last char was NOT blank or
144  * last was blank but the caller does not want to have multiple
145  * blanks removed */
146  buffer[size]=*src;
147  size++;
148  }
149  /* remember next loop whether this char was a blank */
150  if (isspace((int)((unsigned char)*src)) && !lastWasEscape) {
151  lastWasBlank=1;
152  lastBlankPos=size;
153  }
154  else {
155  lastWasBlank=0;
156  lastBlankPos=-1;
157  }
158  } /* if this is not a backslash */
159  } /* !lastWasEscape */
160  /* advance source pointer */
161  src++;
162  } /* while */
163 
164  /* add trailing null to correctly terminate the buffer */
165  buffer[size]=0;
166 
/* still inside quotes here means the closing quote was never seen */
167  if (insideQuotes) {
168  DBG_DEBUG(GWEN_LOGDOMAIN, "Missing \" after word");
169  return 0;
170  }
171  /* check whether the source string was correctly terminated */
172  if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
173  if (*src) {
174  if (strchr(delims, *src)==0) {
175  DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
176  return 0;
177  }
178  }
179  else {
/* end of string only counts as a delimiter when explicitly allowed */
180  if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
181  DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
182  return 0;
183  }
184  }
185  }
186 
187  /* remove trailing blanks, if wanted */
189  if (lastBlankPos!=-1)
190  buffer[lastBlankPos]=0;
191  }
192 
193  *next=src;
194  return buffer;
195 }
196 
197 
198 
/*
 * Appends the next word of src to buf and reports where parsing stopped.
 *
 * A word ends at the terminating NUL, at a character contained in delims
 * (only outside of double quotes) or at a closing double quote.
 *
 * Parameters:
 *   src     NUL-terminated input text
 *   delims  NUL-terminated set of delimiter characters
 *   buf     buffer the word is appended to
 *   flags   GWEN_TEXT_FLAGS_* bits controlling quote removal, backslash
 *           escaping and delimiter requirements
 *   next    on success receives the position in src where parsing stopped
 *
 * Returns 0 on success, -1 on a quoting error or when a required delimiter
 * is missing (*next is not written in that case).
 *
 * NOTE(review): the embedded listing numbers skip 212, 257, 268 and 311
 * inside this function, so four source lines (presumably flag tests and the
 * start of a DBG_* call) are missing from this extract -- confirm against
 * the upstream file before editing the logic.
 */
199 int GWEN_Text_GetWordToBuffer(const char *src,
200  const char *delims,
201  GWEN_BUFFER *buf,
202  uint32_t flags,
203  const char **next)
204 {
205  const char *savedSrc=src;
206  int lastWasBlank;
207  int lastBlankPos;
208  int insideQuotes;
209  int lastWasEscape;
210 
211  /* skip leading blanks, if wanted */
213  while (*src && (unsigned char)(*src)<33) {
/* a blank that is itself a delimiter yields an empty word */
214  if (strchr(delims, *src)) {
215  *next=src;
216  return 0;
217  }
218  src++;
219  }
220  }
221 
222  /* get word */
223  lastWasBlank=0;
224  lastBlankPos=-1;
225  lastWasEscape=0;
226  insideQuotes=0;
227 
/* a leading double quote switches to quoted mode; the quote itself is only
 * skipped when GWEN_TEXT_FLAGS_DEL_QUOTES is set */
228  if (*src=='"') {
229  insideQuotes=1;
230  if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
231  src++;
232  }
233 
234  while (*src) {
235  if (lastWasEscape) {
/* the character following a backslash is appended verbatim */
236  GWEN_Buffer_AppendByte(buf, *src);
237  lastWasEscape=0;
238  lastWasBlank=0;
239  lastBlankPos=-1;
240  }
241  else {
242  if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
243  lastWasEscape=1;
244  lastWasBlank=0;
245  lastBlankPos=-1;
246  }
247  else {
/* delimiters only end the word outside of quotes */
248  if (!insideQuotes && strchr(delims, *src)!=0)
249  break;
250  if (*src=='"') {
251  if (insideQuotes) {
/* closing quote: consume it and finish the word */
252  insideQuotes=0;
253  src++;
254  break;
255  }
256  else {
258  "Found a closing \" without an opening one "
259  "(consider using a backslash to escape)");
260  return -1;
261  }
262  }
263 
264 
265  if (insideQuotes ||
266  !lastWasBlank ||
267  (lastWasBlank &&
269  /* only copy if last char was NOT blank or
270  * last was blank but the caller does not want to have multiple
271  * blanks removed */
272  GWEN_Buffer_AppendByte(buf, *src);
273  }
274  /* remember next loop whether this char was a blank */
275 
276  if (!lastWasEscape && *((unsigned char *)src)<33) {
277  lastWasBlank=1;
278  lastBlankPos=GWEN_Buffer_GetPos(buf);
279  }
280  else {
281  lastWasBlank=0;
282  lastBlankPos=-1;
283  }
284  } /* if this is not a backslash */
285  } /* !lastWasEscape */
286  /* advance source pointer */
287  src++;
288  } /* while */
289 
/* still inside quotes here means the closing quote was never seen */
290  if (insideQuotes) {
291  DBG_ERROR(GWEN_LOGDOMAIN, "Missing \" after word (at %d: [%s])", (int)(src-savedSrc), savedSrc);
292  return -1;
293  }
294  /* check whether the source string was correctly terminated */
295  if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
296  if (*src) {
297  if (strchr(delims, *src)==0) {
298  DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
299  return -1;
300  }
301  }
302  else {
/* end of string only counts as a delimiter when explicitly allowed */
303  if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
304  DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
305  return -1;
306  }
307  }
308  }
309 
310  /* remove trailing blanks, if wanted */
312  if (lastBlankPos!=-1)
313  GWEN_Buffer_Crop(buf, 0, lastBlankPos);
314  }
315 
316  *next=src;
317  return 0;
318 }
319 
320 
321 
322 char *GWEN_Text_Escape(const char *src,
323  char *buffer,
324  unsigned int maxsize)
325 {
326  unsigned int size;
327 
328  size=0;
329  while (*src) {
330  unsigned char x;
331 
332  x=(unsigned char)*src;
333  if (!(
334  (x>='A' && x<='Z') ||
335  (x>='a' && x<='z') ||
336  (x>='0' && x<='9'))) {
337  unsigned char c;
338 
339  if ((maxsize-1)<size+3) {
340  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
341  return 0;
342  }
343  buffer[size++]='%';
344  c=(((unsigned char)(*src))>>4)&0xf;
345  if (c>9)
346  c+=7;
347  c+='0';
348  buffer[size++]=c;
349  c=((unsigned char)(*src))&0xf;
350  if (c>9)
351  c+=7;
352  c+='0';
353  buffer[size++]=c;
354  }
355  else {
356  if (size<(maxsize-1))
357  buffer[size++]=*src;
358  else {
359  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
360  return 0;
361  }
362  }
363 
364  src++;
365  } /* while */
366 
367  buffer[size]=0;
368  return buffer;
369 }
370 
371 
372 
373 char *GWEN_Text_EscapeTolerant(const char *src,
374  char *buffer,
375  unsigned int maxsize)
376 {
377  unsigned int size;
378 
379  size=0;
380  while (*src) {
381  unsigned char x;
382 
383  x=(unsigned char)*src;
384  if (!(
385  (x>='A' && x<='Z') ||
386  (x>='a' && x<='z') ||
387  (x>='0' && x<='9') ||
388  x==' ' ||
389  x=='.' ||
390  x==',' ||
391  x=='.' ||
392  x=='*' ||
393  x=='?'
394  )) {
395  unsigned char c;
396 
397  if ((maxsize-1)<size+3) {
398  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
399  return 0;
400  }
401  buffer[size++]='%';
402  c=(((unsigned char)(*src))>>4)&0xf;
403  if (c>9)
404  c+=7;
405  c+='0';
406  buffer[size++]=c;
407  c=((unsigned char)(*src))&0xf;
408  if (c>9)
409  c+=7;
410  c+='0';
411  buffer[size++]=c;
412  }
413  else {
414  if (size<(maxsize-1))
415  buffer[size++]=*src;
416  else {
417  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
418  return 0;
419  }
420  }
421 
422  src++;
423  } /* while */
424 
425  buffer[size]=0;
426  return buffer;
427 }
428 
429 
430 
431 char *GWEN_Text_UnescapeN(const char *src,
432  unsigned int srclen,
433  char *buffer,
434  unsigned int maxsize)
435 {
436  unsigned int size;
437 
438  size=0;
439 
440  while (*src && srclen>0) {
441  unsigned char x;
442 
443  x=(unsigned char)*src;
444  if (
445  (x>='A' && x<='Z') ||
446  (x>='a' && x<='z') ||
447  (x>='0' && x<='9')) {
448  if (size<(maxsize-1))
449  buffer[size++]=*src;
450  else {
451  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
452  return 0;
453  }
454  }
455  else {
456  if (*src=='%') {
457  unsigned char d1, d2;
458  unsigned char c;
459 
460  if (srclen<3) {
461  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
462  return 0;
463  }
464  /* skip '%' */
465  src++;
466  if (!(*src) || !isxdigit((int)*src)) {
467  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
468  return 0;
469  }
470  /* read first digit */
471  d1=(unsigned char)(toupper(*src));
472 
473  /* get second digit */
474  src++;
475  if (!(*src) || !isxdigit((int)*src)) {
476  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
477  return 0;
478  }
479  d2=(unsigned char)(toupper(*src));
480  /* compute character */
481  d1-='0';
482  if (d1>9)
483  d1-=7;
484  c=(d1<<4)&0xf0;
485  d2-='0';
486  if (d2>9)
487  d2-=7;
488  c+=(d2&0xf);
489  /* store character */
490  if (size<(maxsize-1))
491  buffer[size++]=(char)c;
492  else {
493  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
494  return 0;
495  }
496  srclen-=2;
497  }
498  else {
499  DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
500  "characters in escaped string (\"%s\")",
501  src);
502  return 0;
503  }
504  }
505  srclen--;
506  src++;
507  } /* while */
508 
509  buffer[size]=0;
510  return buffer;
511 }
512 
513 
514 
/**
 * Convenience wrapper around GWEN_Text_UnescapeN() which processes the
 * whole NUL-terminated string @c src.
 *
 * @param src     NUL-terminated escaped input
 * @param buffer  destination buffer
 * @param maxsize size of @c buffer in bytes
 * @return whatever GWEN_Text_UnescapeN() returns
 */
char *GWEN_Text_Unescape(const char *src,
                         char *buffer,
                         unsigned int maxsize)
{
  const unsigned int inputLength=strlen(src);

  return GWEN_Text_UnescapeN(src, inputLength, buffer, maxsize);
}
524 
525 
526 
527 char *GWEN_Text_UnescapeTolerantN(const char *src,
528  unsigned int srclen,
529  char *buffer,
530  unsigned int maxsize)
531 {
532  unsigned int size;
533 
534  size=0;
535 
536  while (*src && srclen>0) {
537  unsigned char x;
538 
539  x=(unsigned char)*src;
540  if (
541  (x>='A' && x<='Z') ||
542  (x>='a' && x<='z') ||
543  (x>='0' && x<='9') ||
544  x==' ' ||
545  x=='.' ||
546  x==',' ||
547  x=='.' ||
548  x=='*' ||
549  x=='?'
550  ) {
551  if (size<(maxsize-1))
552  buffer[size++]=*src;
553  else {
554  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
555  return 0;
556  }
557  }
558  else {
559  if (*src=='%') {
560  unsigned char d1, d2;
561  unsigned char c;
562 
563  if (srclen<3) {
564  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
565  return 0;
566  }
567  /* skip '%' */
568  src++;
569  if (!(*src) || !isxdigit((int)*src)) {
570  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
571  return 0;
572  }
573  /* read first digit */
574  d1=(unsigned char)(toupper(*src));
575 
576  /* get second digit */
577  src++;
578  if (!(*src) || !isxdigit((int)*src)) {
579  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
580  return 0;
581  }
582  d2=(unsigned char)(toupper(*src));
583  /* compute character */
584  d1-='0';
585  if (d1>9)
586  d1-=7;
587  c=(d1<<4)&0xf0;
588  d2-='0';
589  if (d2>9)
590  d2-=7;
591  c+=(d2&0xf);
592  /* store character */
593  if (size<(maxsize-1))
594  buffer[size++]=(char)c;
595  else {
596  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
597  return 0;
598  }
599  srclen-=2;
600  }
601  else {
602  DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
603  "characters in escaped string (\"%s\")",
604  src);
605  return 0;
606  }
607  }
608  srclen--;
609  src++;
610  } /* while */
611 
612  buffer[size]=0;
613  return buffer;
614 }
615 
616 
617 
/**
 * Convenience wrapper around GWEN_Text_UnescapeTolerantN() which processes
 * the whole NUL-terminated string @c src.
 *
 * @param src     NUL-terminated escaped input
 * @param buffer  destination buffer
 * @param maxsize size of @c buffer in bytes
 * @return whatever GWEN_Text_UnescapeTolerantN() returns
 */
char *GWEN_Text_UnescapeTolerant(const char *src,
                                 char *buffer,
                                 unsigned int maxsize)
{
  const unsigned int inputLength=strlen(src);

  return GWEN_Text_UnescapeTolerantN(src, inputLength, buffer, maxsize);
}
627 
628 
629 
630 char *GWEN_Text_ToHex(const char *src, unsigned l,
631  char *buffer, unsigned int maxsize)
632 {
633  unsigned int pos;
634  unsigned int size;
635 
636  if ((l*2)+1 > maxsize) {
637  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
638  return 0;
639  }
640 
641  pos=0;
642  size=0;
643  while (pos<l) {
644  unsigned char c;
645 
646  c=(((unsigned char)(src[pos]))>>4)&0xf;
647  if (c>9)
648  c+=7;
649  c+='0';
650  buffer[size++]=c;
651  c=((unsigned char)(src[pos]))&0xf;
652  if (c>9)
653  c+=7;
654  c+='0';
655  buffer[size++]=c;
656  pos++;
657  }
658  buffer[size]=0;
659  return buffer;
660 }
661 
662 
663 
664 char *GWEN_Text_ToHexGrouped(const char *src,
665  unsigned l,
666  char *buffer,
667  unsigned maxsize,
668  unsigned int groupsize,
669  char delimiter,
670  int skipLeadingZeroes)
671 {
672  unsigned int pos;
673  unsigned int size;
674  unsigned int j;
675 
676  j=0;
677 
678  pos=0;
679  size=0;
680  j=0;
681  while (pos<l) {
682  unsigned char c;
683  int skipThis;
684 
685  skipThis=0;
686  c=(((unsigned char)(src[pos]))>>4)&0xf;
687  if (skipLeadingZeroes) {
688  if (c==0)
689  skipThis=1;
690  else
691  skipLeadingZeroes=0;
692  }
693  if (c>9)
694  c+=7;
695  c+='0';
696  if (!skipThis) {
697  if (size+1>=maxsize) {
698  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
699  return 0;
700  }
701  buffer[size++]=c;
702  j++;
703  if (j==groupsize) {
704  if (size+1>=maxsize) {
705  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
706  return 0;
707  }
708  buffer[size++]=delimiter;
709  j=0;
710  }
711  }
712 
713  skipThis=0;
714  c=((unsigned char)(src[pos]))&0xf;
715  if (skipLeadingZeroes) {
716  if (c==0 && pos+1<l)
717  skipThis=1;
718  else
719  skipLeadingZeroes=0;
720  }
721  if (c>9)
722  c+=7;
723  c+='0';
724  if (size+1>=maxsize) {
725  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
726  return 0;
727  }
728  if (!skipThis) {
729  buffer[size++]=c;
730  j++;
731  if (j==groupsize) {
732  if (pos+1<l) {
733  if (size+1>=maxsize) {
734  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
735  return 0;
736  }
737  buffer[size++]=delimiter;
738  }
739  j=0;
740  }
741  }
742  pos++;
743  }
744  buffer[size]=0;
745  return buffer;
746 }
747 
748 
749 
750 int GWEN_Text_ToHexBuffer(const char *src, unsigned l,
751  GWEN_BUFFER *buf,
752  unsigned int groupsize,
753  char delimiter,
754  int skipLeadingZeroes)
755 {
756  unsigned int pos = 0;
757  unsigned int j = 0;
758 
759  while (pos<l) {
760  unsigned char c;
761  int skipThis;
762 
763  skipThis=0;
764  c=(((unsigned char)(src[pos]))>>4)&0xf;
765  if (skipLeadingZeroes) {
766  if (c==0)
767  skipThis=1;
768  else
769  skipLeadingZeroes=0;
770  }
771  if (c>9)
772  c+=7;
773  c+='0';
774  if (!skipThis) {
775  if (GWEN_Buffer_AppendByte(buf, c)) {
776  DBG_INFO(GWEN_LOGDOMAIN, "here");
777  return -1;
778  }
779  j++;
780  if (groupsize && j==groupsize) {
781  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
782  DBG_INFO(GWEN_LOGDOMAIN, "here");
783  return -1;
784  }
785  j=0;
786  }
787  }
788 
789  skipThis=0;
790  c=((unsigned char)(src[pos]))&0xf;
791  if (skipLeadingZeroes) {
792  if (c==0 && pos+1<l)
793  skipThis=1;
794  else
795  skipLeadingZeroes=0;
796  }
797  if (c>9)
798  c+=7;
799  c+='0';
800  if (!skipThis) {
801  if (GWEN_Buffer_AppendByte(buf, c)) {
802  DBG_INFO(GWEN_LOGDOMAIN, "here");
803  return -1;
804  }
805  j++;
806  if (groupsize && j==groupsize) {
807  if (pos+1<l) {
808  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
809  DBG_INFO(GWEN_LOGDOMAIN, "here");
810  return -1;
811  }
812  }
813  j=0;
814  }
815  }
816  pos++;
817  }
818  return 0;
819 }
820 
821 
822 
823 int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize)
824 {
825  unsigned int size = 0;
826 
827  while (*src) {
828  unsigned char d1, d2;
829  unsigned char c;
830 
831  /* read first digit */
832  if (!isxdigit((int)*src)) {
833  DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
834  return -1;
835  }
836  d1=(unsigned char)(toupper(*src));
837 
838  /* get second digit */
839  src++;
840  if (!(*src) || !isxdigit((int)*src)) {
841  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
842  return -1;
843  }
844  d2=(unsigned char)(toupper(*src));
845  src++;
846 
847  /* compute character */
848  d1-='0';
849  if (d1>9)
850  d1-=7;
851  c=(d1<<4)&0xf0;
852  d2-='0';
853  if (d2>9)
854  d2-=7;
855  c+=(d2&0xf);
856  /* store character */
857  if (size<(maxsize))
858  buffer[size++]=(char)c;
859  else {
860  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (maxsize=%d)", maxsize);
861  return -1;
862  }
863  } /* while */
864 
865  return size;
866 }
867 
868 
869 
870 int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf)
871 {
872  while (*src) {
873  unsigned char d1, d2;
874  unsigned char c;
875 
876  /* read first digit */
877  if (isspace((int)*src)) {
878  src++;
879  }
880  else {
881  if (!isxdigit((int)*src)) {
882  DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
883  return -1;
884  }
885  d1=(unsigned char)(toupper(*src));
886 
887  /* get second digit */
888  src++;
889  if (!(*src) || !isxdigit((int)*src)) {
890  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
891  return -1;
892  }
893  d2=(unsigned char)(toupper(*src));
894  src++;
895 
896  /* compute character */
897  d1-='0';
898  if (d1>9)
899  d1-=7;
900  c=(d1<<4)&0xf0;
901  d2-='0';
902  if (d2>9)
903  d2-=7;
904  c+=(d2&0xf);
905  /* store character */
906  GWEN_Buffer_AppendByte(buf, (char)c);
907  }
908  } /* while */
909 
910  return 0;
911 }
912 
913 
914 
915 int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf)
916 {
917  unsigned int l;
918  int fakeByte;
919 
920  l=strlen(src);
921  fakeByte=(l%2);
922  while (*src) {
923  unsigned char d1, d2;
924  unsigned char c;
925 
926  if (fakeByte) {
927  d1=0;
928  fakeByte=0;
929  }
930  else {
931  /* read first digit */
932  if (!isdigit((int)*src)) {
933  DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in bcd string");
934  return -1;
935  }
936  d1=(unsigned char)(*src);
937  src++;
938  }
939  /* get second digit */
940  if (!(*src) || !isxdigit((int)*src)) {
941  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete BCD byte (only 1 digit)");
942  return -1;
943  }
944  d2=(unsigned char)(*src);
945  src++;
946 
947  /* compute character */
948  d1-='0';
949  c=(d1<<4)&0xf0;
950  d2-='0';
951  c+=(d2&0xf);
952  /* store character */
953  GWEN_Buffer_AppendByte(buf, (char)c);
954  } /* while */
955 
956  return 0;
957 }
958 
959 
960 
961 int GWEN_Text_ToBcdBuffer(const char *src, unsigned l,
962  GWEN_BUFFER *buf,
963  unsigned int groupsize,
964  char delimiter,
965  int skipLeadingZeroes)
966 {
967  unsigned int pos;
968  unsigned int j;
969 
970  j=0;
971 
972  pos=0;
973  j=0;
974  while (pos<l) {
975  unsigned char c;
976  int skipThis;
977 
978  skipThis=0;
979  c=(((unsigned char)(src[pos]))>>4)&0xf;
980  if (skipLeadingZeroes) {
981  if (c==0)
982  skipThis=1;
983  else
984  skipLeadingZeroes=0;
985  }
986  c+='0';
987  if (!skipThis) {
988  if (GWEN_Buffer_AppendByte(buf, c)) {
989  DBG_INFO(GWEN_LOGDOMAIN, "here");
990  return -1;
991  }
992  j++;
993  if (groupsize && j==groupsize) {
994  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
995  DBG_INFO(GWEN_LOGDOMAIN, "here");
996  return -1;
997  }
998  j=0;
999  }
1000  }
1001 
1002  skipThis=0;
1003  c=((unsigned char)(src[pos]))&0xf;
1004  if (skipLeadingZeroes) {
1005  if (c==0 && pos+1<l)
1006  skipThis=1;
1007  else
1008  skipLeadingZeroes=0;
1009  }
1010  c+='0';
1011  if (!skipThis) {
1012  if (GWEN_Buffer_AppendByte(buf, c)) {
1013  DBG_INFO(GWEN_LOGDOMAIN, "here");
1014  return -1;
1015  }
1016  j++;
1017  if (groupsize && j==groupsize) {
1018  if (pos+1<l) {
1019  if (GWEN_Buffer_AppendByte(buf, delimiter)) {
1020  DBG_INFO(GWEN_LOGDOMAIN, "here");
1021  return -1;
1022  }
1023  }
1024  j=0;
1025  }
1026  }
1027  pos++;
1028  }
1029  return 0;
1030 }
1031 
1032 
1033 
/**
 * Compares two strings, treating a NULL pointer and an empty string as the
 * same "no value".
 *
 * @param s1  first string (may be NULL)
 * @param s2  second string (may be NULL)
 * @param ign non-zero for a case-insensitive comparison
 * @return 0 if both are missing/empty, 1 if only @c s1 is missing/empty,
 *         -1 if only @c s2 is missing/empty, otherwise the result of
 *         strcasecmp() or strcmp()
 */
int GWEN_Text_Compare(const char *s1, const char *s2, int ign)
{
  const int has1=(s1 && *s1);
  const int has2=(s2 && *s2);

  if (!has1 || !has2) {
    if (has1==has2)
      return 0;
    return has1 ? -1 : 1;
  }

  return ign ? strcasecmp(s1, s2) : strcmp(s1, s2);
}
1053 
1054 
1055 
/**
 * Case-insensitive substring search.
 *
 * @param haystack NUL-terminated string to search in
 * @param needle   NUL-terminated string to search for
 * @return pointer to the first occurrence of @c needle inside @c haystack,
 *         or NULL if there is none. An empty @c needle is never found.
 */
const char *GWEN_Text_StrCaseStr(const char *haystack, const char *needle)
{
  /* the <ctype.h> functions require an unsigned char value; passing a
   * negative plain char (bytes >= 0x80 where char is signed) is undefined */
  const int first=tolower((unsigned char)*needle);

  for (; *haystack; haystack++) {
    const char *t;
    const char *s;

    if (tolower((unsigned char)*haystack)!=first)
      continue;

    /* first character matches, compare the remainder */
    t=haystack+1;
    s=needle+1;
    while (*t && *s &&
           tolower((unsigned char)*t)==tolower((unsigned char)*s)) {
      t++;
      s++;
    }
    if (*s==0)
      return haystack;
    /* no match here, try the next position */
  }

  /* not found */
  return NULL;
}
1087 
1088 
1089 
1090 
/**
 * Compares word @c w against pattern @c p, starting at the given positions,
 * up to the next '*' in the pattern.
 *
 * A '?' in the pattern matches any single character. Each position where
 * the two characters are equal is counted in @c *matches.
 *
 * @param w         NUL-terminated word
 * @param wpos      in: start position in @c w; out: position where the
 *                  comparison stopped
 * @param p         NUL-terminated pattern
 * @param ppos      in: start position in @c p; out: position where the
 *                  comparison stopped
 * @param sensecase non-zero for a case-sensitive comparison
 * @param matches   in/out: running count of equal characters
 * @return 1 if the segment matches (a '*' was reached in the pattern, or
 *         both strings ended together), 0 otherwise
 */
int GWEN_Text__cmpSegment(const char *w, unsigned int *wpos,
                          const char *p, unsigned int *ppos,
                          int sensecase,
                          unsigned int *matches)
{
  const unsigned int wlength=strlen(w);
  const unsigned int plength=strlen(p);
  unsigned int wi=*wpos;
  unsigned int pi=*ppos;
  unsigned int hits=*matches;
  int result=-1; /* -1: not decided inside the loop */

  while (wi<wlength && pi<plength) {
    char a=w[wi];
    char b=p[pi];

    if (b=='*') {
      result=1;
      break;
    }
    if (!sensecase) {
      /* the <ctype.h> functions require an unsigned char value */
      a=(char)toupper((unsigned char)a);
      b=(char)toupper((unsigned char)b);
    }
    /* count matches */
    if (a==b)
      ++hits;
    else if (b!='?') {
      result=0;
      break;
    }
    ++wi;
    ++pi;
  }

  if (result<0) {
    if (wi==wlength && pi==plength)
      /* both at end, would be ok */
      result=1;
    else if (wi>=wlength && pi<plength && p[pi]=='*')
      /* word ends, pattern doesn't: ok if the pattern continues with '*' */
      result=1;
    else
      /* otherwise no match */
      result=0;
  }

  *wpos=wi;
  *ppos=pi;
  *matches=hits;
  return result;
}
1154 
1155 
1156 
/**
 * Searches word @c w, from @c *wpos onwards, for a position at which the
 * pattern segment starting at @c *ppos matches.
 *
 * Every candidate start position is tried with GWEN_Text__cmpSegment(),
 * each time beginning from the pattern position and match count that were
 * passed in.
 *
 * @return 1 as soon as a candidate matches (the output parameters then hold
 *         the values left by that comparison), 0 if none does
 */
int GWEN_Text__findSegment(const char *w, unsigned int *wpos,
                           const char *p, unsigned int *ppos,
                           int sensecase,
                           unsigned int *matches)
{
  const unsigned int wordLength=strlen(w);
  const unsigned int patternStart=*ppos;
  const unsigned int matchesAtEntry=*matches;
  unsigned int candidate;

  for (candidate=*wpos; candidate<wordLength; candidate++) {
    /* restart every attempt from the state we were called with */
    *ppos=patternStart;
    *wpos=candidate;
    *matches=matchesAtEntry;
    if (GWEN_Text__cmpSegment(w, wpos, p, ppos, sensecase, matches))
      return 1;
  }
  return 0;
}
1179 
1180 
/**
 * Matches word @c w against pattern @c p, where '*' stands for any run of
 * characters and '?' for any single character.
 *
 * @param w         NUL-terminated word
 * @param p         NUL-terminated pattern
 * @param sensecase non-zero for a case-sensitive comparison
 * @return the number of literally matching characters if the word matches,
 *         -1 otherwise
 */
int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase)
{
  unsigned int ppos=0;
  unsigned int wpos=0;
  unsigned int matches=0;
  const unsigned int plength=strlen(p);

  /* compare until first occurrence of '*' */
  if (!GWEN_Text__cmpSegment(w, &wpos, p, &ppos, sensecase, &matches))
    return -1;

  /* while the pattern has not ended, ppos points at a '*' */
  while (ppos<plength) {
    /* skip '*'; if the pattern ends behind it the word matches */
    if (++ppos>=plength)
      break;
    /* find next matching segment */
    if (!GWEN_Text__findSegment(w, &wpos, p, &ppos, sensecase, &matches))
      return -1;
  }

  return matches;
}
1213 
1214 
1215 
1216 int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize,
1217  int fillchar)
1218 {
1219  char lbuffer[128];
1220  unsigned int i;
1221 
1222  sprintf(lbuffer, "%d", num);
1223  i=strlen(lbuffer);
1224  if (i>=bufsize) {
1225  DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (%d>=%d)", i, bufsize);
1226  return -1;
1227  }
1228  if (fillchar>0) {
1229  /* fill right, but first copy chars */
1230  strcpy(buffer, lbuffer);
1231  while (i<bufsize-1)
1232  buffer[i++]=fillchar;
1233  buffer[i]=0;
1234  return bufsize;
1235  }
1236  else if (fillchar<0) {
1237  int j, k;
1238 
1239  fillchar=-fillchar;
1240  j=bufsize-1-i;
1241  for (k=0; k<j; k++)
1242  buffer[k]=fillchar;
1243  buffer[k]=0;
1244  strcat(buffer, lbuffer);
1245  return bufsize;
1246  }
1247  else {
1248  /* dont fill, just copy */
1249  strcpy(buffer, lbuffer);
1250  return i;
1251  }
1252 }
1253 
1254 
1255 
/**
 * Writes a hex/text dump of @c l bytes of @c s to stderr, 16 bytes per row.
 *
 * Each row shows the offset, the bytes as two-digit hex values and the same
 * bytes as text, with bytes below 32 or above 127 shown as '.'.
 *
 * @param s      data to dump
 * @param l      number of bytes to dump
 * @param insert number of spaces to print in front of every line
 */
void GWEN_Text_DumpString(const char *s, unsigned int l,
                          unsigned int insert)
{
  unsigned int i;
  unsigned int k;
  unsigned int pos=0;

  for (k=0; k<insert; k++)
    fprintf(stderr, " ");
  /* l is unsigned, so %u is the matching conversion */
  fprintf(stderr, "String size is %u:\n", l);

  while (pos<l) {
    /* end of this row; computed from the remaining length so that pos+16
     * cannot wrap around for very large l */
    const unsigned int rowEnd=(l-pos<16)?l:pos+16;

    for (k=0; k<insert; k++)
      fprintf(stderr, " ");
    fprintf(stderr, "%04x: ", pos);

    /* show hex dump */
    for (i=pos; i<rowEnd; i++)
      fprintf(stderr, "%02x ", (unsigned char)s[i]);
    /* pad a short last row: every hex cell above is three characters wide,
     * so the padding per missing byte has to be three spaces as well */
    for (i=rowEnd-pos; i<16; i++)
      fprintf(stderr, "   ");

    /* show text; going through unsigned char makes the result independent
     * of whether plain char is signed on this platform */
    for (i=pos; i<rowEnd; i++) {
      const unsigned char c=(unsigned char)s[i];

      if (c<32 || c>127)
        fprintf(stderr, ".");
      else
        fprintf(stderr, "%c", c);
    }
    fprintf(stderr, "\n");
    pos=rowEnd;
  }
}
1294 
1295 
1296 
1297 void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l,
1298  GWEN_BUFFER *mbuf,
1299  unsigned int insert)
1300 {
1301  unsigned int i;
1302  unsigned int j;
1303  unsigned int pos;
1304  unsigned k;
1305  char numbuf[32];
1306 
1307  pos=0;
1308  for (k=0; k<insert; k++)
1309  GWEN_Buffer_AppendByte(mbuf, ' ');
1310  GWEN_Buffer_AppendString(mbuf, "String size is ");
1311  snprintf(numbuf, sizeof(numbuf), "%d", l);
1312  GWEN_Buffer_AppendString(mbuf, numbuf);
1313  GWEN_Buffer_AppendByte(mbuf, '\n');
1314  while (pos<l) {
1315  for (k=0; k<insert; k++)
1316  GWEN_Buffer_AppendByte(mbuf, ' ');
1317  snprintf(numbuf, sizeof(numbuf), "%04x: ", pos);
1318  GWEN_Buffer_AppendString(mbuf, numbuf);
1319  j=pos+16;
1320  if (j>=l)
1321  j=l;
1322 
1323  /* show hex dump */
1324  for (i=pos; i<j; i++) {
1325  snprintf(numbuf, sizeof(numbuf), "%02x ", (unsigned char)s[i]);
1326  GWEN_Buffer_AppendString(mbuf, numbuf);
1327  }
1328  if (j-pos<16)
1329  for (i=0; i<16-(j-pos); i++)
1330  GWEN_Buffer_AppendString(mbuf, " ");
1331  /* show text */
1332  for (i=pos; i<j; i++) {
1333  if (s[i]<32)
1334  GWEN_Buffer_AppendByte(mbuf, '.');
1335  else
1336  GWEN_Buffer_AppendByte(mbuf, s[i]);
1337  }
1338  GWEN_Buffer_AppendByte(mbuf, '\n');
1339  pos+=16;
1340  }
1341 }
1342 
1343 
1344 
1345 
1346 
1347 
1348 
1349 int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf)
1350 {
1351  while (*src) {
1352  unsigned char x;
1353 
1354  x=(unsigned char)*src;
1355  if (!(
1356  (x>='A' && x<='Z') ||
1357  (x>='a' && x<='z') ||
1358  (x>='0' && x<='9'))) {
1359  unsigned char c;
1360 
1361  GWEN_Buffer_AppendByte(buf, '%');
1362  c=(((unsigned char)(*src))>>4)&0xf;
1363  if (c>9)
1364  c+=7;
1365  c+='0';
1366  GWEN_Buffer_AppendByte(buf, c);
1367  c=((unsigned char)(*src))&0xf;
1368  if (c>9)
1369  c+=7;
1370  c+='0';
1371  GWEN_Buffer_AppendByte(buf, c);
1372  }
1373  else
1374  GWEN_Buffer_AppendByte(buf, *src);
1375 
1376  src++;
1377  } /* while */
1378 
1379  return 0;
1380 }
1381 
1382 
1383 
1384 int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf)
1385 {
1386  while (*src) {
1387  unsigned char x;
1388 
1389  x=(unsigned char)*src;
1390  if (
1391  (x>='A' && x<='Z') ||
1392  (x>='a' && x<='z') ||
1393  (x>='0' && x<='9')) {
1394  GWEN_Buffer_AppendByte(buf, *src);
1395  }
1396  else {
1397  if (*src=='%') {
1398  unsigned char d1, d2;
1399  unsigned char c;
1400 
1401  /* skip '%' */
1402  src++;
1403  if (!(*src) || !isxdigit((int)*src)) {
1404  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
1405  return 0;
1406  }
1407  /* read first digit */
1408  d1=(unsigned char)(toupper(*src));
1409 
1410  /* get second digit */
1411  src++;
1412  if (!(*src) || !isxdigit((int)*src)) {
1413  DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
1414  return 0;
1415  }
1416  d2=(unsigned char)(toupper(*src));
1417  /* compute character */
1418  d1-='0';
1419  if (d1>9)
1420  d1-=7;
1421  c=(d1<<4)&0xf0;
1422  d2-='0';
1423  if (d2>9)
1424  d2-=7;
1425  c+=(d2&0xf);
1426  /* store character */
1427  GWEN_Buffer_AppendByte(buf, (char)c);
1428  }
1429  else {
1430  DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
1431  "characters in escaped string (\"%s\")",
1432  src);
1433  return -1;
1434  }
1435  }
1436  src++;
1437  } /* while */
1438 
1439  return 0;
1440 }
1441 
1442 
1443 
/* NOTE(review): the line carrying this function's signature is missing from
 * this extract (the embedded listing numbers jump from 1443 to 1445). From
 * the body it takes a NUL-terminated string `src` and a buffer `buf`;
 * presumably this is the tolerant buffer variant of the escape function --
 * confirm name and signature against the upstream file.
 *
 * The body appends src to buf, leaving ASCII letters, digits and the
 * characters ' ', '.', ',', '_', '-', '*' and '?' unchanged and writing
 * every other byte as '%' plus two uppercase hexadecimal digits. It always
 * returns 0. */
1445 {
1446  while (*src) {
1447  unsigned char x;
1448 
1449  x=(unsigned char)*src;
1450  if (!(
1451  (x>='A' && x<='Z') ||
1452  (x>='a' && x<='z') ||
1453  (x>='0' && x<='9') ||
1454  x==' ' ||
1455  x=='.' ||
1456  x==',' ||
/* the second test for '.' below repeats the one above and has no effect */
1457  x=='.' ||
1458  x=='_' ||
1459  x=='-' ||
1460  x=='*' ||
1461  x=='?'
1462  )) {
1463  unsigned char c;
1464 
1465  GWEN_Buffer_AppendByte(buf, '%');
/* high nibble first; adding 7 skips from '9' to 'A' in ASCII */
1466  c=(((unsigned char)(*src))>>4)&0xf;
1467  if (c>9)
1468  c+=7;
1469  c+='0';
1470  GWEN_Buffer_AppendByte(buf, c);
1471  c=((unsigned char)(*src))&0xf;
1472  if (c>9)
1473  c+=7;
1474  c+='0';
1475  GWEN_Buffer_AppendByte(buf, c);
1476  }
1477  else
1478  GWEN_Buffer_AppendByte(buf, *src);
1479 
1480  src++;
1481  } /* while */
1482 
1483  return 0;
1484 }
1485 
1486 
1487 
/* NOTE(review): the line carrying this function's signature is missing from
 * this extract (the embedded listing numbers jump from 1487 to 1489). From
 * the body it takes a NUL-terminated string `src` and a buffer `buf`;
 * presumably this is the tolerant buffer variant of the unescape function --
 * confirm name and signature against the upstream file.
 *
 * The body appends src to buf, decoding every "%XX" sequence with two valid
 * hexadecimal digits into a single byte. Anything else, including a '%' that
 * is not followed by two hex digits, is appended unchanged. It always
 * returns 0. */
1489 {
1490  while (*src) {
1491  //const char *srcBak=src;
1492 
1493  int charHandled=0;
1494  if (*src=='%') {
/* only decode when at least two more characters follow the '%' */
1495  if (strlen(src)>2) {
1496  unsigned char d1, d2;
1497  unsigned char c;
1498 
1499  if (isxdigit((int)src[1]) && isxdigit((int)src[2])) {
1500  /* skip '%' */
1501  src++;
1502  /* read first digit */
1503  d1=(unsigned char)(toupper(*src));
1504 
1505  /* get second digit */
1506  src++;
1507  d2=(unsigned char)(toupper(*src));
1508  /* compute character */
1509  d1-='0';
1510  if (d1>9)
1511  d1-=7;
1512  c=(d1<<4)&0xf0;
1513  d2-='0';
1514  if (d2>9)
1515  d2-=7;
1516  c+=(d2&0xf);
1517  /* store character */
1518  GWEN_Buffer_AppendByte(buf, (char)c);
1519  charHandled=1;
1520  }
1521  }
1522  }
/* not a valid escape sequence: pass the character through as it is */
1523  if (!charHandled)
1524  GWEN_Buffer_AppendByte(buf, *src);
1525  src++;
1526  } /* while */
1527 
1528  return 0;
1529 }
1530 
1531 
1532 
1534 {
1535  while (GWEN_Buffer_GetBytesLeft(src)) {
1536  int z;
1537  unsigned char x;
1538 
1539  z=GWEN_Buffer_ReadByte(src);
1540  if (z==-1) {
1541  DBG_INFO(GWEN_LOGDOMAIN, "here");
1542  return -1;
1543  }
1544  x=(unsigned char)z;
1545  if (!(
1546  (x>='A' && x<='Z') ||
1547  (x>='a' && x<='z') ||
1548  (x>='0' && x<='9') ||
1549  x==' ' ||
1550  x=='.' ||
1551  x==',' ||
1552  x=='.' ||
1553  x=='*' ||
1554  x=='?'
1555  )) {
1556  unsigned char c;
1557 
1558  GWEN_Buffer_AppendByte(buf, '%');
1559  c=(((unsigned char)x)>>4)&0xf;
1560  if (c>9)
1561  c+=7;
1562  c+='0';
1563  GWEN_Buffer_AppendByte(buf, c);
1564  c=((unsigned char)x)&0xf;
1565  if (c>9)
1566  c+=7;
1567  c+='0';
1568  GWEN_Buffer_AppendByte(buf, c);
1569  }
1570  else
1571  GWEN_Buffer_AppendByte(buf, x);
1572  } /* while */
1573 
1574  return 0;
1575 }
1576 
1577 
1578 
1579 void GWEN_Text_LogString(const char *s, unsigned int l,
1580  const char *logDomain,
1581  GWEN_LOGGER_LEVEL lv)
1582 {
1583  GWEN_BUFFER *mbuf;
1584 
1585  mbuf=GWEN_Buffer_new(0, ((l*16)<1024)?1024:l*16, 0, 1);
1586  GWEN_Text_DumpString2Buffer(s, l, mbuf, 0);
1587  GWEN_Logger_Log(logDomain, lv, GWEN_Buffer_GetStart(mbuf));
1588  GWEN_Buffer_free(mbuf);
1589 }
1590 
1591 
1592 
1594 {
1595  const char *p;
1596  char *dst;
1597  unsigned int size;
1598  unsigned int i;
1599  int lastWasBlank;
1600  char *lastBlankPos;
1601 
1602  dst=GWEN_Buffer_GetStart(buf);
1603  p=dst;
1604  size=GWEN_Buffer_GetUsedBytes(buf);
1605  lastWasBlank=0;
1606  lastBlankPos=0;
1607 
1608  for (i=0; i<size; i++) {
1609  /* remember next loop whether this char was a blank */
1610  if (isspace((int)*p)) {
1611  if (!lastWasBlank) {
1612  /* store only one blank */
1613  lastWasBlank=1;
1614  lastBlankPos=dst;
1615  *(dst++)=*p;
1616  }
1617  }
1618  else {
1619  lastWasBlank=0;
1620  lastBlankPos=0;
1621  *(dst++)=*p;
1622  }
1623  p++;
1624  }
1625 
1626  /* remove trailing blanks */
1627  if (lastBlankPos!=0)
1628  dst=lastBlankPos;
1629 
1630  size=dst-GWEN_Buffer_GetStart(buf);
1631  GWEN_Buffer_Crop(buf, 0, size);
1632 }
1633 
1634 
1635 
1637 {
1638  char numbuf[128];
1639  int rv;
1640 #ifdef HAVE_SETLOCALE
1641  const char *orig_locale = setlocale(LC_NUMERIC, NULL);
1642  char *currentLocale = strdup(orig_locale ? orig_locale : "C");
1643  setlocale(LC_NUMERIC, "C");
1644 #endif
1645 
1646  rv=snprintf(numbuf, sizeof(numbuf), "%f", num);
1647 
1648 #ifdef HAVE_SETLOCALE
1649  setlocale(LC_NUMERIC, currentLocale);
1650  free(currentLocale);
1651 #endif
1652 
1653  if (rv<1 || rv>=sizeof(numbuf))
1654  return -1;
1655  GWEN_Buffer_AppendString(buf, numbuf);
1656  return 0;
1657 }
1658 
1659 
1660 
/*
 * Parses a floating point number from the beginning of s.
 *
 * Leading whitespace is skipped and characters following the number are
 * ignored. When HAVE_SETLOCALE is defined the LC_NUMERIC locale is switched
 * to "C" while parsing, so '.' is always the decimal separator, and is
 * restored afterwards.
 *
 * @param s    NUL-terminated input string
 * @param num  receives the parsed value; left untouched on failure
 * @return 0 if a number was parsed, -1 if s or num is NULL, if s does not
 *         start with a number, or if the current locale name could not be
 *         saved
 */
int GWEN_Text_StringToDouble(const char *s, double *num)
{
  char *end=NULL;
  double value;
#ifdef HAVE_SETLOCALE
  const char *orig_locale;
  char *currentLocale;
#endif

  if (s==NULL || num==NULL)
    return -1;

#ifdef HAVE_SETLOCALE
  orig_locale=setlocale(LC_NUMERIC, NULL);
  currentLocale=strdup(orig_locale ? orig_locale : "C");
  if (currentLocale==NULL) {
    /* Without a saved name the locale could not be restored later:
     * setlocale(LC_NUMERIC, NULL) only queries. Bail out before switching. */
    return -1;
  }
  setlocale(LC_NUMERIC, "C");
#endif

  /* strtod() has defined behavior for out-of-range input (it returns
   * +-HUGE_VAL or a value near zero), unlike the scanf() family */
  value=strtod(s, &end);

#ifdef HAVE_SETLOCALE
  setlocale(LC_NUMERIC, currentLocale);
  free(currentLocale);
#endif

  if (end==s)
    return -1; /* no conversion was performed */
  *num=value;
  return 0;
}
1681 
1682 
1683 
/*
 * Computes a one-directional similarity score between s1 and s2 as a
 * percentage (0..100, truncated to a whole number).
 *
 * For each character of s1 the remainder of s2 is searched for the first
 * character that matches in one of these ways:
 *  - 2 points: identical character (or, if ign is non-zero, identical after
 *    toupper())
 *  - 1 point:  same character after toupper(), or both characters are
 *    alphanumeric
 * After a match the search in s2 continues behind the matched character.
 * The score is (points * 100) / (strlen(s1) + strlen(s2)) using integer
 * division.
 *
 * Two empty strings are identical and yield 100; this case is handled
 * up front because the formula would otherwise divide by zero.
 *
 * @param s1   first NUL-terminated string
 * @param s2   second NUL-terminated string
 * @param ign  non-zero to treat upper and lower case as fully equal
 */
double GWEN_Text__CheckSimilarity(const char *s1, const char *s2, int ign)
{
  size_t nboth;
  size_t nmatch=0;

  nboth=strlen(s1)+strlen(s2);
  if (nboth==0)
    return 100.0;

  while (*s1 && *s2) {
    /* <ctype.h> functions require values representable as unsigned char */
    const unsigned char c1=(unsigned char)*s1;
    const char *t;
    int lmatch=0;

    /* find next (fuzzily) equal character in s2 */
    for (t=s2; *t; t++) {
      const unsigned char c2=(unsigned char)*t;
      const int sameIgnoringCase=(toupper(c1)==toupper(c2));

      if (ign ? sameIgnoringCase : (c1==c2)) {
        lmatch=2;
        break;
      }
      if (sameIgnoringCase || (isalnum(c1) && isalnum(c2))) {
        lmatch=1;
        break;
      }
    }

    if (lmatch) {
      nmatch+=(size_t)lmatch;
      s2=t+1;
    }

    s1++;
  } /* while */

  /* integer division on purpose: scores are whole percentages */
  return (double)((nmatch*100)/nboth);
}
1756 
1757 
1758 
/*
 * Returns the similarity of s1 and s2 as a percentage.
 *
 * GWEN_Text__CheckSimilarity() is evaluated in both directions (s1 against
 * s2 and s2 against s1) and the larger of the two scores is returned.
 * ign is passed through unchanged to both evaluations.
 */
double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign)
{
  const double forward=GWEN_Text__CheckSimilarity(s1, s2, ign);
  const double backward=GWEN_Text__CheckSimilarity(s2, s1, ign);

  return (backward>forward) ? backward : forward;
}
1769 
1770 
1771 
1772 int GWEN_Text_CountUtf8Chars(const char *s, int len)
1773 {
1774  int count;
1775  int handled;
1776 
1777  if (len==0)
1778  len=strlen(s);
1779  count=0;
1780  handled=0;
1781  while (handled<len) {
1782  unsigned char c;
1783  int i;
1784 
1785  c=(unsigned char)*s;
1786  if ((c & 0xfe)==0xfc)
1787  i=5;
1788  else if ((c & 0xfc)==0xf8)
1789  i=4;
1790  else if ((c & 0xf8)==0xf0)
1791  i=3;
1792  else if ((c & 0xf0)==0xe0)
1793  i=2;
1794  else if ((c & 0xe0)==0xc0)
1795  i=1;
1796  else if (c & 0x80) {
1797  DBG_ERROR(GWEN_LOGDOMAIN, "Invalid UTF8 character at pos %d", handled);
1798  return -1;
1799  }
1800  else
1801  i=0;
1802  if (handled+i+1>len) {
1804  "Incomplete UTF8 sequence at pos %d", handled);
1805  return -1;
1806  }
1807  s++;
1808  if (i) {
1809  int j;
1810 
1811  for (j=0; j<i; j++) {
1812  if ((((unsigned char)*s) & 0xc0)!=0xc0) {
1814  "Invalid UTF8 sequence at pos %d (rel %d of %d)",
1815  handled, j, i);
1816  }
1817  s++;
1818  }
1819  }
1820  handled+=i+1;
1821  count++;
1822  } /* while */
1823 
1824  return count;
1825 }
1826 
1827 
1828 
1829 int GWEN_Text_UnescapeXmlToBuffer(const char *src, GWEN_BUFFER *buf)
1830 {
1831  char *pdst;
1832  uint32_t roomLeft;
1833  uint32_t bytesAdded;
1834 
1835 #define GWEN_TEXT__APPENDCHAR(chr) \
1836  if (roomLeft<2) { \
1837  if (bytesAdded) { \
1838  GWEN_Buffer_IncrementPos(buf, bytesAdded); \
1839  GWEN_Buffer_AdjustUsedBytes(buf); \
1840  } \
1841  GWEN_Buffer_AllocRoom(buf, 2); \
1842  pdst=GWEN_Buffer_GetPosPointer(buf); \
1843  roomLeft=GWEN_Buffer_GetMaxUnsegmentedWrite(buf); \
1844  bytesAdded=0; \
1845  } \
1846  *(pdst++)=(unsigned char)chr; \
1847  *pdst=0; \
1848  bytesAdded++; \
1849  roomLeft--
1850 
1851  pdst=GWEN_Buffer_GetPosPointer(buf);
1852  roomLeft=GWEN_Buffer_GetMaxUnsegmentedWrite(buf);
1853  bytesAdded=0;
1854 
1855  while (*src) {
1856  unsigned char x;
1857  int match;
1858 
1859  match=0;
1860  x=(unsigned char)*src;
1861  if (x=='&') {
1862  if (src[1]=='#') {
1863  unsigned char num=0;
1864 
1865  src++;
1866  src++;
1867  while (*src && isdigit((int)*src)) {
1868  num*=10;
1869  num+=(*src)-'0';
1870  src++;
1871  }
1872  src++;
1873  GWEN_TEXT__APPENDCHAR(num);
1874  }
1875  else if (strncmp(src+1, "szlig;", 6)==0) {
1876  GWEN_TEXT__APPENDCHAR(0xc3);
1877  GWEN_TEXT__APPENDCHAR(0x9f);
1878  src+=7;
1879  match=1;
1880  }
1881  else if (strncmp(src+1, "Auml;", 5)==0) {
1882  GWEN_TEXT__APPENDCHAR(0xc3);
1883  GWEN_TEXT__APPENDCHAR(0x84);
1884  src+=6;
1885  match=1;
1886  }
1887  else if (strncmp(src+1, "Ouml;", 5)==0) {
1888  GWEN_TEXT__APPENDCHAR(0xc3);
1889  GWEN_TEXT__APPENDCHAR(0x96);
1890  src+=6;
1891  match=1;
1892  }
1893  else if (strncmp(src+1, "Uuml;", 5)==0) {
1894  GWEN_TEXT__APPENDCHAR(0xc3);
1895  GWEN_TEXT__APPENDCHAR(0x9c);
1896  src+=6;
1897  match=1;
1898  }
1899  else if (strncmp(src+1, "auml;", 5)==0) {
1900  GWEN_TEXT__APPENDCHAR(0xc3);
1901  GWEN_TEXT__APPENDCHAR(0xa4);
1902  src+=6;
1903  match=1;
1904  }
1905  else if (strncmp(src+1, "ouml;", 5)==0) {
1906  GWEN_TEXT__APPENDCHAR(0xc3);
1907  GWEN_TEXT__APPENDCHAR(0xb6);
1908  src+=6;
1909  match=1;
1910  }
1911  else if (strncmp(src+1, "uuml;", 5)==0) {
1912  GWEN_TEXT__APPENDCHAR(0xc3);
1913  GWEN_TEXT__APPENDCHAR(0xbc);
1914  src+=6;
1915  match=1;
1916  }
1917  else {
1918  const GWEN_TEXT_ESCAPE_ENTRY *e;
1920  while (e->replace) {
1921  int l;
1922 
1923  l=strlen(e->replace);
1924  if (strncasecmp(src, e->replace, l)==0) {
1926  //GWEN_Buffer_AppendByte(buf, e->character);
1927  src+=l;
1928  match=1;
1929  break;
1930  }
1931  e++;
1932  } /* while */
1933  }
1934  }
1935  if (!match) {
1936  GWEN_TEXT__APPENDCHAR(*(src++));
1937  }
1938  } /* while */
1939 
1940  if (bytesAdded) {
1941  GWEN_Buffer_IncrementPos(buf, bytesAdded);
1943  }
1944 
1945  return 0;
1946 #undef GWEN_TEXT__APPENDCHAR
1947 }
1948 
1949 
1950 
1951 int GWEN_Text_EscapeXmlToBuffer(const char *src, GWEN_BUFFER *buf)
1952 {
1953  while (*src) {
1954  unsigned char x;
1955  const GWEN_TEXT_ESCAPE_ENTRY *e;
1956  int match;
1957 
1958  match=0;
1959  x=(unsigned char)*src;
1961  while (e->replace) {
1962  if (x==e->character) {
1964  match=1;
1965  break;
1966  }
1967  e++;
1968  } /* while */
1969 
1970  if (!match) {
1971  if (0 && x>127) { /* disabled */
1972  char numbuf[32];
1973 
1974  snprintf(numbuf, sizeof(numbuf), "&#%d;", x);
1975  GWEN_Buffer_AppendString(buf, numbuf);
1976  }
1977  else
1978  GWEN_Buffer_AppendByte(buf, *src);
1979  }
1980  src++;
1981  } /* while */
1982 
1983  return 0;
1984 }
1985 
1986 
1987 
1988 int GWEN_Text_ConvertCharset(const char *fromCharset,
1989  const char *toCharset,
1990  const char *text, int len,
1991  GWEN_BUFFER *tbuf)
1992 {
1993  if (len) {
1994  if (fromCharset && *fromCharset && toCharset && *toCharset &&
1995  strcasecmp(fromCharset, toCharset)!=0) {
1996 #ifndef HAVE_ICONV
1998  "iconv not available, can not convert from \"%s\" to \"%s\"",
1999  fromCharset, toCharset);
2000 #else
2001  iconv_t ic;
2002 
2003  ic=iconv_open(toCharset, fromCharset);
2004  if (ic==((iconv_t)-1)) {
2005  DBG_ERROR(GWEN_LOGDOMAIN, "Charset \"%s\" or \"%s\" not available",
2006  fromCharset, toCharset);
2007  }
2008  else {
2009  char *outbuf;
2010  char *pOutbuf;
2011  /* Some systems have iconv in libc, some have it in libiconv
2012  (OSF/1 and those with the standalone portable GNU libiconv
2013  installed). Check which one is available. The define
2014  ICONV_CONST will be "" or "const" accordingly. */
2015  ICONV_CONST char *pInbuf;
2016  size_t inLeft;
2017  size_t outLeft;
2018  size_t done;
2019  size_t space;
2020 
2021  /* convert */
2022  pInbuf=(char *)text;
2023 
2024  outLeft=len*2;
2025  space=outLeft;
2026  outbuf=(char *)malloc(outLeft);
2027  assert(outbuf);
2028 
2029  inLeft=len;
2030  pInbuf=(char *)text;
2031  pOutbuf=outbuf;
2032  done=iconv(ic, &pInbuf, &inLeft, &pOutbuf, &outLeft);
2033  if (done==(size_t)-1) {
2034  DBG_ERROR(GWEN_LOGDOMAIN, "Error in conversion: %s (%d)",
2035  strerror(errno), errno);
2036  free(outbuf);
2037  iconv_close(ic);
2038  return GWEN_ERROR_GENERIC;
2039  }
2040 
2041  GWEN_Buffer_AppendBytes(tbuf, outbuf, space-outLeft);
2042  free(outbuf);
2043  DBG_DEBUG(GWEN_LOGDOMAIN, "Conversion done.");
2044  iconv_close(ic);
2045  return 0;
2046  }
2047 #endif
2048  }
2049 
2050  GWEN_Buffer_AppendBytes(tbuf, text, len);
2051  }
2052  return 0;
2053 }
2054 
int GWEN_Text__cmpSegment(const char *w, unsigned int *wpos, const char *p, unsigned int *ppos, int sensecase, unsigned int *matches)
Definition: text.c:1091
uint32_t GWEN_Buffer_GetBytesLeft(GWEN_BUFFER *bf)
Definition: buffer.c:606
char * GWEN_Buffer_GetStart(const GWEN_BUFFER *bf)
Definition: buffer.c:235
void GWEN_Text_CondenseBuffer(GWEN_BUFFER *buf)
Definition: text.c:1593
#define GWEN_TEXT_FLAGS_DEL_TRAILING_BLANKS
Definition: text.h:45
const char * replace
Definition: text.c:59
Definition: text.c:57
uint32_t GWEN_Buffer_GetMaxUnsegmentedWrite(GWEN_BUFFER *bf)
Definition: buffer.c:597
uint32_t GWEN_Buffer_GetUsedBytes(const GWEN_BUFFER *bf)
Definition: buffer.c:282
void GWEN_Text_DumpString(const char *s, unsigned int l, unsigned int insert)
Definition: text.c:1256
GWEN_LOGGER_LEVEL
Definition: logger.h:64
#define NULL
Definition: binreloc.c:297
int GWEN_Text_EscapeToBufferTolerant(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1444
int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize)
Definition: text.c:823
int GWEN_Text_ConvertCharset(const char *fromCharset, const char *toCharset, const char *text, int len, GWEN_BUFFER *tbuf)
Definition: text.c:1988
int GWEN_Buffer_AdjustUsedBytes(GWEN_BUFFER *bf)
Definition: buffer.c:538
#define GWEN_LOGDOMAIN
Definition: logger.h:35
uint32_t GWEN_Buffer_GetPos(const GWEN_BUFFER *bf)
Definition: buffer.c:253
static const GWEN_TEXT_ESCAPE_ENTRY gwen_text__xml_escape_chars[]
Definition: text.c:62
GWEN_BUFFER * GWEN_Buffer_new(char *buffer, uint32_t size, uint32_t used, int take)
Definition: buffer.c:38
int GWEN_Text_UnescapeToBufferTolerant(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1488
char * GWEN_Buffer_GetPosPointer(const GWEN_BUFFER *bf)
Definition: buffer.c:618
char * GWEN_Text_GetWord(const char *src, const char *delims, char *buffer, unsigned int maxsize, uint32_t flags, const char **next)
Definition: text.c:73
int GWEN_Buffer_IncrementPos(GWEN_BUFFER *bf, uint32_t i)
Definition: buffer.c:519
double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign)
Definition: text.c:1759
int GWEN_Text_CountUtf8Chars(const char *s, int len)
Definition: text.c:1772
#define ICONV_CONST
Definition: text.c:43
void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l, GWEN_BUFFER *mbuf, unsigned int insert)
Definition: text.c:1297
#define GWEN_TEXT_FLAGS_NULL_IS_DELIMITER
Definition: text.h:48
void GWEN_Logger_Log(const char *logDomain, GWEN_LOGGER_LEVEL priority, const char *s)
Definition: logger.c:553
#define DBG_DEBUG(dbg_logger, format, args...)
Definition: debug.h:208
#define GWEN_TEXT_FLAGS_NEED_DELIMITER
Definition: text.h:47
int GWEN_Text_ToBcdBuffer(const char *src, unsigned l, GWEN_BUFFER *buf, unsigned int groupsize, char delimiter, int skipLeadingZeroes)
Definition: text.c:961
int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1349
const char * GWEN_Text_StrCaseStr(const char *haystack, const char *needle)
Definition: text.c:1056
char * GWEN_Text_Unescape(const char *src, char *buffer, unsigned int maxsize)
Definition: text.c:515
#define GWEN_ERROR_GENERIC
Definition: error.h:62
#define GWEN_TEXT_FLAGS_DEL_MULTIPLE_BLANKS
Definition: text.h:46
void GWEN_Text_LogString(const char *s, unsigned int l, const char *logDomain, GWEN_LOGGER_LEVEL lv)
Definition: text.c:1579
char * GWEN_Text_UnescapeTolerantN(const char *src, unsigned int srclen, char *buffer, unsigned int maxsize)
Definition: text.c:527
int GWEN_Buffer_AppendByte(GWEN_BUFFER *bf, char c)
Definition: buffer.c:399
int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:915
int GWEN_Text_UnescapeXmlToBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1829
double GWEN_Text__CheckSimilarity(const char *s1, const char *s2, int ign)
Definition: text.c:1684
int GWEN_Text_EscapeXmlToBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1951
void GWEN_Buffer_free(GWEN_BUFFER *bf)
Definition: buffer.c:85
struct GWEN_BUFFER GWEN_BUFFER
A dynamically resizeable text buffer.
Definition: buffer.h:41
int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:1384
int GWEN_Text_Compare(const char *s1, const char *s2, int ign)
Definition: text.c:1034
int GWEN_Text__findSegment(const char *w, unsigned int *wpos, const char *p, unsigned int *ppos, int sensecase, unsigned int *matches)
Definition: text.c:1157
int GWEN_Buffer_Crop(GWEN_BUFFER *bf, uint32_t pos, uint32_t l)
Definition: buffer.c:1020
int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize, int fillchar)
Definition: text.c:1216
#define DBG_ERROR(dbg_logger, format, args...)
Definition: debug.h:97
char * GWEN_Text_ToHex(const char *src, unsigned l, char *buffer, unsigned int maxsize)
Definition: text.c:630
int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase)
Definition: text.c:1181
char * GWEN_Text_ToHexGrouped(const char *src, unsigned l, char *buffer, unsigned maxsize, unsigned int groupsize, char delimiter, int skipLeadingZeroes)
Definition: text.c:664
int GWEN_Text_StringToDouble(const char *s, double *num)
Definition: text.c:1661
#define DBG_INFO(dbg_logger, format, args...)
Definition: debug.h:177
#define GWEN_TEXT_FLAGS_DEL_QUOTES
Definition: text.h:49
char * GWEN_Text_UnescapeTolerant(const char *src, char *buffer, unsigned int maxsize)
Definition: text.c:618
int GWEN_Text_GetWordToBuffer(const char *src, const char *delims, GWEN_BUFFER *buf, uint32_t flags, const char **next)
Definition: text.c:199
char * GWEN_Text_EscapeTolerant(const char *src, char *buffer, unsigned int maxsize)
Definition: text.c:373
int GWEN_Buffer_ReadByte(GWEN_BUFFER *bf)
Definition: buffer.c:500
char * GWEN_Text_Escape(const char *src, char *buffer, unsigned int maxsize)
Definition: text.c:322
#define GWEN_TEXT__APPENDCHAR(chr)
#define GWEN_TEXT_FLAGS_DEL_LEADING_BLANKS
Definition: text.h:44
int GWEN_Buffer_AppendBytes(GWEN_BUFFER *bf, const char *buffer, uint32_t size)
Definition: buffer.c:366
int character
Definition: text.c:58
#define GWEN_TEXT_FLAGS_CHECK_BACKSLASH
Definition: text.h:50
int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf)
Definition: text.c:870
int GWEN_Text_EscapeToBufferTolerant2(GWEN_BUFFER *src, GWEN_BUFFER *buf)
Definition: text.c:1533
int GWEN_Text_DoubleToBuffer(double num, GWEN_BUFFER *buf)
Definition: text.c:1636
int GWEN_Text_ToHexBuffer(const char *src, unsigned l, GWEN_BUFFER *buf, unsigned int groupsize, char delimiter, int skipLeadingZeroes)
Definition: text.c:750
int GWEN_Buffer_AppendString(GWEN_BUFFER *bf, const char *buffer)
Definition: buffer.c:1062
char * GWEN_Text_UnescapeN(const char *src, unsigned int srclen, char *buffer, unsigned int maxsize)
Definition: text.c:431