ztring.c: A few important extensions for <string.h>
 All Classes Files Functions Variables Enumerator Friends Macros
ztring.c
Go to the documentation of this file.
1 /* ztring.c (C) 2014 adolfo@di-mare.com */
2 
3 /** \file ztring.c
4  \brief Implementation for <ztring.h>
5  \author Adolfo Di Mare <adolfo@di-mare.com>
6  \date 2014
7 */
8 
9 #include "ztring.h"
10 
11 #ifdef __cplusplus
12  #include <cassert> // assert()
13 #else
14  #include <assert.h> /* assert() */
15 #endif
16 
17 /** Copies up to 'size' characters from 'src' to 'dest'.
18 
19  This is a 'size' checked version of 'strcpy()'. The 'dest' memory block
20  will always be a null terminated C string (unless 'size' is zero or
21  'dest' is NULL).
22 
23  Stops copying when the end of the source C string is found (which is
24  signaled by a null-character), even if less than 'size' characters have
25  been copied (but 'dest' is not padded with zeros).
26 
27  A null-character is implicitly appended at the end of 'dest' if the
28  length of the 'src' C string is 'size' or bigger; in this case, only a
29  portion of the leading characters from 'src' get copied into 'dest'.
30 
31  'dest' and 'src' shall not overlap (see 'memmove()' for a safer
32  alternative when overlapping).
33 
34  \see http://www.drdobbs.com/managed-string-library-for-c/184402023
35  \see http://en.wikipedia.org/wiki/C_string_handling
36 
37  \remark An alternative to this function is 'strlcpy()': when the size of
38  'dest' is not big enough, invoking 'strlcpy()' will return the minimum
39  size that 'dest' should have to fit every character from 'src'.
40  \see http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
41 
42  \remarks Note the differences between this function and strncpy():
43  - strncpy() doesn't always NUL terminate; ztrcpy() does.
44  - strncpy() pads the destination string with NULs, which is often
45  unnecessary; ztrcpy() does not.
46  - ztrcpy() returns a pointer to the destination 'dest' string;
47  this is the same behaviour of both strcpy() and strncpy().
48 
49  \param size Size of the 'dest' memory block.
50  \param dest Pointer to the destination array where the content is to be copied.
51  \param src C string to be copied.
52  \returns 'dest' is returned.
53 
54  \test_example{ztrcpy}
55  \test_example{BeUNsafe::SZ}
56 */
char* ztrcpy( size_t size, char * dest, const char * src ) {
    size_t room; /* characters that still fit in front of the terminator */
    char *out;

    /* No buffer, or a buffer with no room at all: nothing can be stored. */
    if ( dest==NULL || size==0 ) { return dest; }

    /* The last byte of the block is reserved for the terminator, so at most
       (size-1) characters are taken from 'src'. A one byte block never
       reads 'src' and simply ends up holding the empty string. */
    out = dest;
    for ( room = size-1; room>0; --room ) {
        char ch = *src;
        if ( ch==0 ) { break; } /* end of 'src' reached before 'dest' is full */
        *out = ch;
        ++out; ++src;
    }
    *out = 0; /* always terminate, whether 'src' was truncated or not */
    return dest;
}
72 
73 /** Append characters to a string.
74 
75  This is a 'size' checked version of 'strcat()'. The 'dest' memory block
76  will always be a null terminated C string (unless 'size' is zero or
77  'dest' is NULL in which case no characters would be appended).
78 
79  If appending all characters from 'src' would result in a string with
80  'size' or more characters, a null-character is implicitly appended to
81  'dest' to ensure that its length is less than 'size'; in this case, only
82  a portion of the leading characters from 'src' would be appended.
83 
84  \see http://www.openbsd.org/cgi-bin/man.cgi?query=strlcat
85  \see http://en.wikipedia.org/wiki/C_string_handling
86 
87  \param size Size of the 'dest' memory block.
88  \param dest Pointer to the destination array of 'size' characters.
89  \param src C string to be appended.
90 
91  \returns 'dest' is returned.
92  \test_example{ztrcat}
93  \test_example{BeUNsafe::SZ}
94 */
char* ztrcat( size_t size, char * dest, const char * src ) {
    size_t used = 0; /* length of the string already stored in 'dest' */
    char *out;

    /* No buffer, or a buffer with no room at all: nothing can be appended. */
    if ( dest==NULL || size==0 ) { return dest; }

    --size; /* from here on: the longest string that fits in 'dest' */

    /* Measure 'dest' without ever looking past its own 'size' bytes.
       strlen() would run off the end of the block whenever the terminator
       is missing; this scan stops at the last usable position instead. */
    while ( used<size && dest[used]!=0 ) { ++used; }

    /* Append while there is room; the terminator copied from 'src' ends
       the loop early when the whole of 'src' fits. */
    out = dest+used;
    while ( used<size ) {
        if ( 0 == (*out = *src) ) { break; }
        ++out; ++src; ++used;
    }
    *out = 0; /* always terminate (a 1 byte block becomes the empty string) */
    return dest;
}
112 
113 #ifndef NDEBUG
114  #include "uUnit.h"
115 #endif
116 
117 /** Inserts string 'insert' into 'dest' at position 'n'.
118 
119  If inserting all characters from 'insert' would result in a string with
120  'size' or more characters, a null-character is implicitly appended to
121  'dest' to ensure that its length is less than 'size'; in this case, only
122  a portion of the leading characters from 'insert' would be inserted into
123  'dest'. The 'dest' memory block will always be a null terminated C
124  string.
125 
126  Any of the following conditions leaves the value in 'dest' unchanged:
127  - (insert[0]==0) -> 'insert' is the null string
128  - (size == 0)
129  - (dest == NULL)
130 
131  Any of the following conditions will null terminate 'dest':
132  - (n >= size)
133  - (n > strlen(dest)) -> index out of bounds
134  - (insert[0]==0) -> Insert empty string
135  - (size==1)
136 
137  \param size Size of the 'dest' memory block.
138  \param dest Pointer to the destination array of 'size' characters.
139  \param insert C string to be inserted.
140  \param n Position in 'dest' where the string will be inserted.
141  \returns 'dest' is returned.
142  \test_example{ztrins}
143 */
char* ztrins( size_t size, char * dest, size_t n, const char * insert ) {
    size_t inslen;      /* length of 'insert' */
    size_t destlen = 0; /* length of the string already stored in 'dest' */

    if ( dest==NULL || size==0 ) { return dest; }
    if ( size==1 ) { *dest=0; return dest; } /* only the terminator fits */

    --size; /* from here on: the longest string that fits in 'dest' */

    /* Measure 'dest' without reading past its own 'size' bytes (strlen()
       would overrun the block when the terminator is missing). */
    while ( destlen<size && dest[destlen]!=0 ) { ++destlen; }

    /* Bad index or nothing to insert: just make sure the block is
       terminated. (n>destlen) also covers (n>size) since destlen<=size. */
    if ( n>destlen || insert[0]==0 ) {
        dest[size] = 0;
        return dest;
    }

    inslen = strlen( insert );

    /* The comparisons below are written as subtractions (n<=destlen<=size
       holds here) so that a huge 'inslen' cannot wrap the arithmetic. */
    if ( inslen >= size-n ) {
        /* 'insert' alone fills the rest of the block: the old tail of
           'dest' is dropped and 'insert' is truncated. */
        memmove( &dest[n], insert, size-n );
    }
    else {
        /* First slide the tail of 'dest' to the right... */
        if ( inslen >= size-destlen ) {
            /* ...keeping only the piece of the tail that still fits. */
            memmove( &dest[n+inslen], &dest[n], size-(n+inslen) );
        }
        else {
            /* ...all of it; the result is shorter than the block. */
            memmove( &dest[n+inslen], &dest[n], destlen-n );
            size = destlen+inslen; /* new length of 'dest' */
        }
        memmove( &dest[n], insert, inslen ); /* ...then fill the gap */
    }
    dest[size] = 0;
    return dest;
/*
    Layout before and after a complete insertion:

      n                 size
    +===!-----+.........+          destlen = strlen("===!-----")
        +______+                   inslen  = strlen("______")
                                   tail    = "-----" -> (destlen-n) chars

    +===!______-----+....+
*/
}
187 
188 /** Deletes the leading 'len' characters from 'dest'.
189  When 'dest' has less than 'len' characters, it becomes the null string.
190  Any of the following conditions leaves the value in 'dest' unchanged:
191  - (dest[0]==0) ==> 'dest' is the null string
192  - (len == 0)
193  - (dest == NULL)
194 
195 \param dest Pointer to the destination array of characters.
196 \param len Number of characters to remove from 'dest'.
197 \returns 'dest' is returned.
198 \test_example{strdel}
199 */
char* strdel( char * dest, size_t len ) {
    size_t total; /* current length of 'dest' */

    /* Nothing to do: no string, nothing to remove, or already empty.
       The string is not written to in any of these cases. */
    if ( dest==NULL || len==0 || dest[0]==0 ) { return dest; }

    total = strlen( dest );
    if ( len>=total ) {
        dest[0] = 0; /* every character goes away */
    }
    else {
        /* Slide the surviving tail, terminator included, to the front. */
        memmove( dest, dest+len, (total-len)+1 );
    }
    return dest;
}
212 
213 /** Copies the first 'len' characters from 'src' to 'dest'.
214  The string in 'dest' is zero terminated.
215  No more than 'size' characters in 'dest' get overwritten.
216  \param size Size of the 'dest' memory block.
217  \param dest C substring to be produced.
218  \param src Source string.
219  \param len maximum length of substring to be produced.
220  \test_example{ztrsub}
221 */
char* ztrsub( size_t size, char * dest, const char * src, size_t len ) {
    size_t limit; /* most characters that may be copied */
    size_t k;

    /* No buffer, or a buffer with no room at all: nothing can be stored. */
    if ( dest==NULL || size==0 ) { return dest; }

    /* One byte of 'dest' is reserved for the terminator; the caller's 'len'
       can only shrink what is left. A one byte block leaves limit==0, so
       'src' is never read and 'dest' becomes the empty string. */
    limit = size-1;
    if ( len<limit ) { limit = len; }

    for ( k=0; k<limit; ++k ) {
        if ( src[k]==0 ) { break; } /* 'src' is shorter than 'limit' */
        dest[k] = src[k];
    }
    dest[k] = 0;
    return dest;
}
237 
/** Returns a pointer to the first character in 'src' different from 'tr'.
    Does not change 'src' but returns a pointer inside it.

    The scan stops at the terminating null-character, so the result never
    points beyond the end of 'src':
    - a string made only of 'tr' characters yields a pointer to its
      terminator (the empty string);
    - (tr == 0) yields 'src' itself.
\param src Source string; NULL yields NULL.
\param tr  Character to trim from left of 'src'.
\returns A pointer inside 'src' (the 'const' qualifier is cast away).
\test_example{strltrim}
*/
char* strltrim( const char *src , char tr ) {
    if ( src==NULL ) { return NULL; }
    /* The explicit terminator test matters when (tr == 0): comparing
       against 'tr' alone would walk past the end of the string. */
    while ( *src!=0 && *src==tr ) {
        ++src;
    }
    return (char*)(src);
}
252 
/** Removes from 'src' all trailing characters that are equal to 'tr'.
    Changes string 'src' and returns a pointer to it.
    All trailing characters equal to 'tr' are removed by storing one
    char(0) over the first character of the trailing run. Characters equal
    to 'tr' that are followed by any different character are kept, which
    includes a leading run such as the blank in " abc".
    The string is not written to when there is nothing to remove.
\param src Source string; NULL yields NULL.
\param tr  Character to trim from right of 'src'.
\returns 'src' is returned.
\test_example{strrtrim}
*/
char* strrtrim( char *src , char tr ) {
    char *p;
    char *cut = NULL; /* start of the run of 'tr' being scanned, if any */

    if ( src==NULL ) { return NULL; }

    for ( p=src; *p!=0; ++p ) {
        if ( *p==tr ) {
            if ( cut==NULL ) { cut = p; } /* a new run starts here */
        }
        else {
            cut = NULL; /* the run was not trailing: forget it */
        }
    }
    /* NULL (and not 'src') marks "no trailing run", so a string whose first
       character equals 'tr' is not mistaken for one that must be emptied. */
    if ( cut!=NULL ) {
        *cut = 0;
    }
    return src;
}
283 
284 /** return strrtrim( strltrim(s,tr),tr ).
285  Returns a pointer to the first character in 's' different from 'tr' but
286  also removes from 's' all trailing characters equal to 'tr'.
287 */
char* strtrim( char *src , char tr ) {
    /* Step over the leading run of 'tr' (no write to the string)... */
    char *first = strltrim( src, tr );
    /* ...then chop the trailing run in place, starting from that point. */
    return strrtrim( first, tr );
}
291 
/** Removes every occurrence of 'ch' from 'mem'.
    Mnemonic: memczap <==> memory-zap-char.

    Scans the first 'size' bytes of 'mem' and squeezes out every byte equal
    to 'ch', moving the surviving bytes towards the beginning of the block.
    - Exactly 'size' bytes are examined; nothing outside the block is read.
    - 'ch' is converted to 'unsigned char' before comparing (as 'memchr()'
      does), so byte values above 127 are matched too.
    - The bytes between the returned length and 'size' keep whatever value
      they had; no terminator is stored.

    Examples:
    \code
        123456789
        -!--!--!-     size = 9;  memczap(9,mem,'-') returns 3 == (9-6)
        !!!           <- leading bytes of 'mem' afterwards

        123456
        *:**-*        size = 6;  memczap(6,mem,'*') returns 2 == (6-4)
        :-            <- leading bytes of 'mem' afterwards
    \endcode

\param size Size of memory block 'mem'.
\param mem  Memory block of characters.
\param ch   Character to remove from 'mem'.
\returns The number of non deleted characters that remain in 'mem', which
         is the size the block should have after the characters are removed.
\test_example{memczap}
*/
size_t memczap( size_t size, void * mem, int ch ) {
    unsigned char *in  = (unsigned char*)(mem); /* byte being examined */
    unsigned char *out = in;                    /* where the next kept byte goes */
    const unsigned char zap = (unsigned char)(ch);
    size_t kept = 0;
    size_t seen;

    for ( seen=0; seen<size; ++seen, ++in ) {
        if ( *in!=zap ) {
            /* Until the first deletion both pointers coincide and there is
               nothing to move, so the block is not written to. */
            if ( out!=in ) { *out = *in; }
            ++out; ++kept;
        }
    }
    return kept;
} /* memczap */
348 
349 #if 0
/* This is a less complicated implementation of memczap(): one pass over
   the block, copying down every byte that differs from 'ch'.
   Returns the number of bytes kept. 'ch' is compared as 'unsigned char'
   (the memchr() convention) so that byte values above 127 match even when
   plain 'char' is signed. */
size_t memczap_smaller( size_t size, void *mem, int ch ) {
    unsigned char *i; /* char in process within mem */
    unsigned char *j; /* next position to copy into mem */
    const unsigned char zap = (unsigned char)(ch);

    size_t count; /* bytes examined so far */
    size_t len;   /* bytes kept so far */

    i = j = (unsigned char*) mem;
    len = count = 0;
    while (count<size) {
        if ( zap!=*i ) {
            *j = *i; /* copy */
            len++;   /* count */
            j++;     /* next */
        }

        i++;
        count++;
    }
    return len;

} /* memczap_smaller */
373 #endif
374 
375 /** Returns '1' if 'prefix' is a prefix of 'str'. Otherwise, returns '0'.
376 \param str C string to be scanned.
377 \param prefix C string containing the sequence of characters to match.
378 \test_example{strpfx}
379 */
int strpfx( const char *str, const char *prefix ) {
    size_t k;
    /* Walk 'prefix' to its end; 'str' is only read at positions where
       'prefix' still has a character. A shorter 'str' fails on its own
       terminator, which differs from the pending 'prefix' character. */
    for ( k=0; prefix[k]!=0; ++k ) {
        if ( str[k]!=prefix[k] ) { return 0; }
    }
    return 1; /* every character matched (always true for an empty prefix) */
}
387 
/** Returns '1' if 'suffix' is a suffix of 'str'. Otherwise, returns '0'.
    Every character of 'suffix', the first one included, must match the
    corresponding trailing character of 'str'. The empty string is a
    suffix of every string.
\param str C string to be scanned.
\param suffix C string containing the sequence of characters to match.
\returns '1' when 'str' ends with 'suffix', '0' otherwise.
\test_example{strsffx}
*/
int strsffx( const char *str, const char *suffix ) {
    size_t slen = strlen( str );
    size_t xlen = strlen( suffix );
    if ( slen<xlen ) { return 0; } /* 'suffix' is longer than 'str' */
    /* Compare the last 'xlen' characters of 'str' against all of 'suffix'. */
    return ( memcmp( str+(slen-xlen), suffix, xlen )==0 ? 1 : 0 );
}
407 
/** Get span of characters inside character range '[a..z]'.
    Counts how many characters at the beginning of 'str' lie inside the
    character range that begins in 'a' and ends in 'z' (both included).
    Counting stops at the first character outside the range or at the
    terminating null-character, whichever comes first. Therefore, the
    function returns the length of 'str' when all of its characters are in
    range, and '0' when its first character is not.
\param str C string to be scanned.
\param a First character in character range.
\param z Last character in character range.
\returns The length of the initial portion of 'str' containing only characters in range.
\test_example{strrspn}
*/
size_t strrspn( const char * str, char a, char z ) {
    const char *p;
    for ( p=str; *p!=0; ++p ) {
        if ( *p<a || z<*p ) { break; } /* first character out of range */
    }
    return (size_t)(p-str);
}
428 
429 /** Uses strxltn1() to convert all letters in 'str'.
430  This has the effect of removing accents in many of those letters;
431  for example, 'á' is translated as 'a' and 'ÿ' as 'y'.
432  The translated characters are the upper 8 bit characters in the Latin 1
433  alphabet, also known as Windows-1252 and ISO/IEC 8859-1.
434  \see http://en.wikipedia.org/wiki/ISO/IEC_8859-1
435  \see http://en.wikipedia.org/wiki/Windows-1252
436 
437  This is the translation table used:
438  \code
439  ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
440  AAAAAAECEEEEIIIIDNOOOOOx0UUUUYPsaaaaaaeceeeeiiiidnooooo/0uuuuypy
441  \endcode
442 \test_example{strxacct}
443 \test_example{strxltn1}
444 */
char* strxacct( char* str ) {
    char *cursor;
    /* Translate the string in place, one character at a time, up to (and
       not including) its terminator. The per character mapping is
       delegated to strxltn1().
       See also: http://stackoverflow.com/questions/14094621/ */
    for ( cursor=str; (*cursor)!=0; ++cursor ) {
        *cursor = strxltn1( *cursor );
    }
    return str;
}
464 
465 char* strtokl( char * str, const char * delimiters , size_t *len );
466 
467 /* strtok example */
468 #include <stdio.h>
469 #include <string.h>
470 
471 #if 0
472 
int main () {
    /* Reference example for strtok(): the array is modified in place,
       each delimiter that ends a token is overwritten with a terminator. */
    char str[] ="- This, a sample string.";
    char * token;
    printf ("Splitting string \"%s\" into tokens:\n",str);
    /* First call names the string; later calls pass NULL to continue. */
    for ( token = strtok (str," ,.-"); token != NULL; token = strtok (NULL, " ,.-") ) {
        printf ("%s\n",token);
    }
    return 0;
}
484 
int main() {
    /* Same walk as the strtok() example, using strtokl().
       NOTE(review): strtokl() is only declared in this file. This example
       assumes it returns a pointer to the start of the next token (NULL
       when there are no more) and stores the token length in '*len',
       without writing a terminator into the string -- confirm against its
       implementation. */
    char str[] ="- This, a sample string.";
    char * pch;
    size_t len;
    printf ("Splitting string \"%s\" into tokens:\n",str);
    pch = strtokl (str," ,.-",&len);
    while (pch != NULL) {
        /* The token is not terminated: print exactly 'len' characters. */
        printf ("%.*s\n", (int)(len), pch);
        pch += len; /* resume right after the token just printed */
        /* Keep the result: the loop ends only when this becomes NULL. */
        pch = strtokl (pch, " ,.-",&len);
    }
    return 0;
}
506 #endif /* 0 */
507 
508 
509 /* ztring.c */
size_t strrspn(const char *str, char a, char z)
Get span until character in character range '[a..z]'.
Definition: ztring.c:421
char * ztrcat(size_t size, char *dest, const char *src)
Append characters to a string.
Definition: ztring.c:95
int strsffx(const char *str, const char *suffix)
Returns '1' if 'suffix' is a suffix of 'str'.
Definition: ztring.c:393
char * strrtrim(char *src, char tr)
Removes from 'str' all trailing characters that are equal to 'tr'.
Definition: ztring.c:260
A few string functions to enhance C's <string.h> library.
char * strltrim(const char *src, char tr)
Returns a pointer to the first character in 'str' different from 'tr'.
Definition: ztring.c:244
char * strtrim(char *src, char tr)
return strrtrim( strltrim(s,tr),tr ).
Definition: ztring.c:288
char * ztrins(size_t size, char *dest, size_t n, const char *insert)
Inserts string 'insert' into 'dest' at position 'n'.
Definition: ztring.c:144
char * strdel(char *dest, size_t len)
Deletes the leading 'len' characters from 'str'.
Definition: ztring.c:200
char * ztrsub(size_t size, char *dest, const char *src, size_t len)
Copies the first 'len' characters from 'src' to 'dest'.
Definition: ztring.c:222
char * ztrcpy(size_t size, char *dest, const char *src)
Copies up to 'size' characters from 'src' to 'dest'.
Definition: ztring.c:57
int main()
Definition: test_ztring.c:977
char * strxacct(char *str)
Uses strxltn1() to convert all letters in 'str'.
Definition: ztring.c:445
char * strtokl(char *str, const char *delimiters, size_t *len)
char strxltn1(char accented_latin_1)
Translates characters in range [192<–>192+63] into letters or ASCII symbols that look similar...
Definition: ztring.h:86
size_t memczap(size_t size, void *mem, int ch)
Removes every ocurrence of 'ch' from 'mem'.
Definition: ztring.c:306
int strpfx(const char *str, const char *prefix)
Returns '1' if 'prefix' is a prefix of 'str'.
Definition: ztring.c:380
[u]Micro module for [Unit] program testing.