String routines to complement <string> && <cstring>:
strnum.cpp
Go to the documentation of this file.
1 // strnum.cpp (c) 2019 adolfo.dimare@gmail.com
2 
3 /** \file strnum.cpp
4  \brief Complement routines for \c <string> && \c <cstring>
5 
6  \author adolfo.dimare@gmail.com
7  \date 2019
8 */
9 
10 #include "strnum.h"
11 
12 #include <iostream>
/** Insert in place the separator 'sep' into a numeric string.
 - The string is changed in place and its length remains the same.
 - Skips leading non numerics but handles correctly the sign '-' '+'
   if it immediately precedes the first digit of the number.
 - Leading zeroes ('0') are treated as significant digits.
 - The room for the separators is taken from the characters to the
   left of the number: they get overwritten as the number grows to
   the left.
 - The separator is put in place every 'w' digits, counting from the
   last digit.
 - Returns a pointer to the first digit (or sign) in the resulting
   string.
 - To insert the comma ',' as the thousand separator use this:
   - <code>char *first = insnsep( num, ',' ,3 );</code>
 - On error, returns NULL and does not change 'num'. There are
   several error conditions:
   - When there is not enough room at the left of the number to
     insert the separator(s) returns NULL.
   - When ( w==0 ) returns NULL.
   - When 'num' does not have any digits to work on returns NULL.
   - When num==NULL returns NULL.

 \dontinclude strnum_test.cpp
 \skipline    test::insnsep()
 \until       }}
*/
char *insnsep( char* num, char sep, unsigned w ) {
    if ( num==NULL || w==0 ) { return NULL; }

    // walk to the first digit, counting the characters in front of it
    size_t room = 0;
    char *head = num;
    for ( ; *head!=0 && !('0'<=*head && *head<='9'); ++head ) { ++room; }
    if ( *head==0 ) { return NULL; } // no digits

    // length of the run of digits that starts at 'head'
    size_t digits = 0;
    for ( const char *scan=head; '0'<=*scan && *scan<='9'; ++scan ) {
        ++digits;
    }

    size_t seps  = (digits-1) / w;   // # separators to insert
    size_t group = digits - seps*w;  // # chars before first separator [1..w]

    // a sign glued to the first digit travels with the number
    if ( room>0 && ( head[-1]=='-' || head[-1]=='+' ) ) {
        --head; --room; ++group;
    }

    // every separator consumes one character at the left of the number
    if ( seps>room ) { return NULL; }

    char *src    = head;
    char *dst    = head - seps;
    char *result = dst;
    while ( dst!=src ) { // [seps==0] ==> nothing to move
        *dst++ = *src++;
        if ( --group==0 ) {
            *dst++ = sep; // 'dst' gains one position on 'src'
            group  = w;
        }
    }
    return result;
}
86 
87 /** Use 'insnsep()' to format a Rupee amount.
88  - Returns NULL to signal error conditions. In this case, 'num'
89  remains unchanged.
90  \see http://di-mare.com/adolfo/p/mnyfmt.htm#fg-13
91  - Returns NULL to signal error conditions. In this case, 'num'
92  remains unchanged.
93 
94  \dontinclude strnum_test.cpp
95  \skipline test::insnsep_Rupee()
96  \until }}
97 */
98 char* insnsep_Rupee( char *num, char sep ) {
99  char *last, *first=num;
100  if (num==NULL) { return NULL; }
101  while (*first!=0) { // find first digit in [0:9]
102  if ( '0'<=*first && *first<='9' ) { break; }
103  ++first;
104  }
105  if (*first==0) { return NULL; }
106  last = first;
107  while ( '0'<=*last && *last<='9' ) { // find last digit in [0:9]
108  ++last;
109  }
110 
111  --last; // no danger because *first is a digit
112  {
113  char remember = *last;
114  *last = 0;
115  first = insnsep(num,sep,2);
116  *last = remember;
117  }
118  return first;
119 }
120 
/** Format the first 8 digits of 'num' as a date, in place.
 - Requires at least 8 consecutive digits to format.
 - Requires at least 2 characters before the first digit: they are
   overwritten to make room for the 2 separators.
 - Ignores the sign '-' '+' (it is treated as any other non digit).
 - When 'YMD' is 'true' the first 4 digits will be assumed to be the
   year, otherwise the last 4 digits are the year.
 \see http://en.wikipedia.org/wiki/Date_format_by_country
 - Returns a pointer to the first character of the formatted date,
   which is 2 characters before the original first digit.
 - Returns NULL to signal error conditions. In this case, 'num'
   remains unchanged.

 \dontinclude strnum_test.cpp
 \skipline    test::insnsep_date8()
 \until       }}
*/
char* insnsep_date8( char *num, char sep, bool YMD ) {
    char *first = num;
    const char *scan;
    size_t room = 0;  // # characters before the first digit
    size_t ndig = 0;  // # consecutive digits
    if ( num==NULL ) { return NULL; }

    while ( *first!=0 && !('0'<=*first && *first<='9') ) { // find first digit
        ++room; ++first;
    }
    if ( *first==0 ) { return NULL; } // no digits
    if ( room<2 )    { return NULL; } // no room for 2 separators

    for ( scan=first; '0'<=*scan && *scan<='9'; ++scan ) { // count digits
        ++ndig;
    }
    if ( ndig<8 ) { return NULL; }

    // it's easier to memmove() than to insnsep()
    first -= 2;                         // make room for the separators
    if ( YMD ) { // YMD && YDM          // "..YYYYMMDD..."
        memmove( first+0, first+2, 4 ); // "YYYY??MMDD..."
        *(first+4) = sep;               // "YYYY/?MMDD..."
        memmove( first+5, first+6, 2 ); // "YYYY/MM?DD..."
        *(first+7) = sep;               // "YYYY/MM/DD..."
    }
    else { // DMY MDY                   // "..DDMMYYYY..."
        memmove( first+0, first+2, 2 ); // "DD??MMYYYY..."
        *(first+2) = sep;               // "DD/?MMYYYY..."
        memmove( first+3, first+4, 2 ); // "DD/MM?YYYY..."
        *(first+5) = sep;               // "DD/MM/YYYY..."
    }
    return first;
}
166 
/** Takes all leading numeric values from 'str' to fill up 'VEC[]'.
 - Ignores any dot '.' and every negative number sign '-' (they are
   skipped as any other non digit).
 - Numbers in 'str' should be separated by any "non digits".
 - Stores up to 'N' numbers in 'VEC[]'.
 - Requires 'VEC[]' to have room for at least 'N' values.
 - A value too big for 'uintmax_t' wraps around (unsigned arithmetic).
 - Returns the number of values it stores in 'VEC[]'.
 - Returns 0 when 'str' or 'VEC' is NULL.

 \dontinclude strnum_test.cpp
 \skipline    test::atou_vec()
 \until       }}
*/
size_t atou_vec( const char* str, uintmax_t VEC[], size_t N ) {
    size_t count = 0; // [count=0 -> N[
    if ( str==NULL || VEC==NULL ) { return 0; }

    while ( count<N ) {
        while ( *str!=0 && !('0'<=*str && *str<='9') ) { // skip !isdigit()
            ++str;
        }
        if ( *str==0 ) { break; } // no more numbers

        uintmax_t val = 0;
        do { // '*str' is a digit here
            val = 10*val + (uintmax_t)(*str-'0');
            ++str;
        } while ( '0'<=*str && *str<='9' );

        VEC[count] = val;
        ++count;
    }
    return count;
}
199 
/** Take all numeric values from 'str' to fill up 'VEC[]'.
 - Numbers in 'str' should be separated by any "non numerics".
 - Handles negative numbers with negative number sign '-'.
 - For negative numbers, the negative number sign '-' must
   immediately precede its digits.
 - Dot '.' is ignored (no floating point).
 - Stores up to 'N' numbers in 'VEC[]'.
 - Requires 'VEC[]' to have room for at least 'N' values.
 - Returns the number of values it stores in 'VEC[]'.
 - Returns 0 when 'str' or 'VEC' is NULL.

 \dontinclude strnum_test.cpp
 \skipline    test::atoi_vec()
 \until       }}
*/
size_t atoi_vec( const char *str, intmax_t VEC[], size_t N ) {
    size_t count = 0; // [count=0 -> N[
    if ( str==NULL || VEC==NULL ) { return 0; }

    while ( count<N ) {
        // Skip non digits. When the loop stops at a digit, 'negative'
        // tells whether the character just before it was a '-'.
        bool negative = false;
        while ( *str!=0 && !('0'<=*str && *str<='9') ) {
            negative = ( *str=='-' );
            ++str;
        }
        if ( *str==0 ) { break; } // no more numbers

        uintmax_t val = 0;
        while ( '0'<=*str && *str<='9' ) {
            val = 10*val + (uintmax_t)(*str-'0');
            ++str;
        }
        // negate in unsigned arithmetic (well defined), then convert
        VEC[count] = ( negative ? (intmax_t)(0-val) : (intmax_t)val );
        ++count;
    }
    return count;
}
245 
/** Extract the next number of up to 'N' digits from '*str'.
 - Skips over leading characters that are not digits ['0'..'9'].
 - Only extracts consecutive digits, and no more than 'N' of them.
 - Skips over the sign '-' (as any other non digit).
 - Numbers in '*str' should be separated by any "non numerics".
 - If the number is too big it won't fit in a 'uintmax_t'.
 - Updates '*str' so that it points to the next character after
   the last digit used.
 - Returns zero (0) if there are no further digits in '*str', or
   when 'str' or '*str' is null.

 \dontinclude strnum_test.cpp
 \skipline    test::atou_ndigit()
 \until       }}
*/
uintmax_t atou_ndigit( const char* *str, unsigned N ) {
    if ( str==0 || *str==0 ) { return 0; }

    const char *cur = *str;
    while ( *cur!=0 && !('0'<=*cur && *cur<='9') ) { // skip !isdigit()
        ++cur;
    }

    uintmax_t val = 0;
    for ( ; N>0 && '0'<=*cur && *cur<='9'; --N, ++cur ) {
        val = 10*val + (*cur-'0');
    }

    *str = cur; // publish how far the scan went
    return val;
}
279 /* Extrae el siguiente valor numérico de 'N' dígitos de '*str'.
280  - Se brinca caracteres que no son dígitos ['0'..'9'].
281  - Solo usa dígitos consecutivos.
282  - Se brinca el signo '-'.
283  - Si 'N' es muy grande, el valor retornado no cabe en 'uintmax_t'.
284  - Actualiza '*str' para que apunte al siguiente caracter después del
285  último dígito del valor retornado 'num'.
286  - Retorna cero (0) si no hay más dígitos en '*str'.
287 */
288 
/** Converts 'val' into the zero terminated C-string 'dst'.
 - 'val' is written using the digits of numeric base 'base'.
 - Digits above 9 are written as lowercase letters 'a'..'z', so
   bases up to 36 are supported.
 - 'dst' must have at least 'sz' char's (including eos=0x0).
 - Usually 'dst[]' is an array where 'sz==sizeof( dst[] )'.
 - The number is right aligned: the non significant positions at the
   beginning of 'dst' are filled up with '0'.
 - Returns a pointer to the first significant digit within 'dst'
   (for val==0 that is the last '0').
 - [Usually does not return 'dst'].
 - If 'val' needs more than 'sz-1' digits only its least significant
   'sz-1' digits are stored, and 'dst' is returned.
 - Returns 'dst', without writing into it, when the arguments are not
   valid: dst==NULL, base==0, base>36 or sz<2.
 \pre (dst!=NULL) && (0<base && base<=36) && (sz>1).

 \dontinclude strnum_test.cpp
 \skipline    test::utoa_sz()
 \until       }}
*/

char* utoa_sz( uintmax_t val, char *dst, unsigned base, size_t sz ) {
    static const char DIGITS[] = "0123456789abcdefghijklmnopqrstuvwxyz";
    char *p, *lead;
    if ( dst==NULL || base==0 || base>36 || sz<2 ) { return dst; }

    p = dst + (sz-1); *p = 0; // EOS
    do { // store digits from right to left
        --p;
        *p  = DIGITS[ val % base ];
        val = val / base;
    } while ( val!=0 && p!=dst );

    for ( lead=dst; lead!=p; ++lead ) { // non significant positions
        *lead = '0';
    }
    return p;
}
329 
330 // std::cout << num << "->(nSkip,nDig,nSep,nLead)==("
331 // << nSkip << ',' << nDig << ',' << nSep << ',' << nLead<< ")\n";
332 // std::cout << to << " -> " << from << '\n';
333 
334 #if 0 /* OLD CODE */
335 
336 /** Use 'insnsep()' to format a date.
337  - Requieres exactly 8 digits to format.
338  - When 'YMD' is 'true' the first 4 digits will be assumed to be the
339  year, otherwise the last 4 digits are presummed to be the year.
340  \see http://en.wikipedia.org/wiki/Date_format_by_country
341  - Returns NULL to signal error conditions. In this case, 'num'
342  remains unchanged.
343 
344  \dontinclude strnum_test.cpp
345  \skipline test::insnsep_date8()
346  \until }}
347 */
348 char* insnsep_date8_old( char *num, char sep, bool YMD ) {
349  char *from, *first=num;
350  size_t n;
351  if (num==NULL) { return NULL; }
352  while (*first!=0) { // find first digit in [0:9]
353  if ( '0'<=*first && *first<='9' ) { break; }
354  ++n; ++first;
355  }
356  if ( ( n<2 ) ) { return NULL; }
357  if (*first==0) { return NULL; }
358 
359  from=first; n=0;
360  while ('0'<=*from && *from<='9' ) { // count digits in [0:9]
361  ++from; ++n;
362  }
363  if ( n!=8 ) { return NULL; }
364 
365  /*=/=*/ first = insnsep( num, sep, 4 );
366  --first;
367  if ( YMD ) { // " 1234/67:89..."
368  memmove( first ,first+1, 7 ); *(first+7) = sep;
369  }
370  else { // " 12:45/6789..."
371  memmove( first ,first+1, 2 ); *(first+2) = sep;
372  }
373  return first;
374 }
375 
/* Converts 'str' into an unsigned number.
 - The whole conversion is delegated to 'strtoull()', invoked with
   base 0 and without asking for the end of the number.

 \dontinclude strnum_test.cpp
 \skipline    test::atouint()
 \until       }}
*/
uintmax_t atouint( const char * str ) {
    const unsigned long long parsed = strtoull( str, NULL, 0 );
    return parsed;
}
387 
// Old version of 'atoi_vec()': stores in 'VEC[]' up to 'N' integers
// found in 'str' and returns how many values it stored.
// - A '-' counts as a sign only when a digit immediately follows it.
// - Values are accumulated as 'uintmax_t' so that they are not
//   truncated (nor overflow a plain 'int') before reaching 'VEC[]'.
size_t atoi_vec_old( const char *str, intmax_t VEC[], size_t N ) {
    size_t LEN = strlen( str );
    bool isNegative = false;
    size_t k   = 0; // index into str
    size_t nxt = 0; // index into VEC[]
    while ( k<LEN && nxt<N ) {
        while ( k<LEN ) { // skip leading non numeric
            if ( '0'<=str[k] && str[k]<='9' ) { break; } // isdigit()
            if ( '-'==str[k] ) { // maybe a negative number
                if ( k+1>=LEN ) { // '-' is the last char in 'str'
                    k = LEN; break;
                }
                if ( '0'<=str[k+1] && str[k+1]<='9' ) {
                    ++k; // '-' before digit
                    isNegative = true;
                    break;
                }
            }
            ++k;
        }
        if ( k==LEN ) { break; }

        uintmax_t val = 0; // now convert digits
        while ( k<LEN && '0'<=str[k] && str[k]<='9' ) {
            val = 10*val + (uintmax_t)(str[k]-'0');
            ++k;
        }
        // negate in unsigned arithmetic (well defined), then convert
        VEC[nxt] = ( isNegative ? (intmax_t)(0-val) : (intmax_t)val );
        isNegative = false;
        ++nxt;
    }
    return nxt;
}
422 
/** Extracts the first 'N' numbers from 'str'.
 - Skips over characters that are not digits ['0'..'9'].
 - Only uses consecutive digits.
 - Skips over the sign '-'.
 - VEC[] must have room to store at least 'N' numbers.
 - Returns the number of values stored in 'VEC[]'.

 \dontinclude strnum_test.cpp
 \skipline    test::atou_vec_old()
 \until       }}
*/
size_t atou_vec_old( const char *str, uintmax_t VEC[], size_t N ) {
    const size_t len = strlen( str );
    size_t pos    = 0; // index into str
    size_t stored = 0; // index into VEC[]
    while ( pos<len && stored<N ) {
        for ( ; pos<len; ++pos ) { // skip leading non numeric
            if ( '0'<=str[pos] && str[pos]<='9' ) { break; }
        }
        if ( pos==len ) { break; }

        // convert digits: str[pos] is a digit here
        uintmax_t val = 0;
        do {
            val = 10*val + (str[pos]-'0');
            ++pos;
        } while ( pos<len && '0'<=str[pos] && str[pos]<='9' );

        VEC[stored++] = val;
    }
    return stored;
}
457 
/// Gets the unsigned numbers found in 'str'.
/// Stores the numbers into 'VEC[]', which has no size limit here:
/// the caller must provide room for every number in 'str'.
/// Counts the numbers it stores and returns it.
size_t atou_vec_UGLY(const char str[], uintmax_t* VEC ) {
    const size_t len = strlen(str);

    size_t pos = 0; // skip leading non numeric
    while ( pos<len && !('0'<=str[pos] && str[pos]<='9') ) { ++pos; }
    if ( pos==len ) { return 0; }

    size_t stored = 0;    // index into VEC[]
    size_t acc    = 0;    // value of the number being read
    bool   inside = true; // str[pos] is the first digit of a number
    for ( ; pos<len; ++pos ) {
        const bool digit = ( '0'<=str[pos] && str[pos]<='9' );
        if ( digit ) {
            acc = acc * 10 + (str[pos]-'0');
            inside = true;
        }
        else if ( inside ) { // the number just ended
            VEC[stored++] = acc;
            acc = 0;
            inside = false;
        }
    }
    if ( inside ) { VEC[stored++] = acc; } // number at the end of 'str'
    return stored;
}
498 
// Old version of 'atouint()': returns the value of the first run of
// consecutive digits found in 'str', or 0 when 'str' has no digits.
uintmax_t atouint_old( const char * str ) {
    const size_t len = strlen(str);
    size_t pos = 0;
    for ( ; pos<len; ++pos ) { // skip leading non numeric
        if ( '0'<=str[pos] && str[pos]<='9' ) { break; }
    }

    uintmax_t result = 0;
    for ( ; pos<len && '0'<=str[pos] && str[pos]<='9'; ++pos ) {
        result = 10 * result + (str[pos]-'0');
    }
    return result;
}
515 
516 #endif
517 
518 // EOF: strnum.cpp
insnsep_Rupee
char * insnsep_Rupee(char *num, char sep)
Use 'insnsep()' to format a Rupee amount.
Definition: strnum.cpp:98
strnum.h
Complement routines for <string> && <cstring>
utoa_sz
char * utoa_sz(uintmax_t val, char *dst, unsigned base, size_t sz)
Converts 'val' into the zero terminated C-string 'dst'.
Definition: strnum.cpp:303
atouint
uintmax_t atouint(const char *str)
Converts 'str' into an unsigned number invoking strtoull()
Definition: strnum.h:40
insnsep
char * insnsep(char *num, char sep, unsigned w)
Insert in place the separator 'sep' for a numeric string.
Definition: strnum.cpp:42
insnsep_date8
char * insnsep_date8(char *num, char sep, bool YMD)
Use 'insnsep()' to format a date.
Definition: strnum.cpp:134
atou_vec
size_t atou_vec(const char *str, uintmax_t VEC[], size_t N)
Takes all leading numeric values from 'str' to fill up 'VEC[]'.
Definition: strnum.cpp:178
atou_ndigit
uintmax_t atou_ndigit(const char **str, unsigned N)
Extract the next number of 'N' digits from '*str'.
Definition: strnum.cpp:260
atoi_vec
size_t atoi_vec(const char *str, intmax_t VEC[], size_t N)
Take all numeric values from 'str' to fill up 'VEC[]'.
Definition: strnum.cpp:214