Lolly 1.4.27
Loading...
Searching...
No Matches
analyze.cpp
Go to the documentation of this file.
1
2/******************************************************************************
3 * MODULE : analyze.cpp
4 * DESCRIPTION: Properties of characters and strings
5 * COPYRIGHT : (C) 1999 Joris van der Hoeven
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
11
12#include "analyze.hpp"
14#include "ntuple.hpp"
15
16/******************************************************************************
17 * Tests for characters
18 ******************************************************************************/
19
// Tell whether c is a letter: an ASCII letter, or any byte with code >= 128
// other than 159, 189, 190 and 191.
bool
is_iso_alpha (char c) {
  // go through unsigned char so that high bytes map to 128..255
  int  code        = (int) ((unsigned char) c);
  bool ascii_letter= ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
  bool high_letter = (code >= 128) && (code != 159) && (code != 189) &&
                     (code != 190) && (code != 191);
  return ascii_letter || high_letter;
}
26
// Tell whether c is a lowercase letter: 'a'..'z', a code in 160..188,
// or a code >= 224.
bool
is_iso_locase (char c) {
  int code= (int) ((unsigned char) c);
  if ((c >= 'a') && (c <= 'z')) return true;
  if ((code >= 160) && (code < 189)) return true;
  return code >= 224;
}
33
// Tell whether c is an uppercase letter: 'A'..'Z', a code in 128..158,
// or a code in 192..223.
bool
is_iso_upcase (char c) {
  int code= (int) ((unsigned char) c);
  if ((c >= 'A') && (c <= 'Z')) return true;
  if ((code >= 128) && (code < 159)) return true;
  return (code >= 192) && (code < 224);
}
40
41/******************************************************************************
42 * Tests for strings
43 ******************************************************************************/
44
45bool
46is_alpha (string s) {
47 int i;
48 if (N (s) == 0) return false;
49 for (i= 0; i < N (s); i++)
50 if (!is_alpha (s[i])) return false;
51 return true;
52}
53
54bool
55is_alphanum (string s) {
56 int i;
57 if (N (s) == 0) return false;
58 for (i= 0; i < N (s); i++)
59 if (!(is_alpha (s[i]) || is_digit (s[i]))) return false;
60 return true;
61}
62
63bool
64is_locase_alpha (string s) {
65 int i;
66 if (N (s) == 0) return false;
67 for (i= 0; i < N (s); i++)
68 if (s[i] < 'a' || s[i] > 'z') return false;
69 return true;
70}
71
72bool
73is_iso_alpha (string s) {
74 int i;
75 if (N (s) == 0) return false;
76 for (i= 0; i < N (s); i++)
77 if (!is_iso_alpha (s[i])) return false;
78 return true;
79}
80
81bool
82is_numeric (string s) {
83 int i;
84 if (N (s) == 0) return false;
85 for (i= 0; i < N (s); i++)
86 if (!is_numeric (s[i])) return false;
87 return true;
88}
89
90/******************************************************************************
91 * Changing cases
92 ******************************************************************************/
93
94char
95upcase (char c) {
96 if (is_iso_locase (c)) return (char) (((int) ((unsigned char) c)) - 32);
97 else return c;
98}
99
100char
101locase (char c) {
102 if (is_iso_upcase (c)) return (char) (((int) ((unsigned char) c)) + 32);
103 else return c;
104}
105
// Return the closing bracket matching an opening '{', '(' or '['.
// Any other character is returned unchanged.
char
closing_delimiter (char c) {
  switch (c) {
  case '{':
    return '}';
  case '(':
    return ')';
  case '[':
    return ']';
  default:
    return c;
  }
}
113
114string
115upcase_first (string s) {
116 if ((N (s) == 0) || (!is_iso_locase (s[0]))) return s;
117 return string ((char) (((int) ((unsigned char) s[0])) - 32)) * s (1, N (s));
118}
119
120string
121locase_first (string s) {
122 if ((N (s) == 0) || (!is_iso_upcase (s[0]))) return s;
123 return string ((char) (((int) ((unsigned char) s[0])) + 32)) * s (1, N (s));
124}
125
126string
127upcase_all (string s) {
128 int i;
129 string r (N (s));
130 for (i= 0; i < N (s); i++)
131 if (!is_iso_locase (s[i])) r[i]= s[i];
132 else r[i]= (char) (((int) ((unsigned char) s[i])) - 32);
133 return r;
134}
135
136string
137locase_all (string s) {
138 int i;
139 string r (N (s));
140 for (i= 0; i < N (s); i++)
141 if (!is_iso_upcase (s[i])) r[i]= s[i];
142 else r[i]= (char) (((int) ((unsigned char) s[i])) + 32);
143 return r;
144}
145
146/******************************************************************************
147 * Inserting or removing a character into a string as a set of characters
148 ******************************************************************************/
149
150string
151string_union (string s1, string s2) {
152 return string_minus (s1, s2) * s2;
153}
154
155string
156string_minus (string s1, string s2) {
157 string r;
158 int i1, n1= N (s1), i2, n2= N (s2);
159 for (i1= 0; i1 < n1; i1++) {
160 for (i2= 0; i2 < n2; i2++)
161 if (s1[i1] == s2[i2]) break;
162 if (i2 == n2) r << s1[i1];
163 }
164 return r;
165}
166
167string
168remove_prefix (string s, string prefix) {
169 if (is_empty (s) || is_empty (prefix)) return s;
170 if (starts (s, prefix)) return s (N (prefix), N (s));
171 return s;
172}
173
174string
175remove_suffix (string s, string suffix) {
176 if (is_empty (s) || is_empty (suffix)) return s;
177 if (ends (s, suffix)) return s (0, N (s) - N (suffix));
178 return s;
179}
180
181/******************************************************************************
182 * Spanish in relation with ispell
183 ******************************************************************************/
184
185string
187 int i, n= N (s);
188 string r;
189 for (i= 0; i < n; i++)
190 if ((s[i] == '\'') && ((i + 1) < n)) {
191 switch (s[i + 1]) {
192 case 'A':
193 r << '\301';
194 break;
195 case 'E':
196 r << '\311';
197 break;
198 case 'I':
199 r << '\315';
200 break;
201 case 'N':
202 r << '\321';
203 break;
204 case 'O':
205 r << '\323';
206 break;
207 case 'U':
208 r << '\332';
209 break;
210 case 'Y':
211 r << '\335';
212 break;
213 case 'a':
214 r << '\341';
215 break;
216 case 'e':
217 r << '\351';
218 break;
219 case 'i':
220 r << '\355';
221 break;
222 case 'n':
223 r << '\361';
224 break;
225 case 'o':
226 r << '\363';
227 break;
228 case 'u':
229 r << '\372';
230 break;
231 case 'y':
232 r << '\375';
233 break;
234 default:
235 r << '\'' << s[i + 1];
236 }
237 i++;
238 }
239 else r << s[i];
240 return r;
241}
242
243string
245 int i, n= N (s);
246 string r;
247 for (i= 0; i < n; i++)
248 switch (s[i]) {
249 case '\301':
250 r << "'A";
251 break;
252 case '\311':
253 r << "'E";
254 break;
255 case '\315':
256 r << "'I";
257 break;
258 case '\321':
259 r << "'N";
260 break;
261 case '\323':
262 r << "'O";
263 break;
264 case '\332':
265 r << "'U";
266 break;
267 case '\335':
268 r << "'Y";
269 break;
270 case '\341':
271 r << "'a";
272 break;
273 case '\351':
274 r << "'e";
275 break;
276 case '\355':
277 r << "'i";
278 break;
279 case '\361':
280 r << "'n";
281 break;
282 case '\363':
283 r << "'o";
284 break;
285 case '\372':
286 r << "'u";
287 break;
288 case '\375':
289 r << "'y";
290 break;
291 default:
292 r << s[i];
293 }
294 return r;
295}
296
297string
299 int i, n= N (s);
300 string r;
301 for (i= 0; i < n; i++)
302 if (s[i] == '\337') r << '\377';
303 else r << s[i];
304 return r;
305}
306
307string
309 int i, n= N (s);
310 string r;
311 for (i= 0; i < n; i++)
312 if (s[i] == '\377') r << '\337';
313 else r << s[i];
314 return r;
315}
316
317/******************************************************************************
318 * Iso latin 2 encoding for polish and czech
319 ******************************************************************************/
320
// Translation table used by il2_to_cork (char): entry [code - 128] is the
// replacement for a byte with code >= 128.
// NOTE(review): the literal contains raw non-ASCII characters; confirm the
// source encoding keeps each of them as a single byte, otherwise the table
// indices shift.
321static string il2_to_cork_string=
322 "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221"
323 "\222\223\224\225\226\227\230\231\232\233\234\235\236\237 \20\212 "
324 "\211\221\237¨\222\223\224\231‐\232\233 "
325 "\241˛\252´\251\261ˇ¸\262\263\264\271˝\272\273\217\301\302\200\304\210\202"
326 "\307\203\311\206\313\205\315\316\204\320\213\214\323\324\216\326."
327 "\220\227\332\226\334\335\225\377\257\341\342\240\344\250\242\347\243\351"
328 "\246\353\245\355\356\244\236\253\254\363\364\256\366/"
329 "\260\267\372\266\374\375\265 ";
// Translation table used by cork_to_il2 (char): entry [code - 128] is the
// replacement for a byte with code >= 128.
330static string cork_to_il2_string=
331 "\303\241\306\310\317\314\312G\305\245\243\321\322 "
332 "\325\300\330\246\251\252\253\336\333\331Y\254\256\257II\360\247\343\261"
333 "\346\350\357\354\352g\345\265\263\361\362 "
334 "\365\340\370\266\271\272\273\376\373\371y\274\276\277i!?"
335 "LA\301\302A\304AA\307E\311E\313I\315\316I\320NO\323\324O\326OOU\332U\334"
336 "\335 "
337 "Sa\341\342a\344aa\347e\351e\353i\355\356i\360no\363\364o\366oou\372u\374"
338 "\375 \337";
339
340static char
342 int i= (int) ((unsigned char) c);
343 if (i < 128) return c;
344 return il2_to_cork_string[i - 128];
345}
346
347static char
349 int i= (int) ((unsigned char) c);
350 if (i < 128) return c;
351 return cork_to_il2_string[i - 128];
352}
353
354string
355il2_to_cork (string s) {
356 int i, n= N (s);
357 string r (n);
358 for (i= 0; i < n; i++)
359 r[i]= il2_to_cork (s[i]);
360 return r;
361}
362
363string
364cork_to_il2 (string s) {
365 int i, n= N (s);
366 string r (n);
367 for (i= 0; i < n; i++)
368 r[i]= cork_to_il2 (s[i]);
369 return r;
370}
371
372/******************************************************************************
373 * Roman and alpha numbers
374 ******************************************************************************/
375
376string
378 if (nr < 0) return "-" * alpha_nr (-nr);
379 if (nr == 0) return "0";
380 if (nr <= 26) return string ((char) (((int) 'a') + nr - 1));
381 return alpha_nr ((nr - 1) / 26) * alpha_nr (((nr - 1) % 26) + 1);
382}
383
384string
386 return upcase_all (alpha_nr (nr));
387}
388
389string
391 if (nr < 0) nr= -nr;
392 string sym, r;
393 int i, m= (nr - 1) % 6, n= ((nr - 1) / 6) + 1;
394 switch (m) {
395 case 0:
396 sym= "<asterisk>";
397 break;
398 case 1:
399 sym= "<dag>";
400 break;
401 case 2:
402 sym= "<ddag>";
403 break;
404 case 3:
405 sym= "<paragraph>";
406 break;
407 case 4:
408 sym= "<endofline>";
409 break;
410 case 5:
411 sym= "||";
412 break;
413 }
414 for (i= 0; i < n; i++)
415 r << sym;
416 return r;
417}
418
419string
420raw_quote (string s) {
421 // Mark the label of a STRING tree as representing a string and not a symbol.
422 return "\"" * s * "\"";
423}
424
425string
426raw_unquote (string s) {
427 // Get the string value of a STRING tree label representing a string.
428 if (is_quoted (s)) return s (1, N (s) - 1);
429 else return s;
430}
431
432/******************************************************************************
433 * Handling escape characters
434 ******************************************************************************/
435
436string
437escape_sh (string s) {
438#if (defined OS_MINGW || defined OS_WIN)
439 return raw_quote (s);
440#else
441 int i, n= N (s);
442 string r;
443 for (i= 0; i < n; i++)
444 switch (s[i]) {
445 case '(':
446 case ')':
447 case '<':
448 case '>':
449 case '?':
450 case '&':
451 case '$':
452 case '`':
453 case '\"':
454 case '\\':
455 case ' ':
456 r << '\\' << s[i];
457 break;
458 case '\n':
459 r << "\\n";
460 break;
461 default:
462 r << s[i];
463 }
464 return r;
465#endif
466}
467
468string
469escape_generic (string s) {
470 int i, n= N (s);
471 string r;
472 for (i= 0; i < n; i++) {
473 if ((s[i] == '\2') || (s[i] == '\5') || (s[i] == '\33')) r << '\33';
474 r << s[i];
475 }
476 return r;
477}
478
479string
480escape_verbatim (string s) {
481 int i, n= N (s);
482 string r;
483 for (i= 0; i < n; i++) {
484 unsigned char c= (unsigned char) s[i];
485 if ((c == '\n') || (c == '\t')) r << ' ';
486 else if (((int) c) >= 32) r << s[i];
487 }
488 return r;
489}
490
491string
492escape_spaces (string s) {
493 int i, n= N (s);
494 string r;
495 for (i= 0; i < n; i++) {
496 unsigned char c= (unsigned char) s[i];
497 if (c == ' ') r << '\\';
498 r << c;
499 }
500 return r;
501}
502
503string
504dos_to_better (string s) {
505 int i, n= N (s);
506 string r;
507 for (i= 0; i < n; i++)
508 if (s[i] == '\015')
509 ;
510 else r << s[i];
511 return r;
512}
513
514string
515unescape_guile (string s) {
516 int i, n= N (s);
517 string r;
518 for (i= 0; i < n; i++) {
519 if (s[i] == '\\') {
520 if (i + 1 < n && s[i + 1] == '\\') {
521 r << "\\\\\\\\";
522 i+= 1;
523 }
524 else if (i + 3 < n && s[i + 1] == 'x' && is_hex_digit (s[i + 2]) &&
525 is_hex_digit (s[i + 3])) {
526 string e= s (i + 2, i + 4);
527 r << (unsigned char) lolly::data::from_hex (e);
528 i+= 3;
529 }
530 else r << s[i];
531 }
532 else r << s[i];
533 }
534 return r;
535}
536
537/******************************************************************************
538 * Reading input from a string
539 ******************************************************************************/
540
541bool
542test (string s, int i, const char* test) {
543 int n= N (s), j= 0;
544 while (test[j] != '\0') {
545 if (i >= n) return false;
546 if (s[i] != test[j]) return false;
547 i++;
548 j++;
549 }
550 return true;
551}
552
553bool
554test (string s, int i, string test) {
555 int n= N (s), m= N (test), j= 0;
556 while (j < m) {
557 if (i >= n) return false;
558 if (s[i] != test[j]) return false;
559 i++;
560 j++;
561 }
562 return true;
563}
564
565bool
566starts (string s, const char* what) {
567 return test (s, 0, what);
568}
569
570bool
571starts (string s, const string what) {
572 return test (s, 0, what);
573}
574
575bool
576ends (string s, const char* what) {
577 string r (what);
578 if (N (r) > N (s)) return false;
579 return s (N (s) - N (r), N (s)) == r;
580}
581
582bool
583ends (string s, const string r) {
584 if (N (r) > N (s)) return false;
585 return s (N (s) - N (r), N (s)) == r;
586}
587
588bool
589read (string s, int& i, const char* test) {
590 int n= N (s), j= 0, k= i;
591 while (test[j] != '\0') {
592 if (k >= n) return false;
593 if (s[k] != test[j]) return false;
594 j++;
595 k++;
596 }
597 i= k;
598 return true;
599}
600
601bool
602read (string s, string test) {
603 int i= 0;
604 return read (s, i, test);
605}
606
607bool
608read (string s, int& i, string test) {
609 int n= N (s), m= N (test), j= 0, k= i;
610 while (j < m) {
611 if (k >= n) return false;
612 if (s[k] != test[j]) return false;
613 j++;
614 k++;
615 }
616 i= k;
617 return true;
618}
619
620bool
621read_line (string s, int& i, string& result) {
622 int start= i;
623 for (; i < N (s); i++) {
624 if (s[i] == '\n') {
625 result= s (start, i++);
626 return true;
627 }
628 }
629 result= s (start, i);
630 return false;
631}
632
633bool
634read_int (string s, int& i, int& result) {
635 int n= N (s), start= i;
636 result= 0;
637 if (i == n) return false;
638 if (s[i] == '-') {
639 if (i + 1 == n) return false;
640 if (!is_digit (s[i + 1])) return false;
641 i++;
642 }
643 else if (!is_digit (s[i])) return false;
644 while ((i < n) && is_digit (s[i]))
645 i++;
646 result= as_int (s (start, i));
647 return true;
648}
649
650bool
651read_double (string s, int& i, double& result) {
652 int n= N (s), start= i;
653 result= 0.0;
654 if (i == n) return false;
655 if (s[i] == '-') {
656 if (i + 1 == n) return false;
657 if (!is_numeric (s[i + 1])) return false;
658 i++;
659 }
660 else if (!is_numeric (s[i])) return false;
661 while ((i < n) && is_digit (s[i]))
662 i++;
663 if ((i < n) && (s[i] == '.')) i++;
664 while ((i < n) && is_digit (s[i]))
665 i++;
666 if ((i < n) && ((s[i] == 'e') || (s[i] == 'E'))) {
667 i++;
668 if ((i < n) && (s[i] == '-')) i++;
669 if ((i == n) || (!is_digit (s[i]))) {
670 i= start;
671 return false;
672 }
673 while ((i < n) && is_digit (s[i]))
674 i++;
675 }
676 result= as_double (s (start, i));
677 return true;
678}
679
680bool
681read_word (string s, int& i, string& result) {
682 int opos= i;
683 while (i < N (s) && is_alpha (s[i])) {
684 i++;
685 }
686 result= s (opos, i);
687 return i > opos;
688}
689
690bool
691is_whitespace (string s) {
692 for (int i= 0; i < N (s); i++)
693 if (s[i] != ' ' && s[i] != '\t' && s[i] != '\n') return false;
694 return true;
695}
696
697void
698skip_spaces (string s, int& i) {
699 int n= N (s);
700 while ((i < n) && ((s[i] == ' ') || (s[i] == '\t')))
701 i++;
702}
703
704void
705skip_whitespace (string s, int& i) {
706 int n= N (s);
707 while ((i < n) && ((s[i] == ' ') || (s[i] == '\t') || (s[i] == '\n')))
708 i++;
709}
710
711void
712skip_line (string s, int& i) {
713 int n= N (s);
714 while ((i < n) && (s[i] != '\n'))
715 i++;
716 if (i < n) i++;
717}
718
719void
720skip_symbol (string s, int& i) {
721 int n= N (s);
722 if (i < n) {
723 if (s[i] == '<') {
724 for (i++; i < n; i++)
725 if (s[i - 1] == '>') break;
726 }
727 else i++;
728 }
729}
730
731string
732convert_tabs_to_spaces (string s, int tw) {
733 int i= 0, ts= 0, n= N (s);
734 string r= "";
735 while (i < n) {
736 if (s[i] == '\t') {
737 r << string (' ', tw - ((i - ts) % tw));
738 ts= i + 1;
739 }
740 else if (s[i] == '\n') {
741 ts= i + 1;
742 r << s[i];
743 }
744 else r << s[i];
745 i++;
746 }
747 return r;
748}
749
750/******************************************************************************
751 * Parsing binary data
752 ******************************************************************************/
753
754void
755parse (string s, int& pos, QI& ret) {
756 ret= (QI) s[pos++];
757}
758
759void
760parse (string s, int& pos, QN& ret) {
761 ret= (QN) s[pos++];
762}
763
764void
765parse (string s, int& pos, HI& ret) {
766 QI c1= (QI) s[pos++];
767 QN c2= (QN) s[pos++];
768 ret = (((HI) c1) << 8) + c2;
769}
770
771void
772parse (string s, int& pos, HN& ret) {
773 QN c1= (QN) s[pos++];
774 QN c2= (QN) s[pos++];
775 ret = (((HN) c1) << 8) + c2;
776}
777
778void
779parse (string s, int& pos, SI& ret) {
780 QI c1= (QI) s[pos++];
781 QN c2= (QN) s[pos++];
782 QN c3= (QN) s[pos++];
783 QN c4= (QN) s[pos++];
784 ret = (((((((SI) c1) << 8) + ((SI) c2)) << 8) + ((SI) c3)) << 8) + c4;
785}
786
787void
788parse (string s, int& pos, SI*& a, int len) {
789 int i;
791 for (i= 0; i < len; i++)
792 parse (s, pos, a[i]);
793}
794
795/******************************************************************************
796 * Searching, replacing and pattern matching
797 ******************************************************************************/
798
799int
800index_of (string s, char c) {
801 int s_N= N (s);
802 for (int i= 0; i < s_N; i++) {
803 if (s[i] == c) {
804 return i;
805 }
806 }
807 return -1;
808}
809
810int
811search_forwards (array<string> a, int pos, string in) {
812 int n= N (in), na= N (a);
813 while (pos <= n) {
814 for (int i= 0; i < na; i++)
815 if (N (a[i]) > 0 && in[pos] == a[i][0] && test (in, pos, a[i]))
816 return pos;
817 pos++;
818 }
819 return -1;
820}
821
822int
823search_forwards (string s, int pos, string in) {
824 int k= N (s), n= N (in);
825 if (k == 0) return pos;
826 char c= s[0];
827 while (pos + k <= n) {
828 if (in[pos] == c && test (in, pos, s)) return pos;
829 pos++;
830 }
831 return -1;
832}
833
834int
835search_forwards (string s, string in) {
836 return search_forwards (s, 0, in);
837}
838
839bool
840occurs (string what, string in) {
841 return search_forwards (what, 0, in) >= 0;
842}
843
844bool
845contains (string s, string what) {
846 return search_forwards (what, 0, s) >= 0;
847}
848
849bool
850contains (string s, char c) {
851 int s_N= N (s);
852 for (int i= 0; i < s_N; i++) {
853 if (s[i] == c) {
854 return true;
855 }
856 }
857 return false;
858}
859
860int
861search_backwards (string s, int pos, string in) {
862 while (pos >= 0) {
863 if (test (in, pos, s)) return pos;
864 pos--;
865 }
866 return -1;
867}
868
869int
870search_backwards (string s, string in) {
871 return search_backwards (s, N (in) - N (s), in);
872}
873
874int
875count_occurrences (string s, string in) {
876 int count= 0;
877 int i= 0, next, n= N (in);
878 while (i < n) {
879 next= search_forwards (s, i, in);
880 if (next == -1) break;
881 count++;
882 i= next + 1;
883 }
884 return count;
885}
886
887int
888overlapping (string s1, string s2) {
889 // return the longuest string being suffix of s1 and prefix of s2
890 int i= min (N (s1), N (s2)), n= N (s1);
891 while (i > 0) {
892 if (s1 (n - i, n) == s2 (0, i)) return i;
893 i--;
894 }
895 return 0;
896}
897
898string
899replace (string s, string what, string by) {
900 int i, n= N (s);
901 string r;
902 for (i= 0; i < n;)
903 if (test (s, i, what)) {
904 r << by;
905 i+= N (what);
906 }
907 else {
908 r << s[i];
909 i++;
910 }
911 return r;
912}
913
914static bool
915match_wildcard (string s, int spos, string w, int wpos) {
916 if (wpos == N (w)) return spos == N (s);
917 if (w[wpos] != '*')
918 return (spos < N (s)) && (s[spos] == w[wpos]) &&
919 match_wildcard (s, spos + 1, w, wpos + 1);
920 while ((wpos < N (w)) && (w[wpos] == '*'))
921 wpos++;
922 while (spos <= N (s)) {
923 if (match_wildcard (s, spos, w, wpos)) return true;
924 spos++;
925 }
926 return false;
927}
928
929bool
930match_wildcard (string s, string w) {
931 return match_wildcard (s, 0, w, 0);
932}
933
934int
935find_non_alpha (string s, int pos, bool forward) {
936 if (forward) {
937 for (; pos < N (s); pos++)
938 if (!is_alpha (s[pos])) return pos;
939 }
940 else {
941 for (; pos > 0; pos--)
942 if (!is_alpha (s[pos - 1])) return pos - 1;
943 }
944 return -1;
945}
946
948tokenize (string s, string sep) {
949 int start= 0;
951 for (int i= 0; i < N (s);)
952 if (test (s, i, sep)) {
953 a << s (start, i);
954 i+= N (sep);
955 start= i;
956 }
957 else i++;
958 a << s (start, N (s));
959 return a;
960}
961
962string
964 string r;
965 for (int i= 0; i < N (a); i++) {
966 if (i != 0) r << sep;
967 r << a[i];
968 }
969 return r;
970}
971
972string
974 int start;
975 for (start= 0; start < N (s) && is_space (s[start]); start++)
976 ;
977 return s (start, N (s));
978}
979
980string
982 int end;
983 for (end= N (s) - 1; end >= 0 && is_space (s[end]); end--)
984 ;
985 return s (0, end + 1);
986}
987
988string
989trim_spaces (string s) {
991}
992
995 array<string> b (N (a));
996 for (int i= 0; i < N (a); i++)
997 b[i]= trim_spaces (a[i]);
998 return b;
999}
1000
1001/******************************************************************************
1002 * Differences between two strings
1003 ******************************************************************************/
1004
1005static int
1006find_longest (string s1, string s2, int& c1, int& c2) {
1007 int n1= N (s1), n2= N (s2), bc= 0, bl= 0, br= 0;
1008 for (c2= 0; c2 < n2; c2++)
1009 if (s1[c1] == s2[c2]) {
1010 int l= 0, r= 0;
1011 while (c1 + r < n1 && c2 + r < n2 && s1[c1 + r] == s2[c2 + r])
1012 r++;
1013 while (l < c1 && l < c2 && s1[c1 - l - 1] == s2[c2 - l - 1])
1014 l++;
1015 if (l + r > bl + br) {
1016 bc= c2;
1017 bl= l;
1018 br= r;
1019 }
1020 }
1021 if (bl + br > 0) {
1022 c1= c1 - bl;
1023 c2= bc - bl;
1024 }
1025 return bl + br;
1026}
1027
1028static void
1029find_common (string s1, string s2, int& c1, int& c2) {
1030 int best_len= 0;
1031 c1= c2= 0;
1032 int n1= N (s1), n2= N (s2);
1033 if (n1 == 0 || n2 == 0) return;
1034 int t= min (min (n1, n2), 6);
1035 for (int k= 1; k < t; k++) {
1036 int a1= (k * n1) / t, a2= (k * n2) / t;
1037 int len= find_longest (s1, s2, a1, a2);
1038 if (len > best_len) {
1039 best_len= len;
1040 c1 = a1;
1041 c2 = a2;
1042 }
1043 }
1044}
1045
1047differences (string s1, string s2) {
1048 int n1= N (s1), n2= N (s2);
1049 int i1= 0, i2= 0, j1= n1, j2= n2;
1050 while (i1 < j1 && i2 < j2 && s1[i1] == s2[i2]) {
1051 i1++;
1052 i2++;
1053 }
1054 while (i1 < j1 && i2 < j2 && s1[j1 - 1] == s2[j2 - 1]) {
1055 j1--;
1056 j2--;
1057 }
1058 if (i1 == i2 && j1 == j2) return array<int> ();
1059 if (i1 > 0 || i2 > 0 || j1 < n1 || j2 < n2) {
1060 array<int> r= differences (s1 (i1, j1), s2 (i2, j2));
1061 for (int k= 0; k < N (r); k+= 4) {
1062 r[k]+= i1;
1063 r[k + 1]+= i1;
1064 r[k + 2]+= i2;
1065 r[k + 3]+= i2;
1066 }
1067 return r;
1068 }
1069 else {
1070 int c1, c2;
1071 find_common (s1, s2, c1, c2);
1072 if (c1 == 0 && c2 == 0) {
1073 array<int> r;
1074 r << i1 << j1 << i2 << j2;
1075 return r;
1076 }
1077 else {
1078 array<int> r1= differences (s1 (0, c1), s2 (0, c2));
1079 array<int> r2= differences (s1 (c1, n1), s2 (c2, n2));
1080 for (int k= 0; k < N (r2); k+= 4) {
1081 r2[k]+= c1;
1082 r2[k + 1]+= c1;
1083 r2[k + 2]+= c2;
1084 r2[k + 3]+= c2;
1085 }
1086 r1 << r2;
1087 return r1;
1088 }
1089 }
1090}
1091
1092int
1093distance (string s1, string s2) {
1094 int d= 0;
1096 for (int k= 0; k < N (r); k+= 4)
1097 d+= max (r[k + 1] - r[k], r[k + 3] - r[k + 2]);
1098 return d;
1099}
1100
1101/******************************************************************************
1102 * Parse length
1103 ******************************************************************************/
1104
1105void
1106parse_length (string s, double& len, string& unit) {
1107 int start= 0;
1108 int i, n= N (s);
1109 for (i= start; i < n && !is_locase (s[i]); i++) {
1110 }
1111 string s1= s (start, i);
1112 string s2= s (i, n);
1113 if (is_double (s1) && (is_locase_alpha (s2) || is_empty (s2))) {
1114 len = as_double (s1);
1115 unit= s2;
1116 }
1117 else {
1118 len = 0.0;
1119 unit= "error";
1120 }
1121}
bool starts(string s, const char *what)
Definition analyze.cpp:566
string alpha_nr(int nr)
Generates an alphabetic string for an integer.
Definition analyze.cpp:377
bool is_alphanum(string s)
Definition analyze.cpp:55
string escape_spaces(string s)
Escape spaces in a string with a backslash.
Definition analyze.cpp:492
string igerman_to_german(string s)
Convert igerman string to german string.
Definition analyze.cpp:298
void skip_line(string s, int &i)
Definition analyze.cpp:712
string locase_all(string s)
Converts all uppercase characters in a string to lowercase.
Definition analyze.cpp:137
string unescape_guile(string s)
Unescape a Guile-syntax string.
Definition analyze.cpp:515
bool read(string s, int &i, const char *test)
Definition analyze.cpp:589
static char cork_to_il2(char c)
Definition analyze.cpp:348
int search_forwards(array< string > a, int pos, string in)
Definition analyze.cpp:811
string trim_spaces(string s)
Definition analyze.cpp:989
array< string > tokenize(string s, string sep)
Definition analyze.cpp:948
void skip_symbol(string s, int &i)
Definition analyze.cpp:720
bool ends(string s, const char *what)
Definition analyze.cpp:576
string string_union(string s1, string s2)
Union of two strings.
Definition analyze.cpp:151
static string il2_to_cork_string
Definition analyze.cpp:321
void parse_length(string s, double &len, string &unit)
Parses a string containing a length value and its unit.
Definition analyze.cpp:1106
string convert_tabs_to_spaces(string s, int tw)
Converts tabs in a string to spaces.
Definition analyze.cpp:732
string remove_prefix(string s, string prefix)
Remove the prefix from s if matches.
Definition analyze.cpp:168
string raw_quote(string s)
Add quotes around a string to indicate it's a string, not a symbol.
Definition analyze.cpp:420
static int find_longest(string s1, string s2, int &c1, int &c2)
Definition analyze.cpp:1006
bool contains(string s, string what)
Definition analyze.cpp:845
bool is_iso_alpha(char c)
Checks if a character is an ISO alphabetic character.
Definition analyze.cpp:21
bool is_whitespace(string s)
Definition analyze.cpp:691
int count_occurrences(string s, string in)
Definition analyze.cpp:875
string spanish_to_ispanish(string s)
Convert Spanish string to ispanish string.
Definition analyze.cpp:244
string escape_sh(string s)
Escape a string for use in shell scripts.
Definition analyze.cpp:437
string trim_spaces_left(string s)
Definition analyze.cpp:973
void skip_whitespace(string s, int &i)
Definition analyze.cpp:705
string upcase_first(string s)
Converts the first character of a string to uppercase.
Definition analyze.cpp:115
char locase(char c)
Converts an uppercase character to lowercase.
Definition analyze.cpp:101
bool is_alpha(string s)
Checks if a string contains only alphabetic characters.
Definition analyze.cpp:46
bool occurs(string what, string in)
Definition analyze.cpp:840
static char il2_to_cork(char c)
Definition analyze.cpp:341
bool read_int(string s, int &i, int &result)
Definition analyze.cpp:634
bool is_locase_alpha(string s)
Checks if a string contains only lowercase alphabetic characters.
Definition analyze.cpp:64
char closing_delimiter(char c)
Finds the closing delimiter corresponding to the given opening delimiter.
Definition analyze.cpp:107
string locase_first(string s)
Converts the first character of a string to lowercase.
Definition analyze.cpp:121
string recompose(array< string > a, string sep)
Definition analyze.cpp:963
string escape_generic(string s)
Escape a string with generic escape sequences.
Definition analyze.cpp:469
string ispanish_to_spanish(string s)
Convert ispanish string to Spanish string.
Definition analyze.cpp:186
array< int > differences(string s1, string s2)
Computes the differences between two strings by identifying the common substrings and returning the different ...
Definition analyze.cpp:1047
bool test(string s, int i, const char *test)
Definition analyze.cpp:542
string escape_verbatim(string s)
Escape a string to be displayed verbatim.
Definition analyze.cpp:480
char upcase(char c)
Converts a lowercase character to uppercase.
Definition analyze.cpp:95
static bool match_wildcard(string s, int spos, string w, int wpos)
Definition analyze.cpp:915
string string_minus(string s1, string s2)
Remove characters from one string that are in another string.
Definition analyze.cpp:156
bool is_iso_locase(char c)
Checks if a character is an ISO lowercase alphabetic character.
Definition analyze.cpp:28
string replace(string s, string what, string by)
Definition analyze.cpp:899
bool read_word(string s, int &i, string &result)
Definition analyze.cpp:681
int distance(string s1, string s2)
Computes a measure of difference (distance) between two strings.
Definition analyze.cpp:1093
bool read_double(string s, int &i, double &result)
Definition analyze.cpp:651
string upcase_all(string s)
Converts all lowercase characters in a string to uppercase.
Definition analyze.cpp:127
string fnsymbol_nr(int nr)
Generates footnote symbols for a given integer.
Definition analyze.cpp:390
int find_non_alpha(string s, int pos, bool forward)
Definition analyze.cpp:935
string german_to_igerman(string s)
Convert german string to igerman string.
Definition analyze.cpp:308
bool is_numeric(string s)
Checks if a string contains only numeric characters.
Definition analyze.cpp:82
void skip_spaces(string s, int &i)
Definition analyze.cpp:698
static string cork_to_il2_string
Definition analyze.cpp:330
bool is_iso_upcase(char c)
Checks if a character is an ISO uppercase alphabetic character.
Definition analyze.cpp:35
void parse(string s, int &pos, QI &ret)
Definition analyze.cpp:755
bool read_line(string s, int &i, string &result)
Definition analyze.cpp:621
string Alpha_nr(int nr)
Generates an uppercase alphabetic string for an integer.
Definition analyze.cpp:385
string dos_to_better(string s)
Convert DOS line endings to more standard line endings.
Definition analyze.cpp:504
static void find_common(string s1, string s2, int &c1, int &c2)
Definition analyze.cpp:1029
int search_backwards(string s, int pos, string in)
Definition analyze.cpp:861
int overlapping(string s1, string s2)
Definition analyze.cpp:888
string raw_unquote(string s)
Remove quotes from a string label.
Definition analyze.cpp:426
int index_of(string s, char c)
Definition analyze.cpp:800
string trim_spaces_right(string s)
Definition analyze.cpp:981
string remove_suffix(string s, string suffix)
Remove the suffix from s if matches.
Definition analyze.cpp:175
bool is_locase(char c)
Definition analyze.hpp:29
bool is_digit(char c)
Definition analyze.hpp:37
bool is_space(char c)
Definition analyze.hpp:63
bool is_hex_digit(char c)
Definition analyze.hpp:49
int N(array< T > a)
Get the length of the array.
Definition array.hpp:170
SI as_int(double x)
Converts a double to a signed integer, rounding to the nearest integer.
Definition basic.hpp:261
blackbox b[13]
blackbox t[13]
The list class represents a linked list.
Definition list.hpp:48
short HI
Definition minmax.hpp:12
SI min(SI i, SI j)
Returns the minimum of two signed integers.
Definition minmax.hpp:27
SI max(SI i, SI j)
Returns the maximum of two signed integers.
Definition minmax.hpp:40
unsigned short HN
Definition minmax.hpp:13
char QI
Definition minmax.hpp:14
int SI
Definition minmax.hpp:10
unsigned char QN
Definition minmax.hpp:15
int from_hex(string s)
Converts a hexadecimal string to an integer.
Definition numeral.cpp:214
bool is_double(string s)
Definition string.cpp:376
double as_double(string s)
Definition string.cpp:279
bool is_quoted(string s)
Definition string.cpp:408
bool is_empty(string s)
Definition string.cpp:354
string suffix(url u, bool use_locase)
Definition url.cpp:381