Lolly 1.4.27
Loading...
Searching...
No Matches
analyze.hpp
Go to the documentation of this file.
1
2/******************************************************************************
3 * MODULE : analyze.hpp
4 * DESCRIPTION: Properties of characters and strings
5 * COPYRIGHT : (C) 1999 Joris van der Hoeven
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
11
12#ifndef ANALYZE_H
13#define ANALYZE_H
14
15#include <stdint.h>
16
17#include "array.hpp"
18#include "hashset.hpp"
19#include "ntuple.hpp"
20#include "string.hpp"
21
22class object;
23
/**
 * @brief Checks if a character is an unaccented Latin letter.
 *
 * @param c The character to check.
 * @return True if c lies in 'a'..'z' or 'A'..'Z', otherwise false.
 */
inline bool
is_alpha (char c) {
  return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
}
/**
 * @brief Checks if a character is a lowercase letter in 'a'..'z'.
 *
 * @param c The character to check.
 * @return True if c lies in 'a'..'z', otherwise false.
 */
inline bool
is_locase (char c) {
  return (c >= 'a') && (c <= 'z');
}
/**
 * @brief Checks if a character is an uppercase letter in 'A'..'Z'.
 *
 * @param c The character to check.
 * @return True if c lies in 'A'..'Z', otherwise false.
 */
inline bool
is_upcase (char c) {
  return (c >= 'A') && (c <= 'Z');
}
/**
 * @brief Checks if a character is a decimal digit.
 *
 * @param c The character to check.
 * @return True if c lies in '0'..'9', otherwise false.
 */
inline bool
is_digit (char c) {
  return (c >= '0') && (c <= '9');
}
/**
 * @brief Checks if a character is a binary digit.
 *
 * @param c The character to check.
 * @return True if c is '0' or '1', otherwise false.
 */
inline bool
is_binary_digit (char c) {
  return c == '0' || c == '1';
}
/**
 * @brief Checks if a character is an octal digit.
 *
 * @param c The character to check.
 * @return True if c lies in '0'..'7', otherwise false.
 */
inline bool
is_octal_digit (char c) {
  return c >= '0' && c <= '7';
}
/**
 * @brief Checks if a character is a hexadecimal digit.
 *
 * Both lowercase and uppercase letters are accepted.
 *
 * @param c The character to check.
 * @return True if c lies in '0'..'9', 'A'..'F' or 'a'..'f', otherwise false.
 */
inline bool
is_hex_digit (char c) {
  return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
         (c >= 'a' && c <= 'f');
}
/**
 * @brief Checks if a character can be part of a decimal number.
 *
 * @param c The character to check.
 * @return True if c is a decimal digit or the decimal point '.', otherwise
 * false.
 */
inline bool
is_numeric (char c) {
  return ((c >= '0') && (c <= '9')) || (c == '.');
}
/**
 * @brief Checks if a character is a punctuation mark.
 *
 * @param c The character to check.
 * @return True if c is one of . , : ' ` ; ! ? and false otherwise.
 */
inline bool
is_punctuation (char c) {
  return (c == '.') || (c == ',') || (c == ':') || (c == '\'') || (c == '`') ||
         (c == ';') || (c == '!') || (c == '?');
}
/**
 * @brief Checks if a character is a whitespace character.
 *
 * @param c The character to check.
 * @return True if c is a space or has character code 9, 10 or 13 (written
 * below as octal escapes), otherwise false.
 */
inline bool
is_space (char c) {
  // '\11', '\12' and '\15' are octal escapes for character codes 9, 10 and 13
  // (tab, line feed and carriage return in ASCII).
  return (c == ' ') || (c == '\11') || (c == '\12') || (c == '\15');
}
66
67/**
68 * @brief Checks if a character is an ISO alphabetic character
69 *
70 * @param c The character to be checked.
71 * @return True if the character is an ISO alphabetic character; otherwise,
72 * returns False.
73 */
74bool is_iso_alpha (char c);
75
76/**
77 * @brief Checks if a character is an ISO lowercase alphabetic character.
78 *
79 * @param c The character to check.
80 * @return True if the character is ISO lowercase alphabetic, otherwise false.
81 */
82bool is_iso_locase (char c);
83
84/**
85 * @brief Checks if a character is an ISO uppercase alphabetic character.
86 *
87 * @param c The character to check.
88 * @return True if the character is ISO uppercase alphabetic, otherwise false.
89 */
90bool is_iso_upcase (char c);
91
92/**
93 * @brief Checks if a string contains only alphabetic characters.
94 *
95 * @param s The string to check.
96 * @return True if all characters in the string are alphabetic, otherwise false.
97 */
98bool is_alpha (string s);
99
100bool is_alphanum (string s);
101
102/**
103 * @brief Checks if a string contains only lowercase alphabetic characters.
104 *
105 * @param s The string to check.
106 * @return True if all characters in the string are lowercase alphabetic,
107 * otherwise false.
108 */
109bool is_locase_alpha (string s);
110
111/**
112 * @brief Checks if a string contains only ISO alphabetic characters.
113 *
114 * @param s The string to check.
115 * @return True if all characters in the string are ISO alphabetic, otherwise
116 * false.
117 */
118bool is_iso_alpha (string s);
119
120/**
121 * @brief Checks if a string contains only numeric characters.
122 *
123 * @param s The string to check.
124 * @return True if all characters in the string are numeric, otherwise false.
125 */
126bool is_numeric (string s);
127
128/**
129 * @brief Converts a lowercase character to uppercase.
130 *
131 * @param s The character to convert.
132 * @return The uppercase version of the character if it is an ISO lowercase
133 * alphabetic character; otherwise, returns the original character.
134 */
135char upcase (char s);
136
137/**
138 * @brief Converts an uppercase character to lowercase.
139 *
140 * @param s The character to convert.
141 * @return The lowercase version of the character if it is an ISO uppercase
142 * alphabetic character; otherwise, returns the original character.
143 */
144char locase (char s);
145
146/**
147 * @brief Finds the closing delimiter corresponding to the given opening
148 * delimiter.
149 *
150 * @param c The opening delimiter character.
151 * @return The corresponding closing delimiter if known, otherwise returns the
152 * original character.
153 */
154char closing_delimiter (char c);
155
156/**
157 * @brief Converts the first character of a string to uppercase.
158 *
159 * @param s The string to convert.
160 * @return A new string where the first character is converted to uppercase, if
161 * it is an ISO lowercase alphabetic character.
162 */
163string upcase_first (string s);
164
165/**
166 * @brief Converts the first character of a string to lowercase.
167 *
168 * @param s The string to convert.
169 * @return A new string where the first character is converted to lowercase, if
170 * it is an ISO uppercase alphabetic character.
171 */
172string locase_first (string s);
173
174/**
175 * @brief Converts all lowercase characters in a string to uppercase.
176 *
177 * @param s The string to convert.
178 * @return A new string where all ISO lowercase alphabetic characters are
179 * converted to uppercase.
180 */
181string upcase_all (string s);
182
183/**
184 * @brief Converts all uppercase characters in a string to lowercase.
185 *
186 * @param s The string to convert.
187 * @return A new string where all ISO uppercase alphabetic characters are
188 * converted to lowercase.
189 */
190string locase_all (string s);
191
192/**
193 * @brief Union of two strings.
194 *
195 * This function performs the union operation between two strings.
196 *
197 * @param s1 The first string.
198 * @param s2 The second string.
199 * @return A new string containing the union of s1 and s2.
200 */
201string string_union (string s1, string s2);
202
203/**
204 * @brief Remove characters from one string that are in another string.
205 *
206 * @param s1 The first string.
207 * @param s2 The second string.
208 * @return A new string containing s1 - s2.
209 */
210string string_minus (string s1, string s2);
211
212/**
213 * @brief Remove the prefix from s if matches
214 * @param s the string
215 * @param prefix the prefix
216 * @return If the prefix matches, return s with prefix removed,
217 * otherwise, return s
218 */
219string remove_prefix (string s, string prefix);
220
221/**
222 * @brief Remove the suffix from s if matches
223 * @param s the string
224 * @param suffix the suffix
225 * @return If the suffix matches, return s with suffix removed,
226 * otherwise, return s
227 */
228string remove_suffix (string s, string suffix);
229
230string il2_to_cork (string s);
231string cork_to_il2 (string s);
232
233/**
234 * @brief Convert ispanish string to Spanish string.
235 *
236 * @param s The ispanish string.
237 * @return The converted Spanish string.
238 */
239string ispanish_to_spanish (string s);
240
241/**
242 * @brief Convert Spanish string to ispanish string.
243 *
244 * @param s The Spanish string.
245 * @return The converted ispanish string.
246 */
247string spanish_to_ispanish (string s);
248
249/**
250 * @brief Convert igerman string to german string.
251 *
252 * @param s The igerman string.
253 * @return The converted german string.
254 */
255string igerman_to_german (string s);
256
257/**
258 * @brief Convert german string to igerman string.
259 *
260 * @param s The german string.
261 * @return The converted igerman string.
262 */
263string german_to_igerman (string s);
264
265/**
266 * @brief Converts tabs in a string to spaces.
267 *
268 * @param s The original string with tabs.
269 * @param w The tab width.
270 * @return A new string with tabs replaced by spaces.
271 */
272string convert_tabs_to_spaces (string s, int w);
273
274/**
275 * @brief Generates an alphabetic string for an integer.
276 *
277 * @param nr The integer to be converted to an alphabetic string.
278 * @return A string representing the alphabetic character.
279 */
280string alpha_nr (int nr);
281
282/**
283 * @brief Generates an uppercase alphabetic string for an integer.
284 *
285 * @param nr The integer to be converted to an alphabetic string.
286 * @return A string representing the uppercase alphabetic character.
287 */
288string Alpha_nr (int nr);
289
290/**
291 * @brief Generates footnote symbols for a given integer.
292 *
293 * @param nr The integer to be converted to a footnote symbol.
294 * @return A string representing the footnote symbol.
295 */
296string fnsymbol_nr (int nr);
297
298/**
299 * @brief Add quotes around a string to indicate it's a string, not a symbol.
300 *
301 * This function is used for marking the label of a STRING tree as representing
302 * a string and not a symbol.
303 *
304 * @param s The input string.
305 * @return A new string with quotes around it.
306 */
307string raw_quote (string s);
308
309/**
310 * @brief Remove quotes from a string label.
311 *
312 * This function is used to get the string value of a STRING tree label
313 * representing a string.
314 *
315 * @param s The input string.
316 * @return The string without the quotes.
317 */
318string raw_unquote (string s);
319
320/**
321 * @brief Escape a string for use in shell scripts.
322 *
323 * @param s The input string.
324 * @return An escaped string.
325 */
326string escape_sh (string s);
327
328/**
329 * @brief Escape a string with generic escape sequences.
330 *
331 * @param s The input string.
332 * @return An escaped string.
333 */
334string escape_generic (string s);
335
336/**
337 * @brief Escape a string to be displayed verbatim.
338 *
339 * @param s The input string.
340 * @return An escaped string.
341 */
342string escape_verbatim (string s);
343
344/**
345 * @brief Escape spaces in a string with a backslash.
346 *
347 * @param s The input string.
348 * @return A string with escaped spaces.
349 */
350string escape_spaces (string s);
351
352/**
353 * @brief Unescape a Guile-syntax string.
354 *
355 * @param s The input string.
356 * @return A string with special characters unescaped.
357 */
358string unescape_guile (string s);
359
360/**
361 * @brief Convert DOS line endings to more standard line endings.
362 *
363 * @param s The input string.
364 * @return A string with Unix-style line endings.
365 */
366string dos_to_better (string s);
367
368bool test (string s, int i, const char* test);
369bool test (string s, int i, string test);
370bool starts (string s, const char* test);
371bool starts (string s, const string test);
372bool ends (string s, const char* test);
373bool ends (string s, const string test);
374bool read (string s, int& i, const char* test);
375bool read (string s, int& i, string test);
376bool read (string s, string test);
377bool read_line (string s, int& i, string& result);
378bool read_int (string s, int& i, int& result);
379bool read_double (string s, int& i, double& result);
380bool read_word (string s, int& i, string& result);
381bool is_whitespace (string s);
382void skip_spaces (string s, int& i);
383void skip_whitespace (string s, int& i);
384void skip_line (string s, int& i);
385void skip_symbol (string s, int& i);
386
387void parse (string s, int& pos, QI& ret);
388void parse (string s, int& pos, QN& ret);
389void parse (string s, int& pos, HI& ret);
390void parse (string s, int& pos, HN& ret);
391void parse (string s, int& pos, SI& ret);
392void parse (string s, int& pos, SI*& a, int len);
393
394int index_of (string s, char c);
395
396/**
397 * Searches for a substring in a string.
398 *
399 * @param what The substring to search for.
400 * @param in The string to search in.
401 * @return Position where the substring was found, or -1 if not found.
402 */
403int search_forwards (string what, string in);
404
405/**
406 * Searches for a substring in a string starting from a specified position.
407 *
408 * @param what The substring to search for.
409 * @param pos The starting position in the string to search from.
410 * @param in The string to search in.
411 * @return Position where the substring was found, or -1 if not found.
412 */
413int search_forwards (string what, int pos, string in);
414
415/**
416 * Searches forwards in a string for substrings taken from a list, starting
417 * from a specified position.
418 *
419 * @param what_list The list of substrings to search for.
420 * @param pos The starting position in the string to search from.
421 * @param in The string to search in.
422 * @return Position where the substring was found, or -1 if not found.
423 */
424int search_forwards (array<string> what_list, int pos, string in);
425
426/**
427 * Searches for a substring in a string, in reverse.
428 *
429 * @param what The substring to search for.
430 * @param in The string to search in.
431 * @return Position where the substring was found, or -1 if not found.
432 */
433int search_backwards (string what, string in);
434
435/**
436 * Searches for a substring in a string starting from a specified position, in
437 * reverse.
438 *
439 * @param what The substring to search for.
440 * @param pos The starting position in the string to search from.
441 * @param in The string to search in.
442 * @return Position where the substring was found, or -1 if not found.
443 */
444int search_backwards (string what, int pos, string in);
445
446/**
447 * Counts the occurrences of a substring in a string.
448 *
449 * @param what The substring to count.
450 * @param in The string to search in.
451 * @return Number of occurrences of the substring.
452 */
453int count_occurrences (string what, string in);
454
455/**
456 * Checks whether a substring occurs within another string.
457 *
458 * @param what The string to find.
459 * @param in The string in which to search.
460 * @return True if the string occurs, otherwise false.
461 */
462bool occurs (string what, string in);
463
464bool contains (string s, string what);
465bool contains (string s, char c);
466
467/**
468 * Finds the length of the longest string that is both a suffix of the first
469 * string and a prefix of the second string.
470 *
471 * @param s1 The first string.
472 * @param s2 The second string.
473 * @return The length of the longest overlapping string.
474 */
475int overlapping (string s1, string s2);
476
477/**
478 * Replaces all occurrences of a specific substring within a string.
479 *
480 * @param s The original string.
481 * @param what The substring to replace.
482 * @param by The string with which to replace the substring.
483 * @return The new string with all occurrences of the substring replaced.
484 */
485string replace (string s, string what, string by);
486
487/**
488 * Matches a string against a wildcard pattern.
489 *
490 * @param s The string to match.
491 * @param w The wildcard pattern.
492 * @return True if the string matches the wildcard pattern, otherwise false.
493 */
494bool match_wildcard (string s, string w);
495
496/**
497 * Finds the position of the first non-alphabetic character in a string.
498 *
499 * @param s The string to search.
500 * @param pos The position from which to start the search.
501 * @param forward True to search forward, false to search backward.
502 * @return The position of the first non-alphabetic character; -1 if not found.
503 */
504int find_non_alpha (string s, int pos, bool forward);
505
506/**
507 * Splits a string into an array of strings based on a separator string.
508 *
509 * @param s The string to split.
510 * @param sep The separator string.
511 * @return An array of strings split based on the separator.
512 */
513array<string> tokenize (string s, string sep);
514
515/**
516 * Joins an array of strings into a single string, separated by a specific
517 * string.
518 *
519 * @param a The array of strings to join.
520 * @param sep The separator string.
521 * @return A single string consisting of the array elements separated by the
522 * separator.
523 */
524string recompose (array<string> a, string sep);
525
526/**
527 * Removes all leading spaces from a string.
528 *
529 * @param s The string to trim.
530 * @return The string without leading spaces.
531 */
532string trim_spaces_left (string s);
533
534/**
535 * Removes all leading spaces from a string array.
536 *
537 * @param a The string array to trim.
538 * @return The string array without leading spaces.
539 */
541
542/**
543 * Removes all trailing spaces from a string.
544 *
545 * @param s The string to trim.
546 * @return The string without trailing spaces.
547 */
548string trim_spaces_right (string s);
549
550/**
551 * Removes all trailing spaces from a string array.
552 *
553 * @param s The string array to trim.
554 * @return The string array without trailing spaces.
555 */
557
558/**
559 * Removes all leading and trailing spaces from a string.
560 *
561 * @param s The string to trim.
562 * @return The string without leading and trailing spaces.
563 */
564string trim_spaces (string s);
565
566/**
567 * Removes all leading and trailing spaces from an array of strings.
568 *
569 * @param a The array of strings to trim.
570 * @return An array of strings with all leading and trailing spaces removed.
571 */
573
574/**
575 * @brief Computes the differences between two strings by identifying the
576 * common substrings and returning the indices of the differing sections.
577 *
578 * @param s1 The first string.
579 * @param s2 The second string.
580 * @return An array containing sets of four integers, one set per differing
580 * section.
581 */
582array<int> differences (string s1, string s2);
583
584/**
585 * @brief Computes a measure of difference (distance) between two strings.
586 *
587 * @param s1 The first string.
588 * @param s2 The second string.
589 * @return An integer representing the sum of lengths of all differing sections
590 * between the two strings.
591 */
592int distance (string s1, string s2);
593
594/**
595 * @brief Parses a string containing a length value and its unit.
596 *
597 * @param[in] s The string to parse, e.g., "12.34cm".
598 * @param[out] len The parsed numerical value of the length.
599 * @param[out] unit The unit of the length as a string, e.g., "cm".
600 */
601void parse_length (string s, double& len, string& unit);
602
603#endif // defined ANALYZE_H
string alpha_nr(int nr)
Generates an alphabetic string for an integer.
Definition analyze.cpp:377
string cork_to_il2(string s)
Definition analyze.cpp:364
bool is_alphanum(string s)
Definition analyze.cpp:55
string escape_spaces(string s)
Escape spaces in a string with a backslash.
Definition analyze.cpp:492
string igerman_to_german(string s)
Convert igerman string to german string.
Definition analyze.cpp:298
bool is_locase(char c)
Definition analyze.hpp:29
void skip_line(string s, int &i)
Definition analyze.cpp:712
string locase_all(string s)
Converts all uppercase characters in a string to lowercase.
Definition analyze.cpp:137
string unescape_guile(string s)
Unescape a Guile-syntax string.
Definition analyze.cpp:515
bool read(string s, int &i, const char *test)
Definition analyze.cpp:589
bool is_digit(char c)
Definition analyze.hpp:37
bool is_space(char c)
Definition analyze.hpp:63
bool is_numeric(char c)
Definition analyze.hpp:54
string trim_spaces(string s)
Definition analyze.cpp:989
array< string > tokenize(string s, string sep)
Definition analyze.cpp:948
void skip_symbol(string s, int &i)
Definition analyze.cpp:720
string il2_to_cork(string s)
Definition analyze.cpp:355
bool is_binary_digit(char c)
Definition analyze.hpp:41
string string_union(string s1, string s2)
Union of two strings.
Definition analyze.cpp:151
void parse_length(string s, double &len, string &unit)
Parses a string containing a length value and its unit.
Definition analyze.cpp:1106
string remove_prefix(string s, string prefix)
Remove the prefix from s if matches.
Definition analyze.cpp:168
int search_backwards(string what, string in)
Definition analyze.cpp:870
string raw_quote(string s)
Add quotes around a string to indicate it's a string, not a symbol.
Definition analyze.cpp:420
bool is_punctuation(char c)
Definition analyze.hpp:58
bool contains(string s, string what)
Definition analyze.cpp:845
bool is_iso_alpha(char c)
Checks if a character is an ISO alphabetic character.
Definition analyze.cpp:21
bool is_whitespace(string s)
Definition analyze.cpp:691
string spanish_to_ispanish(string s)
Convert Spanish string to ispanish string.
Definition analyze.cpp:244
string escape_sh(string s)
Escape a string for use in shell scripts.
Definition analyze.cpp:437
string trim_spaces_left(string s)
Definition analyze.cpp:973
void skip_whitespace(string s, int &i)
Definition analyze.cpp:705
string upcase_first(string s)
Converts the first character of a string to uppercase.
Definition analyze.cpp:115
int search_forwards(string what, string in)
Definition analyze.cpp:835
bool occurs(string what, string in)
Definition analyze.cpp:840
bool read_int(string s, int &i, int &result)
Definition analyze.cpp:634
char upcase(char s)
Converts a lowercase character to uppercase.
Definition analyze.cpp:95
bool is_locase_alpha(string s)
Checks if a string contains only lowercase alphabetic characters.
Definition analyze.cpp:64
char closing_delimiter(char c)
Finds the closing delimiter corresponding to the given opening delimiter.
Definition analyze.cpp:107
string locase_first(string s)
Converts the first character of a string to lowercase.
Definition analyze.cpp:121
string recompose(array< string > a, string sep)
Definition analyze.cpp:963
string escape_generic(string s)
Escape a string with generic escape sequences.
Definition analyze.cpp:469
string ispanish_to_spanish(string s)
Convert ispanish string to Spanish string.
Definition analyze.cpp:186
array< int > differences(string s1, string s2)
the differences between two strings by identifying the common substrings and returning the different ...
Definition analyze.cpp:1047
bool is_alpha(char c)
Definition analyze.hpp:25
bool is_hex_digit(char c)
Definition analyze.hpp:49
bool test(string s, int i, const char *test)
Definition analyze.cpp:542
string escape_verbatim(string s)
Escape a string to be displayed verbatim.
Definition analyze.cpp:480
bool is_octal_digit(char c)
Definition analyze.hpp:45
string string_minus(string s1, string s2)
Remove characters from one string that are in another string.
Definition analyze.cpp:156
bool is_iso_locase(char c)
Checks if a character is an ISO lowercase alphabetic character.
Definition analyze.cpp:28
string replace(string s, string what, string by)
Definition analyze.cpp:899
bool read_word(string s, int &i, string &result)
Definition analyze.cpp:681
int distance(string s1, string s2)
a measure of difference (distance) between two strings.
Definition analyze.cpp:1093
bool read_double(string s, int &i, double &result)
Definition analyze.cpp:651
string upcase_all(string s)
Converts all lowercase characters in a string to uppercase.
Definition analyze.cpp:127
bool ends(string s, const char *test)
Definition analyze.cpp:576
string fnsymbol_nr(int nr)
Generates footnote symbols for a given integer.
Definition analyze.cpp:390
int find_non_alpha(string s, int pos, bool forward)
Definition analyze.cpp:935
string german_to_igerman(string s)
Convert german string to igerman string.
Definition analyze.cpp:308
char locase(char s)
Converts an uppercase character to lowercase.
Definition analyze.cpp:101
void skip_spaces(string s, int &i)
Definition analyze.cpp:698
int count_occurrences(string what, string in)
Definition analyze.cpp:875
bool is_iso_upcase(char c)
Checks if a character is an ISO uppercase alphabetic character.
Definition analyze.cpp:35
void parse(string s, int &pos, QI &ret)
Definition analyze.cpp:755
bool read_line(string s, int &i, string &result)
Definition analyze.cpp:621
string Alpha_nr(int nr)
Generates an uppercase alphabetic string for an integer.
Definition analyze.cpp:385
bool starts(string s, const char *test)
Definition analyze.cpp:566
string dos_to_better(string s)
Convert DOS line endings to more standard line endings.
Definition analyze.cpp:504
bool match_wildcard(string s, string w)
Definition analyze.cpp:930
string convert_tabs_to_spaces(string s, int w)
Converts tabs in a string to spaces.
Definition analyze.cpp:732
bool is_upcase(char c)
Definition analyze.hpp:33
int overlapping(string s1, string s2)
Definition analyze.cpp:888
string raw_unquote(string s)
Remove quotes from a string label.
Definition analyze.cpp:426
int index_of(string s, char c)
Definition analyze.cpp:800
string trim_spaces_right(string s)
Definition analyze.cpp:981
string remove_suffix(string s, string suffix)
Remove the suffix from s if matches.
Definition analyze.cpp:175
The list class represents a linked list.
Definition list.hpp:48
short HI
Definition minmax.hpp:12
unsigned short HN
Definition minmax.hpp:13
char QI
Definition minmax.hpp:14
int SI
Definition minmax.hpp:10
unsigned char QN
Definition minmax.hpp:15
string suffix(url u, bool use_locase)
Definition url.cpp:381