Lolly 1.4.27
Loading...
Searching...
No Matches
unicode.hpp
Go to the documentation of this file.
1
2/******************************************************************************
3 * MODULE : unicode.hpp
4 * DESCRIPTION: Unicode support
5 * COPYRIGHT : (C) 2023 Darcy Shen
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
11
12#pragma once
13
14#include "string.hpp"
15
16namespace lolly {
17namespace data {
18
19/**
20 * @brief Encode a 32-bit code value (uint32_t) as a UTF-8 string
21 * @param code The 32-bit value to encode
22 * @return The UTF-8 encoding of code, as a string_u8
23 */
25
26/**
27 * @brief Decode the UTF-8 data at position i of a string_u8 into a 32-bit value
28 * @param s The string_u8 string to decode from
29 * @param i The position in s at which to decode (passed by reference)
30 * @return The decoded value as a uint32_t
31 */
33
/**
 * @brief Get the range associated with the given code.
 * @param code The code to look up, as an int.
 *             NOTE(review): presumably a Unicode code point -- confirm against
 *             the definition in unicode.cpp.
 * @return A string describing the range.
 *         NOTE(review): the possible return values are not visible in this
 *         header -- confirm against the definition in unicode.cpp.
 */
34string unicode_get_range (int code);
35
36/**
37 * @brief Checks if a string contains only CJK Unified Ideographs.
38 *
39 * @param s The string to check.
40 * @return True if all characters in the string are CJK Unified Ideographs,
41 * otherwise false.
42 * @note This function expects the CJK Unified Ideographs to be in a specific
43 * encoded format.
44 */
45bool is_cjk_unified_ideographs (string s);
46
47/**
48 * @brief Checks if a string contains any CJK Unified Ideographs.
49 *
50 * @param s The string to check.
51 * @return True if the string contains at least one CJK Unified Ideograph,
52 * otherwise false.
53 * @note This function expects the CJK Unified Ideographs to be in a specific
54 * encoded format.
55 */
56bool has_cjk_unified_ideographs (string s);
57
58/**
59 * @brief Convert UTF-16 string to UTF-8 string
60 * @param s_u16 the string using the UTF-16 encoding
61 * @return the string using the UTF-8 encoding
62 * @note For invalid UTF-16 string, only the valid part will be converted
63 */
64string utf16_to_utf8 (string s_u16);
65
66#if defined(OS_MINGW) || defined(_WIN32) || defined(_WIN64)
67/**
68 * @brief Convert a wchar_t* string (UTF-16) to a UTF-8 string
69 * @param s_u16 pointer to the wchar_t string using the UTF-16 encoding
70 * @return the string using the UTF-8 encoding
71 * @note For invalid UTF-16 string, only the valid part will be converted
72 */
73string wchar_to_utf8 (const wchar_t* s_u16);
74#endif
75
76/**
77 * @brief Convert UTF-8 string to UTF-16 string
78 * @param s_u8 the string using the UTF-8 encoding
79 * @return the string using the UTF-16 encoding
80 * @note For invalid UTF-8 string, only the valid part will be converted
81 */
82string utf8_to_utf16 (string s_u8);
83} // namespace data
84} // namespace lolly
The list class represents a linked list.
Definition list.hpp:48
string_u8 encode_as_utf8(uint32_t code)
Encode 4 bytes as string_u8.
Definition unicode.cpp:19
string unicode_get_range(int code)
Definition unicode.cpp:99
uint32_t decode_from_utf8(string_u8 s, int &i)
Decode string_u8 string as 4 bytes at position i.
Definition unicode.cpp:52
bool is_cjk_unified_ideographs(string s)
Checks if a string contains only CJK Unified Ideographs.
Definition unicode.cpp:117
string utf16_to_utf8(string s_u16)
Convert UTF-16 string to UTF-8 string.
Definition unicode.cpp:155
string utf8_to_utf16(string s_u8)
Convert UTF-8 string to UTF-16 string.
Definition unicode.cpp:208
bool has_cjk_unified_ideographs(string s)
Checks if a string contains any CJK Unified Ideographs.
Definition unicode.cpp:136