1 /* SPDX-License-Identifier: GPL-2.0+ */ 2 /* 3 * charset conversion utils 4 * 5 * Copyright (c) 2017 Rob Clark 6 */ 7 8 #ifndef __CHARSET_H_ 9 #define __CHARSET_H_ 10 11 #include <linux/kernel.h> 12 #include <linux/types.h> 13 14 #define MAX_UTF8_PER_UTF16 3 15 16 /** 17 * codepage_437 - Unicode to codepage 437 translation table 18 */ 19 extern const u16 codepage_437[128]; 20 21 /** 22 * console_read_unicode() - read Unicode code point from console 23 * 24 * @code: pointer to store Unicode code point 25 * Return: 0 = success 26 */ 27 int console_read_unicode(s32 *code); 28 29 /** 30 * utf8_get() - get next UTF-8 code point from buffer 31 * 32 * @src: pointer to current byte, updated to point to next byte 33 * Return: code point, or 0 for end of string, or -1 if no legal 34 * code point is found. In case of an error src points to 35 * the incorrect byte. 36 */ 37 s32 utf8_get(const char **src); 38 39 /** 40 * utf8_put() - write UTF-8 code point to buffer 41 * 42 * @code: code point 43 * @dst: pointer to destination buffer, updated to next position 44 * Return: -1 if the input parameters are invalid 45 */ 46 int utf8_put(s32 code, char **dst); 47 48 /** 49 * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion 50 * to utf-16 51 * 52 * @src: utf-8 string 53 * @count: maximum number of code points to convert 54 * Return: length in u16 after conversion to utf-16 without the 55 * trailing \0. If an invalid UTF-8 sequence is hit one 56 * u16 will be reserved for a replacement character. 57 */ 58 size_t utf8_utf16_strnlen(const char *src, size_t count); 59 60 /** 61 * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 62 * 63 * @a: utf-8 string 64 * Return: length in u16 after conversion to utf-16 without the 65 * trailing \0. If an invalid UTF-8 sequence is hit one 66 * u16 will be reserved for a replacement character. 67 */ 68 #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) 69 70 /** 71 * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string 72 * 73 * @dst: destination buffer 74 * @src: source buffer 75 * @count: maximum number of code points to copy 76 * Return: -1 if the input parameters are invalid 77 */ 78 int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); 79 80 /** 81 * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string 82 * 83 * @d: destination buffer 84 * @s: source buffer 85 * Return: -1 if the input parameters are invalid 86 */ 87 #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) 88 89 /** 90 * utf16_get() - get next UTF-16 code point from buffer 91 * 92 * @src: pointer to current word, updated to point to next word 93 * Return: code point, or 0 for end of string, or -1 if no legal 94 * code point is found. In case of an error src points to 95 * the incorrect word. 96 */ 97 s32 utf16_get(const u16 **src); 98 99 /** 100 * utf16_put() - write UTF-16 code point to buffer 101 * 102 * @code: code point 103 * @dst: pointer to destination buffer, updated to next position 104 * Return: -1 if the input parameters are invalid 105 */ 106 int utf16_put(s32 code, u16 **dst); 107 108 /** 109 * utf16_strnlen() - length of a truncated utf-16 string 110 * 111 * @src: utf-16 string 112 * @count: maximum number of code points to convert 113 * Return: length in code points. If an invalid UTF-16 sequence is 114 * hit one position will be reserved for a replacement 115 * character. 116 */ 117 size_t utf16_strnlen(const u16 *src, size_t count); 118 119 /** 120 * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion 121 * to utf-8 122 * 123 * @src: utf-16 string 124 * @count: maximum number of code points to convert 125 * Return: length in bytes after conversion to utf-8 without the 126 * trailing \0. If an invalid UTF-16 sequence is hit one 127 * byte will be reserved for a replacement character. 128 */ 129 size_t utf16_utf8_strnlen(const u16 *src, size_t count); 130 131 /** 132 * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 133 * 134 * @a: utf-16 string 135 * Return: length in bytes after conversion to utf-8 without the 136 * trailing \0. If an invalid UTF-16 sequence is hit one 137 * byte will be reserved for a replacement character. 138 */ 139 #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) 140 141 /** 142 * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string 143 * 144 * @dst: destination buffer 145 * @src: source buffer 146 * @count: maximum number of code points to copy 147 * Return: -1 if the input parameters are invalid 148 */ 149 int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); 150 151 /** 152 * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string 153 * 154 * @d: destination buffer 155 * @s: source buffer 156 * Return: -1 if the input parameters are invalid 157 */ 158 #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) 159 160 /** 161 * utf_to_lower() - convert a Unicode letter to lower case 162 * 163 * @code: letter to convert 164 * Return: lower case letter or unchanged letter 165 */ 166 s32 utf_to_lower(const s32 code); 167 168 /** 169 * utf_to_upper() - convert a Unicode letter to upper case 170 * 171 * @code: letter to convert 172 * Return: upper case letter or unchanged letter 173 */ 174 s32 utf_to_upper(const s32 code); 175 176 /** 177 * u16_strncmp() - compare two u16 string 178 * 179 * @s1: first string to compare 180 * @s2: second string to compare 181 * @n: maximum number of u16 to compare 182 * Return: 0 if the first n u16 are the same in s1 and s2 183 * < 0 if the first different u16 in s1 is less than the 184 * corresponding u16 in s2 185 * > 0 if the first different u16 in s1 is greater than the 186 * corresponding u16 in s2 187 */ 188 int u16_strncmp(const u16 *s1, const u16 *s2, size_t n); 189 190 /** 191 * u16_strcmp() - compare two u16 string 192 * 193 * @s1: first string to compare 194 * @s2: second string to compare 195 * Return: 0 if the first n u16 are the same in s1 and s2 196 * < 0 if the first different u16 in s1 is less than the 197 * corresponding u16 in s2 198 * > 0 if the first different u16 in s1 is greater than the 199 * corresponding u16 in s2 200 */ 201 #define u16_strcmp(s1, s2) u16_strncmp((s1), (s2), SIZE_MAX) 202 203 /** 204 * u16_strlen - count non-zero words 205 * 206 * This function matches wsclen() if the -fshort-wchar compiler flag is set. 207 * In the EFI context we explicitly need a function handling u16 strings. 208 * 209 * @in: null terminated u16 string 210 * Return: number of non-zero words. 211 * This is not the number of utf-16 letters! 212 */ 213 size_t u16_strlen(const void *in); 214 215 /** 216 * u16_strsize() - count size of u16 string in bytes including the null 217 * character 218 * 219 * Counts the number of bytes occupied by a u16 string 220 * 221 * @in: null terminated u16 string 222 * Return: bytes in a u16 string 223 */ 224 size_t u16_strsize(const void *in); 225 226 /** 227 * u16_strnlen() - count non-zero words 228 * 229 * This function matches wscnlen_s() if the -fshort-wchar compiler flag is set. 230 * In the EFI context we explicitly need a function handling u16 strings. 231 * 232 * @in: null terminated u16 string 233 * @count: maximum number of words to count 234 * Return: number of non-zero words. 235 * This is not the number of utf-16 letters! 236 */ 237 size_t u16_strnlen(const u16 *in, size_t count); 238 239 /** 240 * u16_strcpy() - copy u16 string 241 * 242 * Copy u16 string pointed to by src, including terminating null word, to 243 * the buffer pointed to by dest. 244 * 245 * @dest: destination buffer 246 * @src: source buffer (null terminated) 247 * Return: 'dest' address 248 */ 249 u16 *u16_strcpy(u16 *dest, const u16 *src); 250 251 /** 252 * u16_strdup() - duplicate u16 string 253 * 254 * Copy u16 string pointed to by src, including terminating null word, to a 255 * newly allocated buffer. 256 * 257 * @src: source buffer (null terminated) 258 * Return: allocated new buffer on success, NULL on failure 259 */ 260 u16 *u16_strdup(const void *src); 261 262 /** 263 * utf16_to_utf8() - Convert an utf16 string to utf8 264 * 265 * Converts 'size' characters of the utf16 string 'src' to utf8 266 * written to the 'dest' buffer. 267 * 268 * NOTE that a single utf16 character can generate up to 3 utf8 269 * characters. See MAX_UTF8_PER_UTF16. 270 * 271 * @dest: the destination buffer to write the utf8 characters 272 * @src: the source utf16 string 273 * @size: the number of utf16 characters to convert 274 * Return: the pointer to the first unwritten byte in 'dest' 275 */ 276 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); 277 278 /** 279 * utf_to_cp() - translate Unicode code point to 8bit codepage 280 * 281 * Codepoints that do not exist in the codepage are rendered as question mark. 282 * 283 * @c: pointer to Unicode code point to be translated 284 * @codepage: Unicode to codepage translation table 285 * Return: 0 on success, -ENOENT if codepoint cannot be translated 286 */ 287 int utf_to_cp(s32 *c, const u16 *codepage); 288 289 /** 290 * utf8_to_cp437_stream() - convert UTF-8 stream to codepage 437 291 * 292 * @c: next UTF-8 character to convert 293 * @buffer: buffer, at least 5 characters 294 * Return: next codepage 437 character or 0 295 */ 296 int utf8_to_cp437_stream(u8 c, char *buffer); 297 298 /** 299 * utf8_to_utf32_stream() - convert UTF-8 stream to UTF-32 300 * 301 * @c: next UTF-8 character to convert 302 * @buffer: buffer, at least 5 characters 303 * Return: next codepage 437 character or 0 304 */ 305 int utf8_to_utf32_stream(u8 c, char *buffer); 306 307 #endif /* __CHARSET_H_ */ 308