#include <stdlib.h>
#include <string.h>
#include "stringprep.h"
#include "gunidecomp.h"
#include "gunicomp.h"
Go to the source code of this file.
Defines | |
#define | gboolean int |
#define | gchar char |
#define | guchar unsigned char |
#define | glong long |
#define | gint int |
#define | guint unsigned int |
#define | gushort unsigned short |
#define | gint16 int16_t |
#define | guint16 uint16_t |
#define | gunichar uint32_t |
#define | gsize size_t |
#define | gssize ssize_t |
#define | g_malloc malloc |
#define | g_free free |
#define | GError void |
#define | g_set_error(a, b, c, d) ((void) 0) |
#define | g_new(struct_type, n_structs) ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs)))) |
#define | G_STMT_START do |
#define | G_STMT_END while (0) |
#define | g_return_val_if_fail(expr, val) G_STMT_START{ (void)0; }G_STMT_END |
#define | G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0])) |
#define | TRUE 1 |
#define | FALSE 0 |
#define | UTF8_COMPUTE(Char, Mask, Len) |
#define | UTF8_LENGTH(Char) |
#define | UTF8_GET(Result, Chars, Count, Mask, Len) |
#define | UNICODE_VALID(Char) |
#define | g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(guchar *)(p)]) |
#define | CC_PART1(Page, Char) |
#define | CC_PART2(Page, Char) |
#define | COMBINING_CLASS(Char) |
#define | SBase 0xAC00 |
#define | LBase 0x1100 |
#define | VBase 0x1161 |
#define | TBase 0x11A7 |
#define | LCount 19 |
#define | VCount 21 |
#define | TCount 28 |
#define | NCount (VCount * TCount) |
#define | SCount (LCount * NCount) |
#define | CI(Page, Char) |
#define | COMPOSE_INDEX(Char) ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff)) |
Enumerations | |
enum | GNormalizeMode { G_NORMALIZE_DEFAULT, G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT, G_NORMALIZE_DEFAULT_COMPOSE, G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE, G_NORMALIZE_ALL, G_NORMALIZE_NFKD = G_NORMALIZE_ALL, G_NORMALIZE_ALL_COMPOSE, G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE } |
Functions | |
uint32_t | stringprep_utf8_to_unichar (const char *p) |
int | stringprep_unichar_to_utf8 (uint32_t c, char *outbuf) |
uint32_t * | stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t *items_written) |
char * | stringprep_ucs4_to_utf8 (const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written) |
char * | stringprep_utf8_nfkc_normalize (const char *str, ssize_t len) |
uint32_t * | stringprep_ucs4_nfkc_normalize (uint32_t *str, ssize_t len) |
#define CC_PART1 | ( | Page, | |||
Char | ) |
Value:
((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \ : (cclass_data[combining_class_table_part1[Page]][Char]))
#define CC_PART2 | ( | Page, | |||
Char | ) |
Value:
((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \ : (cclass_data[combining_class_table_part2[Page]][Char]))
#define CI | ( | Page, | |||
Char | ) |
Value:
((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \ : (compose_data[compose_table[Page]][Char]))
#define COMBINING_CLASS | ( | Char | ) |
Value:
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \ ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \ : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \ ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \ : 0))
#define COMPOSE_INDEX | ( | Char | ) | ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff)) |
#define G_N_ELEMENTS | ( | arr | ) | (sizeof (arr) / sizeof ((arr)[0])) |
#define g_new | ( | struct_type, | |||
n_structs | ) | ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs)))) |
#define g_return_val_if_fail | ( | expr, | |||
val | ) | G_STMT_START{ (void)0; }G_STMT_END |
#define g_utf8_next_char | ( | p | ) | (char *)((p) + g_utf8_skip[*(guchar *)(p)]) |
#define glong long |
Definition at line 42 of file nfkc.c.
Referenced by stringprep_ucs4_to_utf8(), and stringprep_utf8_to_ucs4().
#define UNICODE_VALID | ( | Char | ) |
#define UTF8_GET | ( | Result, | |||
Chars, | |||||
Count, | |||||
Mask, | |||||
Len | ) |
#define UTF8_LENGTH | ( | Char | ) |
enum GNormalizeMode |
uint32_t* stringprep_ucs4_nfkc_normalize | ( | uint32_t * | str, | |
ssize_t | len | |||
) |
stringprep_ucs4_nfkc_normalize - normalize Unicode string
str | a Unicode string. | |
len | length of str array, or -1 if str is nul-terminated. |
Return value: a newly allocated Unicode string that is the NFKC normalized form of str.
Definition at line 1048 of file nfkc.c.
References G_NORMALIZE_NFKC, stringprep_ucs4_to_utf8(), and uint32_t.
Referenced by stringprep_4i().
char* stringprep_ucs4_to_utf8 | ( | const uint32_t * | str, | |
ssize_t | len, | |||
size_t * | items_read, | |||
size_t * | items_written | |||
) |
stringprep_ucs4_to_utf8 - convert UCS-4 string to UTF-8
str | a UCS-4 encoded string | |
len | the maximum length of str to use. If len < 0, then the string is terminated with a 0 character. | |
items_read | location to store number of characters read, or NULL. | |
items_written | location to store number of bytes written or NULL. The value here stored does not include the trailing 0 byte. |
Return value: a pointer to a newly allocated UTF-8 string. This value must be freed with free(). If an error occurs, NULL will be returned.
Definition at line 1001 of file nfkc.c.
References glong.
Referenced by idna_to_ascii_4i(), idna_to_unicode_44i(), idna_to_unicode_8z8z(), stringprep(), and stringprep_ucs4_nfkc_normalize().
int stringprep_unichar_to_utf8 | ( | uint32_t | c, | |
char * | outbuf | |||
) |
stringprep_unichar_to_utf8 - convert Unicode code point to UTF-8
c | an ISO 10646 character code | |
outbuf | output buffer, must have at least 6 bytes of space. If NULL, the length will be computed and returned and nothing will be written to outbuf. |
Return value: number of bytes written.
char* stringprep_utf8_nfkc_normalize | ( | const char * | str, | |
ssize_t | len | |||
) |
stringprep_utf8_nfkc_normalize - normalize Unicode string
str | a UTF-8 encoded string. | |
len | length of str, in bytes, or -1 if str is nul-terminated. |
The normalization mode is NFKC (ALL COMPOSE). It standardizes differences that do not affect the text content, such as whether an accented character is represented as a base character followed by a combining accent or as a single precomposed character. It also standardizes the "compatibility" characters in Unicode, such as SUPERSCRIPT THREE, to their standard forms (in this case DIGIT THREE). Formatting information may be lost, but for most text operations such characters should be considered the same. It returns a result with composed forms rather than a maximally decomposed form.
Return value: a newly allocated string that is the NFKC normalized form of str.
Definition at line 1031 of file nfkc.c.
References G_NORMALIZE_NFKC.
uint32_t* stringprep_utf8_to_ucs4 | ( | const char * | str, | |
ssize_t | len, | |||
size_t * | items_written | |||
) |
stringprep_utf8_to_ucs4 - convert UTF-8 string to UCS-4
str | a UTF-8 encoded string | |
len | the maximum length of str to use. If len < 0, then the string is nul-terminated. | |
items_written | location to store the number of characters in the result, or NULL. |
Return value: a pointer to a newly allocated UCS-4 string. This value must be freed with free().
Definition at line 977 of file nfkc.c.
References glong.
Referenced by idna_to_ascii_4i(), idna_to_ascii_8z(), idna_to_unicode_8z4z(), pr29_8z(), stringprep(), and tld_check_8z().
uint32_t stringprep_utf8_to_unichar | ( | const char * | p | ) |
stringprep_utf8_to_unichar - convert UTF-8 to Unicode code point
p | a pointer to Unicode character encoded as UTF-8 |
If p does not point to a valid UTF-8 encoded character, results are undefined.
Return value: the resulting character.