00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifdef HAVE_CONFIG_H
00023 # include "config.h"
00024 #endif
00025
00026 #include <stdlib.h>
00027 #include <string.h>
00028
00029 #include "stringprep.h"
00030
00031
00032
00033
00034
00035
00036
00037
00038
/* Stand-ins for the GLib names used by the code below, mapped onto plain
   C types and the C library so this file builds without GLib.
   NOTE(review): int16_t/uint16_t/uint32_t and ssize_t are not declared by
   the headers included directly above; presumably "stringprep.h" or
   "config.h" provides them -- confirm.  */
#define gboolean int
#define gchar char
#define guchar unsigned char
#define glong long
#define gint int
#define guint unsigned int
#define gushort unsigned short
#define gint16 int16_t
#define guint16 uint16_t
#define gunichar uint32_t
#define gsize size_t
#define gssize ssize_t
#define g_malloc malloc
#define g_free free
#define GError void
/* Error reporting is compiled out: the arguments are discarded unevaluated.  */
#define g_set_error(a,b,c,d) ((void) 0)
/* Allocates room for n_structs objects of struct_type via g_malloc.  The
   size multiplication is not checked for overflow and the result may be
   NULL.  */
#define g_new(struct_type, n_structs)					\
  ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs))))
/* G_STMT_START / G_STMT_END bracket a braced block so that it can be used
   as a single statement; the spelling depends on the compiler.  */
# if defined (__GNUC__) && !defined (__STRICT_ANSI__) && !defined (__cplusplus)
#  define G_STMT_START (void)(
#  define G_STMT_END )
# else
#  if (defined (sun) || defined (__sun__))
#   define G_STMT_START if (1)
#   define G_STMT_END else (void)0
#  else
#   define G_STMT_START do
#   define G_STMT_END while (0)
#  endif
# endif
/* Precondition checks are compiled out: expr and val are never evaluated,
   so functions using this macro do NOT validate their arguments.  */
#define g_return_val_if_fail(expr,val) G_STMT_START{ (void)0; }G_STMT_END
/* Element count of a true array (not valid for pointers).  */
#define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0]))
#define TRUE 1
#define FALSE 0
00073
00074
00075
/* Normalization mode selector.  Each mode has two spellings that share one
   value.  In this file, _g_utf8_normalize_wc uses compatibility
   decompositions for NFKD/NFKC and runs the composition pass for
   NFC/NFKC.  */
typedef enum
{
  G_NORMALIZE_DEFAULT = 0,
  G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT,

  G_NORMALIZE_DEFAULT_COMPOSE = 1,
  G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,

  G_NORMALIZE_ALL = 2,
  G_NORMALIZE_NFKD = G_NORMALIZE_ALL,

  G_NORMALIZE_ALL_COMPOSE = 3,
  G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
} GNormalizeMode;
00088
00089
00090
/* Classify the lead byte Char of a UTF-8 sequence.
   Sets Len to the sequence length (1..6) and Mask to the bit mask that
   selects the payload bits of the lead byte.  If Char is not a valid lead
   byte, Len is set to -1 and Mask is left untouched.

   The body is wrapped in do { } while (0) and every argument is
   parenthesized, so the macro acts as one statement (safe inside an
   unbraced if/else) and accepts arbitrary expressions as arguments.
   Invoke it with a trailing semicolon.  */
#define UTF8_COMPUTE(Char, Mask, Len)					\
  do									\
    {									\
      if ((Char) < 128)							\
        {								\
          (Len) = 1;							\
          (Mask) = 0x7f;						\
        }								\
      else if (((Char) & 0xe0) == 0xc0)					\
        {								\
          (Len) = 2;							\
          (Mask) = 0x1f;						\
        }								\
      else if (((Char) & 0xf0) == 0xe0)					\
        {								\
          (Len) = 3;							\
          (Mask) = 0x0f;						\
        }								\
      else if (((Char) & 0xf8) == 0xf0)					\
        {								\
          (Len) = 4;							\
          (Mask) = 0x07;						\
        }								\
      else if (((Char) & 0xfc) == 0xf8)					\
        {								\
          (Len) = 5;							\
          (Mask) = 0x03;						\
        }								\
      else if (((Char) & 0xfe) == 0xfc)					\
        {								\
          (Len) = 6;							\
          (Mask) = 0x01;						\
        }								\
      else								\
        (Len) = -1;							\
    }									\
  while (0)
00124
/* Number of bytes (1..6) used to encode the code point Ucs in UTF-8,
   using the original 31-bit scheme with sequences of up to six bytes.
   The argument may be evaluated more than once.  */
#define UTF8_LENGTH(Ucs)			\
  ((Ucs) < 0x80 ? 1				\
   : (Ucs) < 0x800 ? 2				\
   : (Ucs) < 0x10000 ? 3			\
   : (Ucs) < 0x200000 ? 4			\
   : (Ucs) < 0x4000000 ? 5			\
   : 6)
00131
00132
/* Decode one UTF-8 sequence of Len bytes starting at Chars into Result.
   Mask selects the payload bits of the lead byte (see UTF8_COMPUTE) and
   Count is a scratch loop variable.  If any byte after the first is not a
   continuation byte (10xxxxxx), Result is set to -1 and decoding stops.

   The body is wrapped in do { } while (0) so that the assignment and the
   loop form one statement; without the wrapper an unbraced `if' would
   guard only the first assignment.  Invoke it with a trailing semicolon.  */
#define UTF8_GET(Result, Chars, Count, Mask, Len)			\
  do									\
    {									\
      (Result) = (Chars)[0] & (Mask);					\
      for ((Count) = 1; (Count) < (Len); ++(Count))			\
        {								\
          if (((Chars)[(Count)] & 0xc0) != 0x80)			\
            {								\
              (Result) = -1;						\
              break;							\
            }								\
          (Result) <<= 6;						\
          (Result) |= ((Chars)[(Count)] & 0x3f);			\
        }								\
    }									\
  while (0)
00145
/* Non-zero when Char is acceptable as a Unicode scalar value: it must be
   below 0x110000 and must not fall in 0xD800..0xDFFF (surrogates),
   0xFDD0..0xFDEF, or have 0xFFFE/0xFFFF as its low 16 bits.
   The argument may be evaluated more than once.  */
#define UNICODE_VALID(Char)					\
  (!((Char) >= 0x110000						\
     || ((Char) & 0xFFFFF800) == 0xD800				\
     || ((Char) >= 0xFDD0 && (Char) <= 0xFDEF)			\
     || ((Char) & 0xFFFE) == 0xFFFE))
00151
00152
00153 static const gchar utf8_skip_data[256] = {
00154 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00155 1, 1, 1, 1, 1, 1, 1,
00156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00157 1, 1, 1, 1, 1, 1, 1,
00158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00159 1, 1, 1, 1, 1, 1, 1,
00160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00161 1, 1, 1, 1, 1, 1, 1,
00162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00163 1, 1, 1, 1, 1, 1, 1,
00164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00165 1, 1, 1, 1, 1, 1, 1,
00166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00167 2, 2, 2, 2, 2, 2, 2,
00168 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
00169 5, 5, 5, 6, 6, 1, 1
00170 };
00171
00172 static const gchar *const g_utf8_skip = utf8_skip_data;
00173
00174 #define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(guchar *)(p)])
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188 static glong
00189 g_utf8_strlen (const gchar * p, gssize max)
00190 {
00191 glong len = 0;
00192 const gchar *start = p;
00193 g_return_val_if_fail (p != NULL || max == 0, 0);
00194
00195 if (max < 0)
00196 {
00197 while (*p)
00198 {
00199 p = g_utf8_next_char (p);
00200 ++len;
00201 }
00202 }
00203 else
00204 {
00205 if (max == 0 || !*p)
00206 return 0;
00207
00208 p = g_utf8_next_char (p);
00209
00210 while (p - start < max && *p)
00211 {
00212 ++len;
00213 p = g_utf8_next_char (p);
00214 }
00215
00216
00217
00218
00219 if (p - start == max)
00220 ++len;
00221 }
00222
00223 return len;
00224 }
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238 static gunichar
00239 g_utf8_get_char (const gchar * p)
00240 {
00241 int i, mask = 0, len;
00242 gunichar result;
00243 unsigned char c = (unsigned char) *p;
00244
00245 UTF8_COMPUTE (c, mask, len);
00246 if (len == -1)
00247 return (gunichar) - 1;
00248 UTF8_GET (result, p, i, mask, len);
00249
00250 return result;
00251 }
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264 static int
00265 g_unichar_to_utf8 (gunichar c, gchar * outbuf)
00266 {
00267 guint len = 0;
00268 int first;
00269 int i;
00270
00271 if (c < 0x80)
00272 {
00273 first = 0;
00274 len = 1;
00275 }
00276 else if (c < 0x800)
00277 {
00278 first = 0xc0;
00279 len = 2;
00280 }
00281 else if (c < 0x10000)
00282 {
00283 first = 0xe0;
00284 len = 3;
00285 }
00286 else if (c < 0x200000)
00287 {
00288 first = 0xf0;
00289 len = 4;
00290 }
00291 else if (c < 0x4000000)
00292 {
00293 first = 0xf8;
00294 len = 5;
00295 }
00296 else
00297 {
00298 first = 0xfc;
00299 len = 6;
00300 }
00301
00302 if (outbuf)
00303 {
00304 for (i = len - 1; i > 0; --i)
00305 {
00306 outbuf[i] = (c & 0x3f) | 0x80;
00307 c >>= 6;
00308 }
00309 outbuf[0] = c | first;
00310 }
00311
00312 return len;
00313 }
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331 static gunichar *
00332 g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written)
00333 {
00334 gint j, charlen;
00335 gunichar *result;
00336 gint n_chars, i;
00337 const gchar *p;
00338
00339 g_return_val_if_fail (str != NULL, NULL);
00340
00341 p = str;
00342 n_chars = 0;
00343 if (len < 0)
00344 {
00345 while (*p)
00346 {
00347 p = g_utf8_next_char (p);
00348 ++n_chars;
00349 }
00350 }
00351 else
00352 {
00353 while (p < str + len && *p)
00354 {
00355 p = g_utf8_next_char (p);
00356 ++n_chars;
00357 }
00358 }
00359
00360 result = g_new (gunichar, n_chars + 1);
00361 if (!result)
00362 return NULL;
00363
00364 p = str;
00365 for (i = 0; i < n_chars; i++)
00366 {
00367 gunichar wc = ((unsigned char *) p)[0];
00368
00369 if (wc < 0x80)
00370 {
00371 result[i] = wc;
00372 p++;
00373 }
00374 else
00375 {
00376 if (wc < 0xe0)
00377 {
00378 charlen = 2;
00379 wc &= 0x1f;
00380 }
00381 else if (wc < 0xf0)
00382 {
00383 charlen = 3;
00384 wc &= 0x0f;
00385 }
00386 else if (wc < 0xf8)
00387 {
00388 charlen = 4;
00389 wc &= 0x07;
00390 }
00391 else if (wc < 0xfc)
00392 {
00393 charlen = 5;
00394 wc &= 0x03;
00395 }
00396 else
00397 {
00398 charlen = 6;
00399 wc &= 0x01;
00400 }
00401
00402 for (j = 1; j < charlen; j++)
00403 {
00404 wc <<= 6;
00405 wc |= ((unsigned char *) p)[j] & 0x3f;
00406 }
00407
00408 result[i] = wc;
00409 p += charlen;
00410 }
00411 }
00412 result[i] = 0;
00413
00414 if (items_written)
00415 *items_written = i;
00416
00417 return result;
00418 }
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441 static gchar *
00442 g_ucs4_to_utf8 (const gunichar * str,
00443 glong len,
00444 glong * items_read, glong * items_written, GError ** error)
00445 {
00446 gint result_length;
00447 gchar *result = NULL;
00448 gchar *p;
00449 gint i;
00450
00451 result_length = 0;
00452 for (i = 0; len < 0 || i < len; i++)
00453 {
00454 if (!str[i])
00455 break;
00456
00457 if (str[i] >= 0x80000000)
00458 {
00459 if (items_read)
00460 *items_read = i;
00461
00462 g_set_error (error, G_CONVERT_ERROR,
00463 G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
00464 _("Character out of range for UTF-8"));
00465 goto err_out;
00466 }
00467
00468 result_length += UTF8_LENGTH (str[i]);
00469 }
00470
00471 result = g_malloc (result_length + 1);
00472 if (!result)
00473 return NULL;
00474 p = result;
00475
00476 i = 0;
00477 while (p < result + result_length)
00478 p += g_unichar_to_utf8 (str[i++], p);
00479
00480 *p = '\0';
00481
00482 if (items_written)
00483 *items_written = p - result;
00484
00485 err_out:
00486 if (items_read)
00487 *items_read = i;
00488
00489 return result;
00490 }
00491
00492
00493
00494 #include "gunidecomp.h"
00495 #include "gunicomp.h"
00496
/* Two-level lookup shared by CC_PART1 and CC_PART2.  A page entry that is
   >= G_UNICODE_MAX_TABLE_INDEX encodes one value for the whole page
   (entry - G_UNICODE_MAX_TABLE_INDEX); otherwise it selects a row of
   cclass_data that is indexed by the low byte.
   NOTE(review): the tables are presumably declared in "gunidecomp.h" --
   confirm.  */
#define CC_LOOKUP(Table, Page, Char)					\
  (((Table)[Page] >= G_UNICODE_MAX_TABLE_INDEX)				\
   ? ((Table)[Page] - G_UNICODE_MAX_TABLE_INDEX)			\
   : (cclass_data[(Table)[Page]][Char]))

#define CC_PART1(Page, Char)						\
  CC_LOOKUP (combining_class_table_part1, Page, Char)

#define CC_PART2(Page, Char)						\
  CC_LOOKUP (combining_class_table_part2, Page, Char)

/* Combining class of the code point Char.  Code points up to
   G_UNICODE_LAST_CHAR_PART1 use the part-1 table, those in
   0xe0000..G_UNICODE_LAST_CHAR use the part-2 table, and everything else
   yields 0.  The argument is evaluated more than once.  */
#define COMBINING_CLASS(Char)						\
  (((Char) <= G_UNICODE_LAST_CHAR_PART1)				\
   ? CC_PART1 ((Char) >> 8, (Char) & 0xff)				\
   : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR)		\
      ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff)		\
      : 0))
00513
00514
/* Constants for the arithmetic Hangul syllable (de)composition used by
   decompose_hangul and combine_hangul.  The names appear to follow the
   Unicode Standard's Hangul algorithm -- confirm against section 3.12.  */
#define SBase 0xAC00		/* first precomposed syllable */
#define LBase 0x1100		/* first leading consonant */
#define VBase 0x1161		/* first vowel */
#define TBase 0x11A7		/* one below the first trailing consonant;
				   index 0 means "no trailing consonant" */
#define LCount 19		/* number of leading consonants */
#define VCount 21		/* number of vowels */
#define TCount 28		/* trailing consonants plus the "none" slot */
#define NCount (VCount * TCount)	/* syllables per leading consonant */
#define SCount (LCount * NCount)	/* total precomposed syllables */
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535 static void
00536 g_unicode_canonical_ordering (gunichar * string, gsize len)
00537 {
00538 gsize i;
00539 int swap = 1;
00540
00541 while (swap)
00542 {
00543 int last;
00544 swap = 0;
00545 last = COMBINING_CLASS (string[0]);
00546 for (i = 0; i < len - 1; ++i)
00547 {
00548 int next = COMBINING_CLASS (string[i + 1]);
00549 if (next != 0 && last > next)
00550 {
00551 gsize j;
00552
00553 for (j = i + 1; j > 0; --j)
00554 {
00555 gunichar t;
00556 if (COMBINING_CLASS (string[j - 1]) <= next)
00557 break;
00558 t = string[j];
00559 string[j] = string[j - 1];
00560 string[j - 1] = t;
00561 swap = 1;
00562 }
00563
00564
00565 next = last;
00566 }
00567 last = next;
00568 }
00569 }
00570 }
00571
00572
00573
00574
00575
00576 static void
00577 decompose_hangul (gunichar s, gunichar * r, gsize * result_len)
00578 {
00579 gint SIndex = s - SBase;
00580
00581
00582 if (SIndex < 0 || SIndex >= SCount)
00583 {
00584 if (r)
00585 r[0] = s;
00586 *result_len = 1;
00587 }
00588 else
00589 {
00590 gunichar L = LBase + SIndex / NCount;
00591 gunichar V = VBase + (SIndex % NCount) / TCount;
00592 gunichar T = TBase + SIndex % TCount;
00593
00594 if (r)
00595 {
00596 r[0] = L;
00597 r[1] = V;
00598 }
00599
00600 if (T != TBase)
00601 {
00602 if (r)
00603 r[2] = T;
00604 *result_len = 3;
00605 }
00606 else
00607 *result_len = 2;
00608 }
00609 }
00610
00611
00612 static const gchar *
00613 find_decomposition (gunichar ch, gboolean compat)
00614 {
00615 int start = 0;
00616 int end = G_N_ELEMENTS (decomp_table);
00617
00618 if (ch >= decomp_table[start].ch && ch <= decomp_table[end - 1].ch)
00619 {
00620 while (TRUE)
00621 {
00622 int half = (start + end) / 2;
00623 if (ch == decomp_table[half].ch)
00624 {
00625 int offset;
00626
00627 if (compat)
00628 {
00629 offset = decomp_table[half].compat_offset;
00630 if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
00631 offset = decomp_table[half].canon_offset;
00632 }
00633 else
00634 {
00635 offset = decomp_table[half].canon_offset;
00636 if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
00637 return NULL;
00638 }
00639
00640 return &(decomp_expansion_string[offset]);
00641 }
00642 else if (half == start)
00643 break;
00644 else if (ch > decomp_table[half].ch)
00645 start = half;
00646 else
00647 end = half;
00648 }
00649 }
00650
00651 return NULL;
00652 }
00653
00654
00655 static gboolean
00656 combine_hangul (gunichar a, gunichar b, gunichar * result)
00657 {
00658 gint LIndex = a - LBase;
00659 gint SIndex = a - SBase;
00660
00661 gint VIndex = b - VBase;
00662 gint TIndex = b - TBase;
00663
00664 if (0 <= LIndex && LIndex < LCount && 0 <= VIndex && VIndex < VCount)
00665 {
00666 *result = SBase + (LIndex * VCount + VIndex) * TCount;
00667 return TRUE;
00668 }
00669 else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0
00670 && 0 <= TIndex && TIndex <= TCount)
00671 {
00672 *result = a + TIndex;
00673 return TRUE;
00674 }
00675
00676 return FALSE;
00677 }
00678
/* Two-level lookup of a composition index.  A compose_table entry that is
   >= G_UNICODE_MAX_TABLE_INDEX encodes one value for the whole page;
   otherwise it selects a row of compose_data indexed by the low byte.
   NOTE(review): the tables are presumably declared in "gunicomp.h" --
   confirm.  */
#define CI(Pg, Lo)							\
  ((compose_table[Pg] >= G_UNICODE_MAX_TABLE_INDEX)			\
   ? (compose_table[Pg] - G_UNICODE_MAX_TABLE_INDEX)			\
   : (compose_data[compose_table[Pg]][Lo]))

/* Composition index of the code point Ucs, or 0 when its page lies beyond
   COMPOSE_TABLE_LAST.  The argument is evaluated more than once.  */
#define COMPOSE_INDEX(Ucs)						\
  ((((Ucs) >> 8) <= (COMPOSE_TABLE_LAST))				\
   ? CI ((Ucs) >> 8, (Ucs) & 0xff)					\
   : 0)
00686
00687 static gboolean
00688 combine (gunichar a, gunichar b, gunichar * result)
00689 {
00690 gushort index_a, index_b;
00691
00692 if (combine_hangul (a, b, result))
00693 return TRUE;
00694
00695 index_a = COMPOSE_INDEX (a);
00696
00697 if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
00698 {
00699 if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
00700 {
00701 *result =
00702 compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1];
00703 return TRUE;
00704 }
00705 else
00706 return FALSE;
00707 }
00708
00709 index_b = COMPOSE_INDEX (b);
00710
00711 if (index_b >= COMPOSE_SECOND_SINGLE_START)
00712 {
00713 if (a ==
00714 compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
00715 {
00716 *result =
00717 compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1];
00718 return TRUE;
00719 }
00720 else
00721 return FALSE;
00722 }
00723
00724 if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START
00725 && index_b >= COMPOSE_SECOND_START
00726 && index_b < COMPOSE_SECOND_SINGLE_START)
00727 {
00728 gunichar res =
00729 compose_array[index_a - COMPOSE_FIRST_START][index_b -
00730 COMPOSE_SECOND_START];
00731
00732 if (res)
00733 {
00734 *result = res;
00735 return TRUE;
00736 }
00737 }
00738
00739 return FALSE;
00740 }
00741
00742 static gunichar *
00743 _g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode)
00744 {
00745 gsize n_wc;
00746 gunichar *wc_buffer;
00747 const char *p;
00748 gsize last_start;
00749 gboolean do_compat = (mode == G_NORMALIZE_NFKC || mode == G_NORMALIZE_NFKD);
00750 gboolean do_compose = (mode == G_NORMALIZE_NFC || mode == G_NORMALIZE_NFKC);
00751
00752 n_wc = 0;
00753 p = str;
00754 while ((max_len < 0 || p < str + max_len) && *p)
00755 {
00756 const gchar *decomp;
00757 gunichar wc = g_utf8_get_char (p);
00758
00759 if (wc >= 0xac00 && wc <= 0xd7af)
00760 {
00761 gsize result_len;
00762 decompose_hangul (wc, NULL, &result_len);
00763 n_wc += result_len;
00764 }
00765 else
00766 {
00767 decomp = find_decomposition (wc, do_compat);
00768
00769 if (decomp)
00770 n_wc += g_utf8_strlen (decomp, -1);
00771 else
00772 n_wc++;
00773 }
00774
00775 p = g_utf8_next_char (p);
00776 }
00777
00778 wc_buffer = g_new (gunichar, n_wc + 1);
00779 if (!wc_buffer)
00780 return NULL;
00781
00782 last_start = 0;
00783 n_wc = 0;
00784 p = str;
00785 while ((max_len < 0 || p < str + max_len) && *p)
00786 {
00787 gunichar wc = g_utf8_get_char (p);
00788 const gchar *decomp;
00789 int cc;
00790 gsize old_n_wc = n_wc;
00791
00792 if (wc >= 0xac00 && wc <= 0xd7af)
00793 {
00794 gsize result_len;
00795 decompose_hangul (wc, wc_buffer + n_wc, &result_len);
00796 n_wc += result_len;
00797 }
00798 else
00799 {
00800 decomp = find_decomposition (wc, do_compat);
00801
00802 if (decomp)
00803 {
00804 const char *pd;
00805 for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
00806 wc_buffer[n_wc++] = g_utf8_get_char (pd);
00807 }
00808 else
00809 wc_buffer[n_wc++] = wc;
00810 }
00811
00812 if (n_wc > 0)
00813 {
00814 cc = COMBINING_CLASS (wc_buffer[old_n_wc]);
00815
00816 if (cc == 0)
00817 {
00818 g_unicode_canonical_ordering (wc_buffer + last_start,
00819 n_wc - last_start);
00820 last_start = old_n_wc;
00821 }
00822 }
00823
00824 p = g_utf8_next_char (p);
00825 }
00826
00827 if (n_wc > 0)
00828 {
00829 g_unicode_canonical_ordering (wc_buffer + last_start,
00830 n_wc - last_start);
00831 last_start = n_wc;
00832 }
00833
00834 wc_buffer[n_wc] = 0;
00835
00836
00837
00838 if (do_compose && n_wc > 0)
00839 {
00840 gsize i, j;
00841 int last_cc = 0;
00842 last_start = 0;
00843
00844 for (i = 0; i < n_wc; i++)
00845 {
00846 int cc = COMBINING_CLASS (wc_buffer[i]);
00847
00848 if (i > 0 &&
00849 (last_cc == 0 || last_cc != cc) &&
00850 combine (wc_buffer[last_start], wc_buffer[i],
00851 &wc_buffer[last_start]))
00852 {
00853 for (j = i + 1; j < n_wc; j++)
00854 wc_buffer[j - 1] = wc_buffer[j];
00855 n_wc--;
00856 i--;
00857
00858 if (i == last_start)
00859 last_cc = 0;
00860 else
00861 last_cc = COMBINING_CLASS (wc_buffer[i - 1]);
00862
00863 continue;
00864 }
00865
00866 if (cc == 0)
00867 last_start = i;
00868
00869 last_cc = cc;
00870 }
00871 }
00872
00873 wc_buffer[n_wc] = 0;
00874
00875 return wc_buffer;
00876 }
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914 static gchar *
00915 g_utf8_normalize (const gchar * str, gssize len, GNormalizeMode mode)
00916 {
00917 gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
00918 gchar *result;
00919
00920 result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
00921 g_free (result_wc);
00922
00923 return result;
00924 }
00925
00926
00927
/* Decode the UTF-8 sequence at p into a code point.
 *
 * Returns the code point, or (uint32_t) -1 if the sequence has an invalid
 * lead or continuation byte.  This is a thin wrapper around
 * g_utf8_get_char.
 */
uint32_t
stringprep_utf8_to_unichar (const char *p)
{
  const uint32_t code_point = g_utf8_get_char (p);

  return code_point;
}
00943
/* Encode the code point c as UTF-8.
 *
 * outbuf: destination with room for at least 6 bytes, or NULL to only
 *         compute the length.  No NUL terminator is written.
 *
 * Returns the number of bytes in the encoding.  This is a thin wrapper
 * around g_unichar_to_utf8.
 */
int
stringprep_unichar_to_utf8 (uint32_t c, char *outbuf)
{
  const int nbytes = g_unichar_to_utf8 (c, outbuf);

  return nbytes;
}
00960
00976 uint32_t *
00977 stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t * items_written)
00978 {
00979 return g_utf8_to_ucs4_fast (str, (glong) len, (glong *) items_written);
00980 }
00981
01000 char *
01001 stringprep_ucs4_to_utf8 (const uint32_t * str, ssize_t len,
01002 size_t * items_read, size_t * items_written)
01003 {
01004 return g_ucs4_to_utf8 (str, len, (glong *) items_read,
01005 (glong *) items_written, NULL);
01006 }
01007
01030 char *
01031 stringprep_utf8_nfkc_normalize (const char *str, ssize_t len)
01032 {
01033 return g_utf8_normalize (str, len, G_NORMALIZE_NFKC);
01034 }
01035
01047 uint32_t *
01048 stringprep_ucs4_nfkc_normalize (uint32_t * str, ssize_t len)
01049 {
01050 char *p;
01051 uint32_t *result_wc;
01052
01053 p = stringprep_ucs4_to_utf8 (str, len, 0, 0);
01054 result_wc = _g_utf8_normalize_wc (p, -1, G_NORMALIZE_NFKC);
01055 free (p);
01056
01057 return result_wc;
01058 }