00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 #ifdef HAVE_CONFIG_H
00023 # include "config.h"
00024 #endif
00025 
00026 #include <stdlib.h>
00027 #include <string.h>
00028 
00029 #include "stringprep.h"
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
/* Minimal stand-ins for the GLib names used by the code below, so that
   it builds without GLib.  */

/* Scalar and character types.  */
#define gboolean int
#define gchar char
#define guchar unsigned char
#define glong long
#define gint int
#define guint unsigned int
#define gushort unsigned short
#define gint16 int16_t
#define guint16 uint16_t
#define gunichar uint32_t
#define gsize size_t
#define gssize ssize_t

/* Boolean constants.  */
#define TRUE 1
#define FALSE 0

/* Memory management maps straight onto the C library.  */
#define g_malloc malloc
#define g_free free
#define g_new(struct_type, n_structs)                                   \
  ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs))))

/* Error reporting is compiled out: g_set_error() discards its arguments.  */
#define GError void
#define g_set_error(a,b,c,d) ((void) 0)

/* Wrappers that turn a braced block into a single statement.  */
#if defined (__GNUC__) && !defined (__STRICT_ANSI__) && !defined (__cplusplus)
#  define G_STMT_START        (void)(
#  define G_STMT_END          )
#elif (defined (sun) || defined (__sun__))
#  define G_STMT_START        if (1)
#  define G_STMT_END          else (void)0
#else
#  define G_STMT_START        do
#  define G_STMT_END          while (0)
#endif

/* Precondition checks are compiled out: neither argument is evaluated.  */
#define g_return_val_if_fail(expr,val)          G_STMT_START{ (void)0; }G_STMT_END

/* Number of elements in a true array (not a pointer).  */
#define G_N_ELEMENTS(arr)               (sizeof (arr) / sizeof ((arr)[0]))
00073 
00074 
00075 
/* Normalization forms.  Each form has a descriptive name and a standard
   NFx alias that shares its value.  */
typedef enum
{
  G_NORMALIZE_DEFAULT = 0,
  G_NORMALIZE_NFD = 0,
  G_NORMALIZE_DEFAULT_COMPOSE = 1,
  G_NORMALIZE_NFC = 1,
  G_NORMALIZE_ALL = 2,
  G_NORMALIZE_NFKD = 2,
  G_NORMALIZE_ALL_COMPOSE = 3,
  G_NORMALIZE_NFKC = 3
}
GNormalizeMode;
00088 
00089 
00090 
/* Classify the lead byte Char of a UTF-8 sequence.
 *
 * On return Len holds the sequence length in bytes (1..6) and Mask the
 * bits of the lead byte that carry payload.  For a byte that cannot
 * start a sequence Len is set to -1 and Mask is left untouched.
 *
 * The body is wrapped in do/while (0) so that the macro behaves as one
 * statement, including inside an unbraced if/else.  Char is evaluated
 * more than once.
 */
#define UTF8_COMPUTE(Char, Mask, Len)           \
  do                                            \
    {                                           \
      if ((Char) < 128)                         \
        {                                       \
          (Len) = 1;                            \
          (Mask) = 0x7f;                        \
        }                                       \
      else if (((Char) & 0xe0) == 0xc0)         \
        {                                       \
          (Len) = 2;                            \
          (Mask) = 0x1f;                        \
        }                                       \
      else if (((Char) & 0xf0) == 0xe0)         \
        {                                       \
          (Len) = 3;                            \
          (Mask) = 0x0f;                        \
        }                                       \
      else if (((Char) & 0xf8) == 0xf0)         \
        {                                       \
          (Len) = 4;                            \
          (Mask) = 0x07;                        \
        }                                       \
      else if (((Char) & 0xfc) == 0xf8)         \
        {                                       \
          (Len) = 5;                            \
          (Mask) = 0x03;                        \
        }                                       \
      else if (((Char) & 0xfe) == 0xfc)         \
        {                                       \
          (Len) = 6;                            \
          (Mask) = 0x01;                        \
        }                                       \
      else                                      \
        (Len) = -1;                             \
    }                                           \
  while (0)
00124 
/* Number of bytes (1..6) needed to encode the code point Char in UTF-8.
   Each threshold that Char reaches adds one byte.  Char is evaluated
   several times.  */
#define UTF8_LENGTH(Char)                       \
  (1 + ((Char) >= 0x80)                         \
     + ((Char) >= 0x800)                        \
     + ((Char) >= 0x10000)                      \
     + ((Char) >= 0x200000)                     \
     + ((Char) >= 0x4000000))
00131 
00132 
/* Decode the Len-byte UTF-8 sequence at Chars into Result.
 *
 * Mask selects the payload bits of the lead byte (as produced by
 * UTF8_COMPUTE) and Count is a scratch loop variable.  If a trailing
 * byte is not of the form 10xxxxxx, Result is set to -1 and decoding
 * stops with Count at the offending index.
 *
 * The body is wrapped in do/while (0) so that the whole macro is a
 * single statement; the inner break only leaves the for loop.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len)           \
  do                                                        \
    {                                                       \
      (Result) = (Chars)[0] & (Mask);                       \
      for ((Count) = 1; (Count) < (Len); ++(Count))         \
        {                                                   \
          if (((Chars)[(Count)] & 0xc0) != 0x80)            \
            {                                               \
              (Result) = -1;                                \
              break;                                        \
            }                                               \
          (Result) <<= 6;                                   \
          (Result) |= ((Chars)[(Count)] & 0x3f);            \
        }                                                   \
    }                                                       \
  while (0)
00145 
/* True unless Char is above U+10FFFF, lies in the surrogate block
   U+D800..U+DFFF, lies in U+FDD0..U+FDEF, or has its low 16 bits equal
   to FFFE or FFFF.  Char is evaluated several times.  */
#define UNICODE_VALID(Char)                             \
  (!((Char) >= 0x110000 ||                              \
     (((Char) & 0xFFFFF800) == 0xD800) ||               \
     ((Char) >= 0xFDD0 && (Char) <= 0xFDEF) ||          \
     ((Char) & 0xFFFE) == 0xFFFE))
00151 
00152 
00153 static const gchar utf8_skip_data[256] = {
00154   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00155   1, 1, 1, 1, 1, 1, 1,
00156   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00157   1, 1, 1, 1, 1, 1, 1,
00158   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00159   1, 1, 1, 1, 1, 1, 1,
00160   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00161   1, 1, 1, 1, 1, 1, 1,
00162   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00163   1, 1, 1, 1, 1, 1, 1,
00164   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00165   1, 1, 1, 1, 1, 1, 1,
00166   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00167   2, 2, 2, 2, 2, 2, 2,
00168   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
00169   5, 5, 5, 6, 6, 1, 1
00170 };
00171 
00172 static const gchar *const g_utf8_skip = utf8_skip_data;
00173 
00174 #define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(guchar *)(p)])
00175 
00176 
00177 
00178 
00179 
00180 
00181 
00182 
00183 
00184 
00185 
00186 
00187 
00188 static glong
00189 g_utf8_strlen (const gchar * p, gssize max)
00190 {
00191   glong len = 0;
00192   const gchar *start = p;
00193   g_return_val_if_fail (p != NULL || max == 0, 0);
00194 
00195   if (max < 0)
00196     {
00197       while (*p)
00198         {
00199           p = g_utf8_next_char (p);
00200           ++len;
00201         }
00202     }
00203   else
00204     {
00205       if (max == 0 || !*p)
00206         return 0;
00207 
00208       p = g_utf8_next_char (p);
00209 
00210       while (p - start < max && *p)
00211         {
00212           ++len;
00213           p = g_utf8_next_char (p);
00214         }
00215 
00216       
00217 
00218 
00219       if (p - start == max)
00220         ++len;
00221     }
00222 
00223   return len;
00224 }
00225 
00226 
00227 
00228 
00229 
00230 
00231 
00232 
00233 
00234 
00235 
00236 
00237 
00238 static gunichar
00239 g_utf8_get_char (const gchar * p)
00240 {
00241   int i, mask = 0, len;
00242   gunichar result;
00243   unsigned char c = (unsigned char) *p;
00244 
00245   UTF8_COMPUTE (c, mask, len);
00246   if (len == -1)
00247     return (gunichar) - 1;
00248   UTF8_GET (result, p, i, mask, len);
00249 
00250   return result;
00251 }
00252 
00253 
00254 
00255 
00256 
00257 
00258 
00259 
00260 
00261 
00262 
00263 
00264 static int
00265 g_unichar_to_utf8 (gunichar c, gchar * outbuf)
00266 {
00267   guint len = 0;
00268   int first;
00269   int i;
00270 
00271   if (c < 0x80)
00272     {
00273       first = 0;
00274       len = 1;
00275     }
00276   else if (c < 0x800)
00277     {
00278       first = 0xc0;
00279       len = 2;
00280     }
00281   else if (c < 0x10000)
00282     {
00283       first = 0xe0;
00284       len = 3;
00285     }
00286   else if (c < 0x200000)
00287     {
00288       first = 0xf0;
00289       len = 4;
00290     }
00291   else if (c < 0x4000000)
00292     {
00293       first = 0xf8;
00294       len = 5;
00295     }
00296   else
00297     {
00298       first = 0xfc;
00299       len = 6;
00300     }
00301 
00302   if (outbuf)
00303     {
00304       for (i = len - 1; i > 0; --i)
00305         {
00306           outbuf[i] = (c & 0x3f) | 0x80;
00307           c >>= 6;
00308         }
00309       outbuf[0] = c | first;
00310     }
00311 
00312   return len;
00313 }
00314 
00315 
00316 
00317 
00318 
00319 
00320 
00321 
00322 
00323 
00324 
00325 
00326 
00327 
00328 
00329 
00330 
00331 static gunichar *
00332 g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written)
00333 {
00334   gint j, charlen;
00335   gunichar *result;
00336   gint n_chars, i;
00337   const gchar *p;
00338 
00339   g_return_val_if_fail (str != NULL, NULL);
00340 
00341   p = str;
00342   n_chars = 0;
00343   if (len < 0)
00344     {
00345       while (*p)
00346         {
00347           p = g_utf8_next_char (p);
00348           ++n_chars;
00349         }
00350     }
00351   else
00352     {
00353       while (p < str + len && *p)
00354         {
00355           p = g_utf8_next_char (p);
00356           ++n_chars;
00357         }
00358     }
00359 
00360   result = g_new (gunichar, n_chars + 1);
00361   if (!result)
00362     return NULL;
00363 
00364   p = str;
00365   for (i = 0; i < n_chars; i++)
00366     {
00367       gunichar wc = ((unsigned char *) p)[0];
00368 
00369       if (wc < 0x80)
00370         {
00371           result[i] = wc;
00372           p++;
00373         }
00374       else
00375         {
00376           if (wc < 0xe0)
00377             {
00378               charlen = 2;
00379               wc &= 0x1f;
00380             }
00381           else if (wc < 0xf0)
00382             {
00383               charlen = 3;
00384               wc &= 0x0f;
00385             }
00386           else if (wc < 0xf8)
00387             {
00388               charlen = 4;
00389               wc &= 0x07;
00390             }
00391           else if (wc < 0xfc)
00392             {
00393               charlen = 5;
00394               wc &= 0x03;
00395             }
00396           else
00397             {
00398               charlen = 6;
00399               wc &= 0x01;
00400             }
00401 
00402           for (j = 1; j < charlen; j++)
00403             {
00404               wc <<= 6;
00405               wc |= ((unsigned char *) p)[j] & 0x3f;
00406             }
00407 
00408           result[i] = wc;
00409           p += charlen;
00410         }
00411     }
00412   result[i] = 0;
00413 
00414   if (items_written)
00415     *items_written = i;
00416 
00417   return result;
00418 }
00419 
00420 
00421 
00422 
00423 
00424 
00425 
00426 
00427 
00428 
00429 
00430 
00431 
00432 
00433 
00434 
00435 
00436 
00437 
00438 
00439 
00440 
00441 static gchar *
00442 g_ucs4_to_utf8 (const gunichar * str,
00443                 glong len,
00444                 glong * items_read, glong * items_written, GError ** error)
00445 {
00446   gint result_length;
00447   gchar *result = NULL;
00448   gchar *p;
00449   gint i;
00450 
00451   result_length = 0;
00452   for (i = 0; len < 0 || i < len; i++)
00453     {
00454       if (!str[i])
00455         break;
00456 
00457       if (str[i] >= 0x80000000)
00458         {
00459           if (items_read)
00460             *items_read = i;
00461 
00462           g_set_error (error, G_CONVERT_ERROR,
00463                        G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
00464                        _("Character out of range for UTF-8"));
00465           goto err_out;
00466         }
00467 
00468       result_length += UTF8_LENGTH (str[i]);
00469     }
00470 
00471   result = g_malloc (result_length + 1);
00472   if (!result)
00473     return NULL;
00474   p = result;
00475 
00476   i = 0;
00477   while (p < result + result_length)
00478     p += g_unichar_to_utf8 (str[i++], p);
00479 
00480   *p = '\0';
00481 
00482   if (items_written)
00483     *items_written = p - result;
00484 
00485 err_out:
00486   if (items_read)
00487     *items_read = i;
00488 
00489   return result;
00490 }
00491 
00492 
00493 
00494 #include "gunidecomp.h"
00495 #include "gunicomp.h"
00496 
/* Combining-class lookup through two-level page tables.  A page-table
   entry at or above G_UNICODE_MAX_TABLE_INDEX encodes one class shared
   by the whole page; any other entry selects a row of cclass_data that
   is indexed by the low byte of the character.  */
#define CC_PART1(page_no, low_byte) \
  ((combining_class_table_part1[page_no] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (combining_class_table_part1[page_no] - G_UNICODE_MAX_TABLE_INDEX) \
   : (cclass_data[combining_class_table_part1[page_no]][low_byte]))

#define CC_PART2(page_no, low_byte) \
  ((combining_class_table_part2[page_no] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (combining_class_table_part2[page_no] - G_UNICODE_MAX_TABLE_INDEX) \
   : (cclass_data[combining_class_table_part2[page_no]][low_byte]))

/* Combining class of code_point.  Part 1 covers characters up to
   G_UNICODE_LAST_CHAR_PART1 and part 2 covers 0xe0000 up to
   G_UNICODE_LAST_CHAR; everything else has class 0.  The argument is
   evaluated several times.  */
#define COMBINING_CLASS(code_point) \
  (((code_point) <= G_UNICODE_LAST_CHAR_PART1) \
   ? CC_PART1 ((code_point) >> 8, (code_point) & 0xff) \
   : (((code_point) >= 0xe0000 && (code_point) <= G_UNICODE_LAST_CHAR) \
      ? CC_PART2 (((code_point) - 0xe0000) >> 8, (code_point) & 0xff) \
      : 0))
00513 
00514 
/* Constants for arithmetic Hangul syllable (de)composition.  */
enum
{
  SBase = 0xAC00,               /* first precomposed syllable */
  LBase = 0x1100,               /* first leading consonant */
  VBase = 0x1161,               /* first vowel */
  TBase = 0x11A7,               /* one below the first trailing consonant */
  LCount = 19,
  VCount = 21,
  TCount = 28,
  NCount = (VCount * TCount),   /* syllables per leading consonant */
  SCount = (LCount * NCount)    /* total precomposed syllables */
};
00524 
00525 
00526 
00527 
00528 
00529 
00530 
00531 
00532 
00533 
00534 
00535 static void
00536 g_unicode_canonical_ordering (gunichar * string, gsize len)
00537 {
00538   gsize i;
00539   int swap = 1;
00540 
00541   while (swap)
00542     {
00543       int last;
00544       swap = 0;
00545       last = COMBINING_CLASS (string[0]);
00546       for (i = 0; i < len - 1; ++i)
00547         {
00548           int next = COMBINING_CLASS (string[i + 1]);
00549           if (next != 0 && last > next)
00550             {
00551               gsize j;
00552               
00553               for (j = i + 1; j > 0; --j)
00554                 {
00555                   gunichar t;
00556                   if (COMBINING_CLASS (string[j - 1]) <= next)
00557                     break;
00558                   t = string[j];
00559                   string[j] = string[j - 1];
00560                   string[j - 1] = t;
00561                   swap = 1;
00562                 }
00563               
00564 
00565               next = last;
00566             }
00567           last = next;
00568         }
00569     }
00570 }
00571 
00572 
00573 
00574 
00575 
00576 static void
00577 decompose_hangul (gunichar s, gunichar * r, gsize * result_len)
00578 {
00579   gint SIndex = s - SBase;
00580 
00581   
00582   if (SIndex < 0 || SIndex >= SCount)
00583     {
00584       if (r)
00585         r[0] = s;
00586       *result_len = 1;
00587     }
00588   else
00589     {
00590       gunichar L = LBase + SIndex / NCount;
00591       gunichar V = VBase + (SIndex % NCount) / TCount;
00592       gunichar T = TBase + SIndex % TCount;
00593 
00594       if (r)
00595         {
00596           r[0] = L;
00597           r[1] = V;
00598         }
00599 
00600       if (T != TBase)
00601         {
00602           if (r)
00603             r[2] = T;
00604           *result_len = 3;
00605         }
00606       else
00607         *result_len = 2;
00608     }
00609 }
00610 
00611 
00612 static const gchar *
00613 find_decomposition (gunichar ch, gboolean compat)
00614 {
00615   int start = 0;
00616   int end = G_N_ELEMENTS (decomp_table);
00617 
00618   if (ch >= decomp_table[start].ch && ch <= decomp_table[end - 1].ch)
00619     {
00620       while (TRUE)
00621         {
00622           int half = (start + end) / 2;
00623           if (ch == decomp_table[half].ch)
00624             {
00625               int offset;
00626 
00627               if (compat)
00628                 {
00629                   offset = decomp_table[half].compat_offset;
00630                   if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
00631                     offset = decomp_table[half].canon_offset;
00632                 }
00633               else
00634                 {
00635                   offset = decomp_table[half].canon_offset;
00636                   if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
00637                     return NULL;
00638                 }
00639 
00640               return &(decomp_expansion_string[offset]);
00641             }
00642           else if (half == start)
00643             break;
00644           else if (ch > decomp_table[half].ch)
00645             start = half;
00646           else
00647             end = half;
00648         }
00649     }
00650 
00651   return NULL;
00652 }
00653 
00654 
00655 static gboolean
00656 combine_hangul (gunichar a, gunichar b, gunichar * result)
00657 {
00658   gint LIndex = a - LBase;
00659   gint SIndex = a - SBase;
00660 
00661   gint VIndex = b - VBase;
00662   gint TIndex = b - TBase;
00663 
00664   if (0 <= LIndex && LIndex < LCount && 0 <= VIndex && VIndex < VCount)
00665     {
00666       *result = SBase + (LIndex * VCount + VIndex) * TCount;
00667       return TRUE;
00668     }
00669   else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0
00670            && 0 <= TIndex && TIndex <= TCount)
00671     {
00672       *result = a + TIndex;
00673       return TRUE;
00674     }
00675 
00676   return FALSE;
00677 }
00678 
/* Two-level lookup of a character's composition index.  A compose_table
   entry at or above G_UNICODE_MAX_TABLE_INDEX encodes one index shared
   by the whole page; any other entry selects a row of compose_data that
   is indexed by the low byte of the character.  */
#define CI(page_no, low_byte) \
  ((compose_table[page_no] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (compose_table[page_no] - G_UNICODE_MAX_TABLE_INDEX) \
   : (compose_data[compose_table[page_no]][low_byte]))

/* Composition index of code_point, or 0 when its page lies beyond
   COMPOSE_TABLE_LAST.  The argument is evaluated several times.  */
#define COMPOSE_INDEX(code_point) \
     ((((code_point) >> 8) > (COMPOSE_TABLE_LAST)) \
      ? 0 : CI((code_point) >> 8, (code_point) & 0xff))
00686 
00687 static gboolean
00688 combine (gunichar a, gunichar b, gunichar * result)
00689 {
00690   gushort index_a, index_b;
00691 
00692   if (combine_hangul (a, b, result))
00693     return TRUE;
00694 
00695   index_a = COMPOSE_INDEX (a);
00696 
00697   if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
00698     {
00699       if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
00700         {
00701           *result =
00702             compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1];
00703           return TRUE;
00704         }
00705       else
00706         return FALSE;
00707     }
00708 
00709   index_b = COMPOSE_INDEX (b);
00710 
00711   if (index_b >= COMPOSE_SECOND_SINGLE_START)
00712     {
00713       if (a ==
00714           compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
00715         {
00716           *result =
00717             compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1];
00718           return TRUE;
00719         }
00720       else
00721         return FALSE;
00722     }
00723 
00724   if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START
00725       && index_b >= COMPOSE_SECOND_START
00726       && index_b < COMPOSE_SECOND_SINGLE_START)
00727     {
00728       gunichar res =
00729         compose_array[index_a - COMPOSE_FIRST_START][index_b -
00730                                                      COMPOSE_SECOND_START];
00731 
00732       if (res)
00733         {
00734           *result = res;
00735           return TRUE;
00736         }
00737     }
00738 
00739   return FALSE;
00740 }
00741 
00742 static gunichar *
00743 _g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode)
00744 {
00745   gsize n_wc;
00746   gunichar *wc_buffer;
00747   const char *p;
00748   gsize last_start;
00749   gboolean do_compat = (mode == G_NORMALIZE_NFKC || mode == G_NORMALIZE_NFKD);
00750   gboolean do_compose = (mode == G_NORMALIZE_NFC || mode == G_NORMALIZE_NFKC);
00751 
00752   n_wc = 0;
00753   p = str;
00754   while ((max_len < 0 || p < str + max_len) && *p)
00755     {
00756       const gchar *decomp;
00757       gunichar wc = g_utf8_get_char (p);
00758 
00759       if (wc >= 0xac00 && wc <= 0xd7af)
00760         {
00761           gsize result_len;
00762           decompose_hangul (wc, NULL, &result_len);
00763           n_wc += result_len;
00764         }
00765       else
00766         {
00767           decomp = find_decomposition (wc, do_compat);
00768 
00769           if (decomp)
00770             n_wc += g_utf8_strlen (decomp, -1);
00771           else
00772             n_wc++;
00773         }
00774 
00775       p = g_utf8_next_char (p);
00776     }
00777 
00778   wc_buffer = g_new (gunichar, n_wc + 1);
00779   if (!wc_buffer)
00780     return NULL;
00781 
00782   last_start = 0;
00783   n_wc = 0;
00784   p = str;
00785   while ((max_len < 0 || p < str + max_len) && *p)
00786     {
00787       gunichar wc = g_utf8_get_char (p);
00788       const gchar *decomp;
00789       int cc;
00790       gsize old_n_wc = n_wc;
00791 
00792       if (wc >= 0xac00 && wc <= 0xd7af)
00793         {
00794           gsize result_len;
00795           decompose_hangul (wc, wc_buffer + n_wc, &result_len);
00796           n_wc += result_len;
00797         }
00798       else
00799         {
00800           decomp = find_decomposition (wc, do_compat);
00801 
00802           if (decomp)
00803             {
00804               const char *pd;
00805               for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
00806                 wc_buffer[n_wc++] = g_utf8_get_char (pd);
00807             }
00808           else
00809             wc_buffer[n_wc++] = wc;
00810         }
00811 
00812       if (n_wc > 0)
00813         {
00814           cc = COMBINING_CLASS (wc_buffer[old_n_wc]);
00815 
00816           if (cc == 0)
00817             {
00818               g_unicode_canonical_ordering (wc_buffer + last_start,
00819                                             n_wc - last_start);
00820               last_start = old_n_wc;
00821             }
00822         }
00823 
00824       p = g_utf8_next_char (p);
00825     }
00826 
00827   if (n_wc > 0)
00828     {
00829       g_unicode_canonical_ordering (wc_buffer + last_start,
00830                                     n_wc - last_start);
00831       last_start = n_wc;
00832     }
00833 
00834   wc_buffer[n_wc] = 0;
00835 
00836   
00837 
00838   if (do_compose && n_wc > 0)
00839     {
00840       gsize i, j;
00841       int last_cc = 0;
00842       last_start = 0;
00843 
00844       for (i = 0; i < n_wc; i++)
00845         {
00846           int cc = COMBINING_CLASS (wc_buffer[i]);
00847 
00848           if (i > 0 &&
00849               (last_cc == 0 || last_cc != cc) &&
00850               combine (wc_buffer[last_start], wc_buffer[i],
00851                        &wc_buffer[last_start]))
00852             {
00853               for (j = i + 1; j < n_wc; j++)
00854                 wc_buffer[j - 1] = wc_buffer[j];
00855               n_wc--;
00856               i--;
00857 
00858               if (i == last_start)
00859                 last_cc = 0;
00860               else
00861                 last_cc = COMBINING_CLASS (wc_buffer[i - 1]);
00862 
00863               continue;
00864             }
00865 
00866           if (cc == 0)
00867             last_start = i;
00868 
00869           last_cc = cc;
00870         }
00871     }
00872 
00873   wc_buffer[n_wc] = 0;
00874 
00875   return wc_buffer;
00876 }
00877 
00878 
00879 
00880 
00881 
00882 
00883 
00884 
00885 
00886 
00887 
00888 
00889 
00890 
00891 
00892 
00893 
00894 
00895 
00896 
00897 
00898 
00899 
00900 
00901 
00902 
00903 
00904 
00905 
00906 
00907 
00908 
00909 
00910 
00911 
00912 
00913 
00914 static gchar *
00915 g_utf8_normalize (const gchar * str, gssize len, GNormalizeMode mode)
00916 {
00917   gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
00918   gchar *result;
00919 
00920   result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
00921   g_free (result_wc);
00922 
00923   return result;
00924 }
00925 
00926 
00927 
/**
 * stringprep_utf8_to_unichar:
 * @p: pointer to the first byte of a UTF-8 encoded character.
 *
 * Public wrapper around g_utf8_get_char().
 *
 * Return value: the decoded character, or (uint32_t) -1 if the bytes at
 * @p could not be decoded.
 **/
uint32_t
stringprep_utf8_to_unichar (const char *p)
{
  const uint32_t code_point = g_utf8_get_char (p);

  return code_point;
}
00943 
/**
 * stringprep_unichar_to_utf8:
 * @c: the character to encode.
 * @outbuf: buffer with room for at least six bytes, or NULL to only
 *   compute the length.
 *
 * Public wrapper around g_unichar_to_utf8().
 *
 * Return value: the number of bytes the UTF-8 encoding of @c occupies.
 **/
int
stringprep_unichar_to_utf8 (uint32_t c, char *outbuf)
{
  const int n_bytes = g_unichar_to_utf8 (c, outbuf);

  return n_bytes;
}
00960 
00976 uint32_t *
00977 stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t * items_written)
00978 {
00979   return g_utf8_to_ucs4_fast (str, (glong) len, (glong *) items_written);
00980 }
00981 
01000 char *
01001 stringprep_ucs4_to_utf8 (const uint32_t * str, ssize_t len,
01002                          size_t * items_read, size_t * items_written)
01003 {
01004   return g_ucs4_to_utf8 (str, len, (glong *) items_read,
01005                          (glong *) items_written, NULL);
01006 }
01007 
01030 char *
01031 stringprep_utf8_nfkc_normalize (const char *str, ssize_t len)
01032 {
01033   return g_utf8_normalize (str, len, G_NORMALIZE_NFKC);
01034 }
01035 
01047 uint32_t *
01048 stringprep_ucs4_nfkc_normalize (uint32_t * str, ssize_t len)
01049 {
01050   char *p;
01051   uint32_t *result_wc;
01052 
01053   p = stringprep_ucs4_to_utf8 (str, len, 0, 0);
01054   result_wc = _g_utf8_normalize_wc (p, -1, G_NORMALIZE_NFKC);
01055   free (p);
01056 
01057   return result_wc;
01058 }