stringprep.c

Go to the documentation of this file.
00001 /* stringprep.c --- Core stringprep implementation.
00002  * Copyright (C) 2002, 2003, 2004  Simon Josefsson
00003  *
00004  * This file is part of GNU Libidn.
00005  *
00006  * GNU Libidn is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * GNU Libidn is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with GNU Libidn; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
00019  *
00020  */
00021 
00022 #ifdef HAVE_CONFIG_H
00023 # include "config.h"
00024 #endif
00025 
00026 #include <stdlib.h>
00027 #include <string.h>
00028 
00029 #include "stringprep.h"
00030 
00031 static ssize_t
00032 stringprep_find_character_in_table (uint32_t ucs4,
00033                                     const Stringprep_table_element * table)
00034 {
00035   ssize_t i;
00036 
00037   /* This is where typical uses of Libidn spends very close to all CPU
00038      time and causes most cache misses.  One could easily do a binary
00039      search instead.  Before rewriting this, I want hard evidence this
00040      slowness is at all relevant in typical applications.  (I don't
00041      dispute optimization may improve matters significantly, I'm
00042      mostly interested in having someone give real-world benchmark on
00043      the impact of libidn.) */
00044 
00045   for (i = 0; table[i].start || table[i].end; i++)
00046     if (ucs4 >= table[i].start &&
00047         ucs4 <= (table[i].end ? table[i].end : table[i].start))
00048       return i;
00049 
00050   return -1;
00051 }
00052 
00053 static ssize_t
00054 stringprep_find_string_in_table (uint32_t * ucs4,
00055                                  size_t ucs4len,
00056                                  size_t * tablepos,
00057                                  const Stringprep_table_element * table)
00058 {
00059   size_t j;
00060   ssize_t pos;
00061 
00062   for (j = 0; j < ucs4len; j++)
00063     if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
00064       {
00065         if (tablepos)
00066           *tablepos = pos;
00067         return j;
00068       }
00069 
00070   return -1;
00071 }
00072 
00073 static int
00074 stringprep_apply_table_to_string (uint32_t * ucs4,
00075                                   size_t * ucs4len,
00076                                   size_t maxucs4len,
00077                                   const Stringprep_table_element * table)
00078 {
00079   ssize_t pos;
00080   size_t i, maplen;
00081 
00082   while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
00083                                                  &i, table)) != -1)
00084     {
00085       for (maplen = STRINGPREP_MAX_MAP_CHARS;
00086            maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
00087         ;
00088 
00089       if (*ucs4len - 1 + maplen >= maxucs4len)
00090         return STRINGPREP_TOO_SMALL_BUFFER;
00091 
00092       memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
00093                sizeof (uint32_t) * (*ucs4len - pos - 1));
00094       memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen);
00095       *ucs4len = *ucs4len - 1 + maplen;
00096     }
00097 
00098   return STRINGPREP_OK;
00099 }
00100 
/* INVERTED(x) yields X with the most significant bit of an unsigned
   long masked off, i.e. it is non-zero whenever any other bit of X is
   set.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags) is non-zero when either
     - INVERTED (profileflags) is zero, profileflags is non-zero and
       shares no bit with flags, or
     - INVERTED (profileflags) is non-zero and profileflags shares at
       least one bit with flags.
   stringprep_4i skips a profile step when this is non-zero.

   Both arguments are parenthesized in the expansion so that compound
   argument expressions (e.g. `a | b') are evaluated as a unit.  Each
   argument is evaluated more than once; do not pass expressions with
   side effects.

   NOTE(review): the name suggests INVERTED is meant to detect
   complemented (~FLAG) profile flags, but as written it is also
   non-zero for plain flag values -- confirm the intended semantics
   before relying on the first alternative.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED (profileflags) && !((profileflags) & (flags)) \
    && (profileflags)) || \
   (INVERTED (profileflags) && ((profileflags) & (flags))))
00105 
00137 int
00138 stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
00139                Stringprep_profile_flags flags,
00140                const Stringprep_profile * profile)
00141 {
00142   size_t i, j;
00143   ssize_t k;
00144   size_t ucs4len = *len;
00145   int rc;
00146 
00147   for (i = 0; profile[i].operation; i++)
00148     {
00149       switch (profile[i].operation)
00150         {
00151         case STRINGPREP_NFKC:
00152           {
00153             uint32_t *q = 0;
00154 
00155             if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00156               break;
00157 
00158             if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
00159               /* Profile requires NFKC, but callee asked for no NFKC. */
00160               return STRINGPREP_FLAG_ERROR;
00161 
00162             q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
00163             if (!q)
00164               return STRINGPREP_NFKC_FAILED;
00165 
00166             for (ucs4len = 0; q[ucs4len]; ucs4len++)
00167               ;
00168 
00169             if (ucs4len >= maxucs4len)
00170               {
00171                 free (q);
00172                 return STRINGPREP_TOO_SMALL_BUFFER;
00173               }
00174 
00175             memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
00176 
00177             free (q);
00178           }
00179           break;
00180 
00181         case STRINGPREP_PROHIBIT_TABLE:
00182           k = stringprep_find_string_in_table (ucs4, ucs4len,
00183                                                NULL, profile[i].table);
00184           if (k != -1)
00185             return STRINGPREP_CONTAINS_PROHIBITED;
00186           break;
00187 
00188         case STRINGPREP_UNASSIGNED_TABLE:
00189           if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00190             break;
00191           if (flags & STRINGPREP_NO_UNASSIGNED)
00192             {
00193               k = stringprep_find_string_in_table
00194                 (ucs4, ucs4len, NULL, profile[i].table);
00195               if (k != -1)
00196                 return STRINGPREP_CONTAINS_UNASSIGNED;
00197             }
00198           break;
00199 
00200         case STRINGPREP_MAP_TABLE:
00201           if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00202             break;
00203           rc = stringprep_apply_table_to_string
00204             (ucs4, &ucs4len, maxucs4len, profile[i].table);
00205           if (rc != STRINGPREP_OK)
00206             return rc;
00207           break;
00208 
00209         case STRINGPREP_BIDI_PROHIBIT_TABLE:
00210         case STRINGPREP_BIDI_RAL_TABLE:
00211         case STRINGPREP_BIDI_L_TABLE:
00212           break;
00213 
00214         case STRINGPREP_BIDI:
00215           {
00216             int done_prohibited = 0;
00217             int done_ral = 0;
00218             int done_l = 0;
00219             int contains_ral = -1;
00220             int contains_l = -1;
00221 
00222             for (j = 0; profile[j].operation; j++)
00223               if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
00224                 {
00225                   done_prohibited = 1;
00226                   k = stringprep_find_string_in_table (ucs4, ucs4len,
00227                                                        NULL,
00228                                                        profile[j].table);
00229                   if (k != -1)
00230                     return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
00231                 }
00232               else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
00233                 {
00234                   done_ral = 1;
00235                   if (stringprep_find_string_in_table
00236                       (ucs4, ucs4len, NULL, profile[j].table) != -1)
00237                     contains_ral = j;
00238                 }
00239               else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
00240                 {
00241                   done_l = 1;
00242                   if (stringprep_find_string_in_table
00243                       (ucs4, ucs4len, NULL, profile[j].table) != -1)
00244                     contains_l = j;
00245                 }
00246 
00247             if (!done_prohibited || !done_ral || !done_l)
00248               return STRINGPREP_PROFILE_ERROR;
00249 
00250             if (contains_ral != -1 && contains_l != -1)
00251               return STRINGPREP_BIDI_BOTH_L_AND_RAL;
00252 
00253             if (contains_ral != -1)
00254               {
00255                 if (!(stringprep_find_character_in_table
00256                       (ucs4[0], profile[contains_ral].table) != -1 &&
00257                       stringprep_find_character_in_table
00258                       (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
00259                   return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
00260               }
00261           }
00262           break;
00263 
00264         default:
00265           return STRINGPREP_PROFILE_ERROR;
00266           break;
00267         }
00268     }
00269 
00270   *len = ucs4len;
00271 
00272   return STRINGPREP_OK;
00273 }
00274 
00275 static int
00276 stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
00277                   Stringprep_profile_flags flags,
00278                   const Stringprep_profile * profile)
00279 {
00280   int rc;
00281 
00282   rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
00283   if (rc != STRINGPREP_OK)
00284     return rc;
00285 
00286   if (ucs4len >= maxucs4len)
00287     return STRINGPREP_TOO_SMALL_BUFFER;
00288 
00289   ucs4[ucs4len] = 0;
00290 
00291   return STRINGPREP_OK;
00292 }
00293 
00318 int
00319 stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
00320                 Stringprep_profile_flags flags,
00321                 const Stringprep_profile * profile)
00322 {
00323   size_t ucs4len;
00324 
00325   for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
00326     ;
00327 
00328   return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
00329 }
00330 
00358 int
00359 stringprep (char *in,
00360             size_t maxlen,
00361             Stringprep_profile_flags flags,
00362             const Stringprep_profile * profile)
00363 {
00364   int rc;
00365   char *utf8 = NULL;
00366   uint32_t *ucs4 = NULL;
00367   size_t ucs4len, maxucs4len, adducs4len = 50;
00368 
00369   do
00370     {
00371       uint32_t *newp;
00372 
00373       if (ucs4)
00374         free (ucs4);
00375       ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
00376       maxucs4len = ucs4len + adducs4len;
00377       newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
00378       if (!newp)
00379         {
00380           free (ucs4);
00381           return STRINGPREP_MALLOC_ERROR;
00382         }
00383       ucs4 = newp;
00384 
00385       rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
00386       adducs4len += 50;
00387     }
00388   while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00389   if (rc != STRINGPREP_OK)
00390     {
00391       free (ucs4);
00392       return rc;
00393     }
00394 
00395   utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
00396   free (ucs4);
00397   if (!utf8)
00398     return STRINGPREP_MALLOC_ERROR;
00399 
00400   if (strlen (utf8) >= maxlen)
00401     {
00402       free (utf8);
00403       return STRINGPREP_TOO_SMALL_BUFFER;
00404     }
00405 
00406   strcpy (in, utf8);            /* flawfinder: ignore */
00407 
00408   free (utf8);
00409 
00410   return STRINGPREP_OK;
00411 }
00412 
00437 int
00438 stringprep_profile (const char *in,
00439                     char **out,
00440                     const char *profile, Stringprep_profile_flags flags)
00441 {
00442   const Stringprep_profiles *p;
00443   char *str = NULL;
00444   size_t len = strlen (in) + 1;
00445   int rc;
00446 
00447   for (p = &stringprep_profiles[0]; p->name; p++)
00448     if (strcmp (p->name, profile) == 0)
00449       break;
00450 
00451   if (!p || !p->name || !p->tables)
00452     return STRINGPREP_UNKNOWN_PROFILE;
00453 
00454   do
00455     {
00456       if (str)
00457         free (str);
00458       str = (char *) malloc (len);
00459       if (str == NULL)
00460         return STRINGPREP_MALLOC_ERROR;
00461 
00462       strcpy (str, in);
00463 
00464       rc = stringprep (str, len, flags, p->tables);
00465       len += 50;
00466     }
00467   while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00468 
00469   if (rc == STRINGPREP_OK)
00470     *out = str;
00471   else
00472     free (str);
00473 
00474   return rc;
00475 }
00476 

Generated on Wed Sep 13 10:20:31 2006 for libidn by  doxygen 1.4.7