tld.c

Go to the documentation of this file.
00001 /* tld.c --- Handle TLD restriction checking.
00002  * Copyright (C) 2004  Simon Josefsson.
00003  * Copyright (C) 2003, 2004  Free Software Foundation, Inc.
00004  *
00005  * Author: Thomas Jacob, Internet24.de
00006  *
00007  * This file is part of GNU Libidn.
00008  *
00009  * GNU Libidn is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Lesser General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2.1 of the License, or (at your option) any later version.
00013  *
00014  * GNU Libidn is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  * Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with GNU Libidn; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
00022  *
00023  */
00024 
00025 /* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */
00026 #include <stringprep.h>
00027 
00028 /* Get strcmp(). */
00029 #include <string.h>
00030 
00031 /* Get specifications. */
00032 #include <tld.h>
00033 
00034 /* Array of built-in domain restriction structures.  See tlds.c.  */
00035 extern const Tld_table *_tld_tables[];
00036 
00049 const Tld_table *
00050 tld_get_table (const char *tld, const Tld_table ** tables)
00051 {
00052   const Tld_table **tldtable = NULL;
00053 
00054   if (!tld || !tables)
00055     return NULL;
00056 
00057   for (tldtable = tables; *tldtable; tldtable++)
00058     if (!strcmp ((*tldtable)->name, tld))
00059       return *tldtable;
00060 
00061   return NULL;
00062 }
00063 
00078 const Tld_table *
00079 tld_default_table (const char *tld, const Tld_table ** overrides)
00080 {
00081   const Tld_table *tldtable = NULL;
00082 
00083   if (!tld)
00084     return NULL;
00085 
00086   if (overrides)
00087     tldtable = tld_get_table (tld, overrides);
00088 
00089   if (!tldtable)
00090     tldtable = tld_get_table (tld, _tld_tables);
00091 
00092   return tldtable;
00093 }
00094 
/* Non-zero if code point C is one of the dot characters accepted as a
   label separator: U+002E (full stop), U+3002 (ideographic full stop),
   U+FF0E (fullwidth full stop) or U+FF61 (halfwidth ideographic full
   stop).  C may be expanded up to four times, so pass an expression
   without side effects.  */
#define DOTP(c)                 \
  ((c) == 0x002E                \
   || (c) == 0x3002             \
   || (c) == 0xFF0E             \
   || (c) == 0xFF61)
00097 
00111 int
00112 tld_get_4 (const uint32_t * in, size_t inlen, char **out)
00113 {
00114   const uint32_t *ipos;
00115   size_t olen;
00116 
00117   *out = NULL;
00118   if (!in || inlen == 0)
00119     return TLD_NODATA;
00120 
00121   ipos = &in[inlen - 1];
00122   olen = 0;
00123   /* Scan backwards for non(latin)letters. */
00124   while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) ||
00125                         (*ipos >= 0x61 && *ipos <= 0x7A)))
00126     ipos--, olen++;
00127 
00128   if (olen > 0 && DOTP (*ipos)) /* Found something that appears a TLD. */
00129     {
00130       char *out_s = malloc (sizeof (char) * (olen + 1));
00131       char *opos = out_s;
00132 
00133       if (!opos)
00134         return TLD_MALLOC_ERROR;
00135 
00136       ipos++;
00137       /* Transcribe to lowercase ascii string. */
00138       for (; ipos < &in[inlen]; ipos++, opos++)
00139         *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20;
00140       *opos = 0;
00141       *out = out_s;
00142       return TLD_SUCCESS;
00143     }
00144 
00145   return TLD_NO_TLD;
00146 }
00147 
00159 int
00160 tld_get_4z (const uint32_t * in, char **out)
00161 {
00162   const uint32_t *ipos = in;
00163 
00164   if (!in)
00165     return TLD_NODATA;
00166 
00167   while (*ipos)
00168     ipos++;
00169 
00170   return tld_get_4 (in, ipos - in, out);
00171 }
00172 
00185 int
00186 tld_get_z (const char *in, char **out)
00187 {
00188   uint32_t *iucs;
00189   size_t i, ilen;
00190   int rc;
00191 
00192   ilen = strlen (in);
00193   iucs = calloc (ilen, sizeof (*iucs));
00194 
00195   if (!iucs)
00196     return TLD_MALLOC_ERROR;
00197 
00198   for (i = 0; i < ilen; i++)
00199     iucs[i] = in[i];
00200 
00201   rc = tld_get_4 (iucs, ilen, out);
00202 
00203   free (iucs);
00204 
00205   return rc;
00206 }
00207 
00208 /*
00209  * tld_checkchar - verify that character is permitted
00210  * @ch: 32 bit unicode character to check.
00211  * @tld: A #Tld_table data structure to check @ch against.
00212  *
00213  * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid
00214  * character in @tld.
00215  *
00216  * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a
00217  *   valid character for the TLD @tld or if @tld is %NULL,
00218  *   %TLD_INVALID if @ch is invalid as defined by @tld.
00219  */
00220 static int
00221 _tld_checkchar (uint32_t ch, const Tld_table * tld)
00222 {
00223   const Tld_table_element *s, *e, *m;
00224 
00225   if (!tld)
00226     return TLD_SUCCESS;
00227 
00228   /* Check for [-a-z0-9.]. */
00229   if ((ch >= 0x61 && ch <= 0x7A) ||
00230       (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch))
00231     return TLD_SUCCESS;
00232 
00233   s = tld->valid;
00234   e = s + tld->nvalid;
00235   while (s < e)
00236     {
00237       m = s + ((e - s) >> 1);
00238       if (ch < m->start)
00239         e = m;
00240       else if (ch > m->end)
00241         s = m + 1;
00242       else
00243         return TLD_SUCCESS;
00244     }
00245 
00246   return TLD_INVALID;
00247 }
00248 
00268 int
00269 tld_check_4t (const uint32_t * in, size_t inlen, size_t * errpos,
00270               const Tld_table * tld)
00271 {
00272   const uint32_t *ipos;
00273   int rc;
00274 
00275   if (!tld)                     /* No data for TLD so everything is valid. */
00276     return TLD_SUCCESS;
00277 
00278   ipos = in;
00279   while (ipos < &in[inlen])
00280     {
00281       rc = _tld_checkchar (*ipos, tld);
00282       if (rc != TLD_SUCCESS)
00283         {
00284           if (errpos)
00285             *errpos = ipos - in;
00286           return rc;
00287         }
00288       ipos++;
00289     }
00290   return TLD_SUCCESS;
00291 }
00292 
00310 int
00311 tld_check_4tz (const uint32_t * in, size_t * errpos, const Tld_table * tld)
00312 {
00313   const uint32_t *ipos = in;
00314 
00315   if (!ipos)
00316     return TLD_NODATA;
00317 
00318   while (*ipos)
00319     ipos++;
00320 
00321   return tld_check_4t (in, ipos - in, errpos, tld);
00322 }
00323 
00347 int
00348 tld_check_4 (const uint32_t * in, size_t inlen, size_t * errpos,
00349              const Tld_table ** overrides)
00350 {
00351   const Tld_table *tld;
00352   char *domain;
00353   int rc;
00354 
00355   if (errpos)
00356     *errpos = 0;
00357 
00358   /* Get TLD name. */
00359   rc = tld_get_4 (in, inlen, &domain);
00360 
00361   if (rc != TLD_SUCCESS)
00362     {
00363       if (rc == TLD_NO_TLD)     /* No TLD, say OK */
00364         return TLD_SUCCESS;
00365       else
00366         return rc;
00367     }
00368 
00369   /* Retrieve appropriate data structure. */
00370   tld = tld_default_table (domain, overrides);
00371   free (domain);
00372 
00373   return tld_check_4t (in, inlen, errpos, tld);
00374 }
00375 
00397 int
00398 tld_check_4z (const uint32_t * in, size_t * errpos,
00399               const Tld_table ** overrides)
00400 {
00401   const uint32_t *ipos = in;
00402 
00403   if (!ipos)
00404     return TLD_NODATA;
00405 
00406   while (*ipos)
00407     ipos++;
00408 
00409   return tld_check_4 (in, ipos - in, errpos, overrides);
00410 }
00411 
00435 int
00436 tld_check_8z (const char *in, size_t * errpos, const Tld_table ** overrides)
00437 {
00438   uint32_t *iucs;
00439   size_t ilen;
00440   int rc;
00441 
00442   if (!in)
00443     return TLD_NODATA;
00444 
00445   iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen);
00446 
00447   if (!iucs)
00448     return TLD_MALLOC_ERROR;
00449 
00450   rc = tld_check_4 (iucs, ilen, errpos, overrides);
00451 
00452   free (iucs);
00453 
00454   return rc;
00455 }
00456 
00480 int
00481 tld_check_lz (const char *in, size_t * errpos, const Tld_table ** overrides)
00482 {
00483   char *utf8;
00484   int rc;
00485 
00486   if (!in)
00487     return TLD_NODATA;
00488 
00489   utf8 = stringprep_locale_to_utf8 (in);
00490   if (!utf8)
00491     return TLD_ICONV_ERROR;
00492 
00493 
00494   rc = tld_check_8z (utf8, errpos, overrides);
00495 
00496   free (utf8);
00497 
00498   return rc;
00499 }
00500 

Generated on Wed Sep 13 10:20:31 2006 for libidn by  doxygen 1.4.7