00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifdef HAVE_CONFIG_H
00023 # include "config.h"
00024 #endif
00025
00026 #include <stdlib.h>
00027 #include <string.h>
00028 #include <stringprep.h>
00029 #include <punycode.h>
00030
00031 #include "idna.h"
00032
/* DOTP(c): true when code point C is one of the four values this file
   accepts as a label separator: 0x002E, 0x3002, 0xFF0E or 0xFF61.
   The argument is expanded up to four times, so it must not have
   side effects. */
00033 #define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
00034 (c) == 0xFF0E || (c) == 0xFF61)
00035
00036
00037
00069 int
00070 idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
00071 {
00072 size_t len, outlen;
00073 uint32_t *src;
00074 int rc;
00075
00076
00077
00078
00079
00080
00081
00082
00083 {
00084 size_t i;
00085 int inasciirange;
00086
00087 inasciirange = 1;
00088 for (i = 0; i < inlen; i++)
00089 if (in[i] > 0x7F)
00090 inasciirange = 0;
00091 if (inasciirange)
00092 {
00093 src = malloc (sizeof (in[0]) * (inlen + 1));
00094 if (src == NULL)
00095 return IDNA_MALLOC_ERROR;
00096
00097 memcpy (src, in, sizeof (in[0]) * inlen);
00098 src[inlen] = 0;
00099
00100 goto step3;
00101 }
00102 }
00103
00104
00105
00106
00107
00108
00109 {
00110 char *p;
00111
00112 p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
00113 if (p == NULL)
00114 return IDNA_MALLOC_ERROR;
00115
00116 len = strlen (p);
00117 do
00118 {
00119 char *newp;
00120
00121 len = 2 * len + 10;
00122 newp = realloc (p, len);
00123 if (newp == NULL)
00124 {
00125 free (p);
00126 return IDNA_MALLOC_ERROR;
00127 }
00128 p = newp;
00129
00130 if (flags & IDNA_ALLOW_UNASSIGNED)
00131 rc = stringprep_nameprep (p, len);
00132 else
00133 rc = stringprep_nameprep_no_unassigned (p, len);
00134 }
00135 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00136
00137 if (rc != STRINGPREP_OK)
00138 {
00139 free (p);
00140 return IDNA_STRINGPREP_ERROR;
00141 }
00142
00143 src = stringprep_utf8_to_ucs4 (p, -1, NULL);
00144
00145 free (p);
00146 }
00147
00148 step3:
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160 if (flags & IDNA_USE_STD3_ASCII_RULES)
00161 {
00162 size_t i;
00163
00164 for (i = 0; src[i]; i++)
00165 if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
00166 (src[i] >= 0x3A && src[i] <= 0x40) ||
00167 (src[i] >= 0x5B && src[i] <= 0x60) ||
00168 (src[i] >= 0x7B && src[i] <= 0x7F))
00169 {
00170 free (src);
00171 return IDNA_CONTAINS_NON_LDH;
00172 }
00173
00174 if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
00175 {
00176 free (src);
00177 return IDNA_CONTAINS_MINUS;
00178 }
00179 }
00180
00181
00182
00183
00184
00185
00186 {
00187 size_t i;
00188 int inasciirange;
00189
00190 inasciirange = 1;
00191 for (i = 0; src[i]; i++)
00192 {
00193 if (src[i] > 0x7F)
00194 inasciirange = 0;
00195
00196 if (i < 64)
00197 out[i] = src[i];
00198 }
00199 if (i < 64)
00200 out[i] = '\0';
00201 if (inasciirange)
00202 goto step8;
00203 }
00204
00205
00206
00207
00208
00209
00210 {
00211 size_t i;
00212 int match;
00213
00214 match = 1;
00215 for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
00216 if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
00217 match = 0;
00218 if (match)
00219 {
00220 free (src);
00221 return IDNA_CONTAINS_ACE_PREFIX;
00222 }
00223 }
00224
00225
00226
00227
00228
00229 for (len = 0; src[len]; len++)
00230 ;
00231 src[len] = '\0';
00232 outlen = 63 - strlen (IDNA_ACE_PREFIX);
00233 rc = punycode_encode (len, src, NULL,
00234 &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
00235 if (rc != PUNYCODE_SUCCESS)
00236 {
00237 free (src);
00238 return IDNA_PUNYCODE_ERROR;
00239 }
00240 out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
00241
00242
00243
00244
00245
00246 memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
00247
00248
00249
00250
00251
00252
00253 step8:
00254 free (src);
00255 if (strlen (out) < 1 || strlen (out) > 63)
00256 return IDNA_INVALID_LENGTH;
00257
00258 return IDNA_SUCCESS;
00259 }
00260
00261
00262 static int
00263 idna_to_unicode_internal (char *utf8in,
00264 uint32_t * out, size_t * outlen, int flags)
00265 {
00266 int rc;
00267 char tmpout[64];
00268 size_t utf8len = strlen (utf8in) + 1;
00269 size_t addlen = 0;
00270
00271
00272
00273
00274
00275
00276
00277
00278 {
00279 size_t i;
00280 int inasciirange;
00281
00282 inasciirange = 1;
00283 for (i = 0; utf8in[i]; i++)
00284 if (utf8in[i] & ~0x7F)
00285 inasciirange = 0;
00286 if (inasciirange)
00287 goto step3;
00288 }
00289
00290
00291
00292
00293
00294
00295
00296 do
00297 {
00298 char *newp = realloc (utf8in, utf8len + addlen);
00299 if (newp == NULL)
00300 {
00301 free (utf8in);
00302 return IDNA_MALLOC_ERROR;
00303 }
00304 utf8in = newp;
00305 if (flags & IDNA_ALLOW_UNASSIGNED)
00306 rc = stringprep_nameprep (utf8in, utf8len + addlen);
00307 else
00308 rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
00309 addlen += 1;
00310 }
00311 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00312
00313 if (rc != STRINGPREP_OK)
00314 {
00315 free (utf8in);
00316 return IDNA_STRINGPREP_ERROR;
00317 }
00318
00319
00320
00321
00322
00323 step3:
00324 if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
00325 {
00326 free (utf8in);
00327 return IDNA_NO_ACE_PREFIX;
00328 }
00329
00330
00331
00332
00333 memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
00334 strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
00335
00336
00337
00338
00339
00340
00341 (*outlen)--;
00342
00343 rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
00344 if (rc != PUNYCODE_SUCCESS)
00345 {
00346 free (utf8in);
00347 return IDNA_PUNYCODE_ERROR;
00348 }
00349
00350 out[*outlen] = 0;
00351
00352
00353
00354
00355 rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
00356 if (rc != IDNA_SUCCESS)
00357 {
00358 free (utf8in);
00359 return rc;
00360 }
00361
00362
00363
00364
00365
00366 if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
00367 {
00368 free (utf8in);
00369 return IDNA_ROUNDTRIP_VERIFY_ERROR;
00370 }
00371
00372
00373
00374
00375 free (utf8in);
00376 return IDNA_SUCCESS;
00377 }
00378
00414 int
00415 idna_to_unicode_44i (const uint32_t * in, size_t inlen,
00416 uint32_t * out, size_t * outlen, int flags)
00417 {
00418 int rc;
00419 size_t outlensave = *outlen;
00420 char *p;
00421
00422 p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
00423 if (p == NULL)
00424 return IDNA_MALLOC_ERROR;
00425
00426 rc = idna_to_unicode_internal (p, out, outlen, flags);
00427 if (rc != IDNA_SUCCESS)
00428 {
00429 memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
00430 inlen : outlensave));
00431 *outlen = inlen;
00432 }
00433
00434
00435
00436 return rc;
00437 }
00438
00439
00440
00454 int
00455 idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
00456 {
00457 const uint32_t *start = input;
00458 const uint32_t *end = input;
00459 char buf[64];
00460 char *out = NULL;
00461 int rc;
00462
00463
00464
00465
00466
00467
00468 if (input[0] == 0)
00469 {
00470
00471 *output = malloc (1);
00472 if (!*output)
00473 return IDNA_MALLOC_ERROR;
00474 strcpy (*output, "");
00475 return IDNA_SUCCESS;
00476 }
00477
00478 if (DOTP (input[0]) && input[1] == 0)
00479 {
00480
00481 *output = malloc (2);
00482 if (!*output)
00483 return IDNA_MALLOC_ERROR;
00484 strcpy (*output, ".");
00485 return IDNA_SUCCESS;
00486 }
00487
00488 *output = NULL;
00489 do
00490 {
00491 end = start;
00492
00493 for (; *end && !DOTP (*end); end++)
00494 ;
00495
00496 if (*end == '\0' && start == end)
00497 {
00498
00499 buf[0] = '\0';
00500 }
00501 else
00502 {
00503 rc = idna_to_ascii_4i (start, end - start, buf, flags);
00504 if (rc != IDNA_SUCCESS)
00505 return rc;
00506 }
00507
00508 if (out)
00509 {
00510 char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
00511 if (!newp)
00512 {
00513 free (out);
00514 return IDNA_MALLOC_ERROR;
00515 }
00516 out = newp;
00517 strcat (out, ".");
00518 strcat (out, buf);
00519 }
00520 else
00521 {
00522 out = (char *) malloc (strlen (buf) + 1);
00523 if (!out)
00524 return IDNA_MALLOC_ERROR;
00525 strcpy (out, buf);
00526 }
00527
00528 start = end + 1;
00529 }
00530 while (*end);
00531
00532 *output = out;
00533
00534 return IDNA_SUCCESS;
00535 }
00536
00550 int
00551 idna_to_ascii_8z (const char *input, char **output, int flags)
00552 {
00553 uint32_t *ucs4;
00554 size_t ucs4len;
00555 int rc;
00556
00557 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
00558 if (!ucs4)
00559 return IDNA_ICONV_ERROR;
00560
00561 rc = idna_to_ascii_4z (ucs4, output, flags);
00562
00563 free (ucs4);
00564
00565 return rc;
00566
00567 }
00568
00583 int
00584 idna_to_ascii_lz (const char *input, char **output, int flags)
00585 {
00586 char *utf8;
00587 int rc;
00588
00589 utf8 = stringprep_locale_to_utf8 (input);
00590 if (!utf8)
00591 return IDNA_ICONV_ERROR;
00592
00593 rc = idna_to_ascii_8z (utf8, output, flags);
00594
00595 free (utf8);
00596
00597 return rc;
00598 }
00599
00614 int
00615 idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
00616 {
00617 const uint32_t *start = input;
00618 const uint32_t *end = input;
00619 uint32_t *buf;
00620 size_t buflen;
00621 uint32_t *out = NULL;
00622 size_t outlen = 0;
00623 int rc;
00624
00625 *output = NULL;
00626
00627 do
00628 {
00629 end = start;
00630
00631 for (; *end && !DOTP (*end); end++)
00632 ;
00633
00634 buflen = end - start;
00635 buf = malloc (sizeof (buf[0]) * (buflen + 1));
00636 if (!buf)
00637 return IDNA_MALLOC_ERROR;
00638
00639 rc = idna_to_unicode_44i (start, end - start, buf, &buflen, flags);
00640
00641
00642 if (out)
00643 {
00644 uint32_t *newp = realloc (out,
00645 sizeof (out[0])
00646 * (outlen + 1 + buflen + 1));
00647 if (!newp)
00648 {
00649 free (buf);
00650 free (out);
00651 return IDNA_MALLOC_ERROR;
00652 }
00653 out = newp;
00654 out[outlen++] = 0x002E;
00655 memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
00656 outlen += buflen;
00657 out[outlen] = 0x0;
00658 free (buf);
00659 }
00660 else
00661 {
00662 out = buf;
00663 outlen = buflen;
00664 out[outlen] = 0x0;
00665 }
00666
00667 start = end + 1;
00668 }
00669 while (*end);
00670
00671 *output = out;
00672
00673 return IDNA_SUCCESS;
00674 }
00675
00690 int
00691 idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
00692 {
00693 uint32_t *ucs4;
00694 size_t ucs4len;
00695 int rc;
00696
00697 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
00698 if (!ucs4)
00699 return IDNA_ICONV_ERROR;
00700
00701 rc = idna_to_unicode_4z4z (ucs4, output, flags);
00702 free (ucs4);
00703
00704 return rc;
00705 }
00706
00721 int
00722 idna_to_unicode_8z8z (const char *input, char **output, int flags)
00723 {
00724 uint32_t *ucs4;
00725 int rc;
00726
00727 rc = idna_to_unicode_8z4z (input, &ucs4, flags);
00728 *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
00729 free (ucs4);
00730
00731 if (!*output)
00732 return IDNA_ICONV_ERROR;
00733
00734 return rc;
00735 }
00736
00752 int
00753 idna_to_unicode_8zlz (const char *input, char **output, int flags)
00754 {
00755 char *utf8;
00756 int rc;
00757
00758 rc = idna_to_unicode_8z8z (input, &utf8, flags);
00759 *output = stringprep_utf8_to_locale (utf8);
00760 free (utf8);
00761
00762 if (!*output)
00763 return IDNA_ICONV_ERROR;
00764
00765 return rc;
00766 }
00767
00784 int
00785 idna_to_unicode_lzlz (const char *input, char **output, int flags)
00786 {
00787 char *utf8;
00788 int rc;
00789
00790 utf8 = stringprep_locale_to_utf8 (input);
00791 if (!utf8)
00792 return IDNA_ICONV_ERROR;
00793
00794 rc = idna_to_unicode_8zlz (utf8, output, flags);
00795 free (utf8);
00796
00797 return rc;
00798 }
00799