The library contains a generic Stringprep implementation that does Unicode 3.2 NFKC normalization, mapping and prohibition of characters, and bidirectional character handling. Profiles for Nameprep, iSCSI, SASL and XMPP are included. Punycode and ASCII Compatible Encoding (ACE) via IDNA are supported. A mechanism to define Top-Level Domain (TLD) specific validation tables, and to compare strings against those tables, is included. Default tables for some TLDs are also included.
The Stringprep API consists of two main functions, one for converting data from the system's native representation into UTF-8, and one function to perform the Stringprep processing. Adding a new Stringprep profile for your application within the API is straightforward. The Punycode API consists of one encoding function and one decoding function. The IDNA API consists of the ToASCII and ToUnicode functions, as well as a high-level interface for converting entire domain names to and from the ACE encoded form. The TLD API consists of one set of functions to extract the TLD name from a domain string, one set of functions to locate the proper TLD table to use based on the TLD name, core functions to validate a string against a TLD table, and some utility wrappers to perform all the steps in one call.
The library is used by, e.g., GNU SASL and Shishi to process user names and passwords. Libidn can be built into GNU Libc to enable a new system-wide getaddrinfo() flag for IDN processing.
Libidn is developed for the GNU/Linux system, but runs on over 20 Unix platforms (including Solaris, IRIX, AIX, and Tru64) and Windows. Libidn is written in C, and parts of the API are accessible from C, C++, Emacs Lisp, Python and Java.
The project web page:
http://www.gnu.org/software/libidn/
The software archive:
ftp://alpha.gnu.org/pub/gnu/libidn/
For more information see:
http://www.ietf.org/html.charters/idn-charter.html
http://www.ietf.org/rfc/rfc3454.txt (stringprep specification)
http://www.ietf.org/rfc/rfc3490.txt (idna specification)
http://www.ietf.org/rfc/rfc3491.txt (nameprep specification)
http://www.ietf.org/rfc/rfc3492.txt (punycode specification)
http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-04.txt
http://www.ietf.org/internet-drafts/draft-ietf-krb-wg-utf8-profile-01.txt
http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt
http://www.ietf.org/internet-drafts/draft-ietf-sasl-saslprep-00.txt
http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt
http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt
Further information and paid contract development:
Simon Josefsson <simon@josefsson.org>
/* example.c --- Example code showing how to use stringprep(). * Copyright (C) 2002, 2003, 2004 Simon Josefsson * * This file is part of GNU Libidn. * * GNU Libidn is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * GNU Libidn is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <locale.h> /* setlocale() */ #include <stringprep.h> /* * Compiling using libtool and pkg-config is recommended: * * $ libtool cc -o example example.c `pkg-config --cflags --libs libidn` * $ ./example * Input string encoded as `ISO-8859-1': ª * Before locale2utf8 (length 2): aa 0a * Before stringprep (length 3): c2 aa 0a * After stringprep (length 2): 61 0a * $ * */ int main (int argc, char *argv[]) { char buf[BUFSIZ]; char *p; int rc; size_t i; setlocale (LC_ALL, ""); printf ("Input string encoded as `%s': ", stringprep_locale_charset ()); fflush (stdout); fgets (buf, BUFSIZ, stdin); printf ("Before locale2utf8 (length %d): ", strlen (buf)); for (i = 0; i < strlen (buf); i++) printf ("%02x ", buf[i] & 0xFF); printf ("\n"); p = stringprep_locale_to_utf8 (buf); if (p) { strcpy (buf, p); free (p); } else printf ("Could not convert string to UTF-8, continuing anyway...\n"); printf ("Before stringprep (length %d): ", strlen (buf)); for (i = 0; i < strlen (buf); i++) printf ("%02x ", buf[i] & 0xFF); printf ("\n"); rc = stringprep (buf, BUFSIZ, 
0, stringprep_nameprep); if (rc != STRINGPREP_OK) printf ("Stringprep failed (%d): %s\n", rc, stringprep_strerror (rc)); else { printf ("After stringprep (length %d): ", strlen (buf)); for (i = 0; i < strlen (buf); i++) printf ("%02x ", buf[i] & 0xFF); printf ("\n"); } return 0; }
/* example3.c --- Example ToASCII() code showing how to use Libidn. * Copyright (C) 2002, 2003, 2004, 2006 Simon Josefsson * * This file is part of GNU Libidn. * * GNU Libidn is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * GNU Libidn is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <locale.h> /* setlocale() */ #include <stringprep.h> /* stringprep_locale_charset() */ #include <idna.h> /* idna_to_ascii_lz() */ /* * Compiling using libtool and pkg-config is recommended: * * $ libtool cc -o example3 example3.c `pkg-config --cflags --libs libidn` * $ ./example3 * Input domain encoded as `ISO-8859-1': www.räksmörgåsª.example * Read string (length 23): 77 77 77 2e 72 e4 6b 73 6d f6 72 67 e5 73 aa 2e 65 78 61 6d 70 6c 65 * ACE label (length 33): 'www.xn--rksmrgsa-0zap8p.example' * 77 77 77 2e 78 6e 2d 2d 72 6b 73 6d 72 67 73 61 2d 30 7a 61 70 38 70 2e 65 78 61 6d 70 6c 65 * $ * */ int main (int argc, char *argv[]) { char buf[BUFSIZ]; char *p; int rc; size_t i; setlocale (LC_ALL, ""); printf ("Input domain encoded as `%s': ", stringprep_locale_charset ()); fflush (stdout); fgets (buf, BUFSIZ, stdin); buf[strlen (buf) - 1] = '\0'; printf ("Read string (length %d): ", strlen (buf)); for (i = 0; i < strlen (buf); i++) printf ("%02x ", buf[i] & 0xFF); printf ("\n"); rc = idna_to_ascii_lz (buf, &p, 0); if (rc != 
IDNA_SUCCESS) { printf ("ToASCII() failed (%d): %s\n", rc, idna_strerror (rc)); exit (1); } printf ("ACE label (length %d): '%s'\n", strlen (p), p); for (i = 0; i < strlen (p); i++) printf ("%02x ", p[i] & 0xFF); printf ("\n"); free (p); return 0; }
/* example4.c --- Example ToUnicode() code showing how to use Libidn. * Copyright (C) 2002, 2003, 2004, 2006 Simon Josefsson * * This file is part of GNU Libidn. * * GNU Libidn is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * GNU Libidn is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <locale.h> /* setlocale() */ #include <stringprep.h> /* stringprep_locale_charset() */ #include <idna.h> /* idna_to_unicode_lzlz() */ /* * Compiling using libtool and pkg-config is recommended: * * $ libtool cc -o example4 example4.c `pkg-config --cflags --libs libidn` * $ ./example4 * Input domain encoded as `ISO-8859-1': www.xn--rksmrgsa-0zap8p.example * Read string (length 33): 77 77 77 2e 78 6e 2d 2d 72 6b 73 6d 72 67 73 61 2d 30 7a 61 70 38 70 2e 65 78 61 6d 70 6c 65 * ACE label (length 23): 'www.räksmörgåsa.example' * 77 77 77 2e 72 e4 6b 73 6d f6 72 67 e5 73 61 2e 65 78 61 6d 70 6c 65 * $ * */ int main (int argc, char *argv[]) { char buf[BUFSIZ]; char *p; int rc; size_t i; setlocale (LC_ALL, ""); printf ("Input domain encoded as `%s': ", stringprep_locale_charset ()); fflush (stdout); fgets (buf, BUFSIZ, stdin); buf[strlen (buf) - 1] = '\0'; printf ("Read string (length %d): ", strlen (buf)); for (i = 0; i < strlen (buf); i++) printf ("%02x ", buf[i] & 0xFF); printf ("\n"); rc = idna_to_unicode_lzlz (buf, &p, 0); if (rc 
!= IDNA_SUCCESS) { printf ("ToUnicode() failed (%d): %s\n", rc, idna_strerror (rc)); exit (1); } printf ("ACE label (length %d): '%s'\n", strlen (p), p); for (i = 0; i < strlen (p); i++) printf ("%02x ", p[i] & 0xFF); printf ("\n"); free (p); return 0; }
/* example5.c --- Example TLD checking. * Copyright (C) 2004 Simon Josefsson * * This file is part of GNU Libidn. * * GNU Libidn is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * GNU Libidn is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ #include <stdio.h> #include <stdlib.h> #include <string.h> /* Get stringprep_locale_charset, etc. */ #include <stringprep.h> /* Get idna_to_ascii_8z, etc. */ #include <idna.h> /* Get tld_check_4z. 
*/ #include <tld.h> /* * Compiling using libtool and pkg-config is recommended: * * $ libtool cc -o example5 example5.c `pkg-config --cflags --libs libidn` * $ ./example5 * Input domain encoded as `UTF-8': fooß.no * Read string (length 8): 66 6f 6f c3 9f 2e 6e 6f * ToASCII string (length 8): fooss.no * ToUnicode string: U+0066 U+006f U+006f U+0073 U+0073 U+002e U+006e U+006f * Domain accepted by TLD check * * $ ./example5 * Input domain encoded as `UTF-8': gr€€n.no * Read string (length 12): 67 72 e2 82 ac e2 82 ac 6e 2e 6e 6f * ToASCII string (length 16): xn--grn-l50aa.no * ToUnicode string: U+0067 U+0072 U+20ac U+20ac U+006e U+002e U+006e U+006f * Domain rejected by TLD check, Unicode position 2 * */ int main (int argc, char *argv[]) { char buf[BUFSIZ]; char *p; uint32_t *r; int rc; size_t errpos, i; printf ("Input domain encoded as `%s': ", stringprep_locale_charset ()); fflush (stdout); fgets (buf, BUFSIZ, stdin); buf[strlen (buf) - 1] = '\0'; printf ("Read string (length %d): ", strlen (buf)); for (i = 0; i < strlen (buf); i++) printf ("%02x ", buf[i] & 0xFF); printf ("\n"); p = stringprep_locale_to_utf8 (buf); if (p) { strcpy (buf, p); free (p); } else printf ("Could not convert string to UTF-8, continuing anyway...\n"); rc = idna_to_ascii_8z (buf, &p, 0); if (rc != IDNA_SUCCESS) { printf ("idna_to_ascii_8z failed (%d): %s\n", rc, idna_strerror (rc)); return 2; } printf ("ToASCII string (length %d): %s\n", strlen (p), p); rc = idna_to_unicode_8z4z (p, &r, 0); free (p); if (rc != IDNA_SUCCESS) { printf ("idna_to_unicode_8z4z failed (%d): %s\n", rc, idna_strerror (rc)); return 2; } printf ("ToUnicode string: "); for (i = 0; r[i]; i++) printf ("U+%04x ", r[i]); printf ("\n"); rc = tld_check_4z (r, &errpos, NULL); free (r); if (rc == TLD_INVALID) { printf ("Domain rejected by TLD check, Unicode position %d\n", errpos); return 1; } else if (rc != TLD_SUCCESS) { printf ("tld_check_4z() failed (%d): %s\n", rc, tld_strerror (rc)); return 2; } printf ("Domain accepted 
by TLD check\n"); return 0; }