The OpenD Programming Language

1 module requests.idna;
2 
3 import std.format;
4 import std.exception;
5 import std.uni;
6 import std.ascii;
7 import std.range;
8 import std.algorithm;
9 import std.regex;
10 import std.functional;
11 
12 static import requests.idna.punycode;
13 
14 
// Module-private constants shared by the encoding routines below.

/// ASCII prefix that alabel() prepends to the punycode form of a label.
private static immutable _alabel_prefix = "xn--";

/// Regex character class used by idn_encode() to split a domain into labels:
/// matches U+002E, U+3002, U+FF0E and U+FF61.
private static immutable _unicode_dots_re = "[\\.\u002e\u3002\uff0e\uff61]";
19 
/// Exception thrown by the validation and encoding routines of this module.
class IDNAException : Exception {
    /**
     * Params:
     *   msg  = description of the failure
     *   file = source file recorded for the exception (defaults to the call site)
     *   line = source line recorded for the exception (defaults to the call site)
     */
    pure @safe this(string msg, string file = __FILE__, size_t line = __LINE__) {
        super(msg, file, line);
    }
}
25 
/**
 * Tells whether a single label fits the per-label size limit.
 *
 * Params:
 *   label = label to measure; its length is taken in UTF-8 code units
 * Returns: true when the label is at most 63 code units long
 */
bool valid_label_length(string label) pure nothrow @nogc @safe {
    enum size_t maxLabelLength = 63;
    return !(label.length > maxLabelLength);
}
29 
/**
 * Tells whether a whole encoded name fits the total size limit.
 *
 * Params:
 *   label        = the string to measure; its length is taken in UTF-8 code units
 *   trailing_dot = when true one extra code unit is allowed (limit 254 instead of 253)
 * Returns: true when the string does not exceed the applicable limit
 */
bool valid_string_length(string label, bool trailing_dot = false) pure nothrow @safe @nogc {
    size_t limit = 253;
    if ( trailing_dot ) {
        limit = 254;
    }
    return label.length <= limit;
}
33 
/**
 * Verifies the hyphen restrictions on a label.
 *
 * A label must not begin or end with '-', and must not have "--" as its
 * third and fourth characters. Positions are counted in code points, not in
 * UTF-8 code units, so multi-byte characters at the start of the label do
 * not shift the check.
 *
 * Params:
 *   label = the label to validate
 * Returns: true when the label passes every check
 * Throws: IDNAException when the label is empty or violates a hyphen rule
 */
bool check_hyphen_ok(string label) pure @safe {
    import std.algorithm.comparison : equal;
    import std.range : drop, take;
    import std.utf : byDchar;

    // Guard first: indexing an empty label would raise a RangeError instead
    // of a catchable validation failure.
    if ( label.length == 0 ) {
        throw new IDNAException("Empty label");
    }
    // '-' is ASCII, so looking at the first/last code unit is sufficient here.
    if ( label[0] == '-' || label[$-1] == '-' ) {
        throw new IDNAException("Label can't start or ends with hyphen");
    }
    // Decode to code points before looking at positions 3 and 4; slicing the
    // raw UTF-8 (label[2..4]) would inspect the wrong characters whenever the
    // first two characters are not both single-byte.
    if ( label.byDchar.drop(2).take(2).equal("--"d) ) {
        throw new IDNAException("Label can't have hyphens in 3 and 4 positions");
    }
    return true;
}
43 
/**
 * Verifies that a label is already in Unicode Normalization Form C.
 *
 * Params:
 *   label = the label to validate
 * Returns: true when normalizing the label leaves its content unchanged
 * Throws: IDNAException when the label differs from its normalized form
 */
bool check_nfc(string label) @safe {
    // Compare by content, not by identity: normalize() only hands back the
    // very same slice on its fast path. When it has to rebuild the string it
    // returns a fresh array that may still be equal to the input, and an
    // `is` comparison would then reject a correctly normalized label.
    if ( label != normalize(label) ) {
        throw new IDNAException("label %s is not normalized".format(label));
    }
    return true;
}
/**
 * Verifies that a label does not start with a combining character.
 *
 * Params:
 *   label = the label to validate
 * Returns: true when the first code point has combining class 0
 * Throws: IDNAException when the label is empty or its first code point has
 *         a non-zero combining class
 */
bool check_initial_combiner(string label) pure @safe {
    // Taking .front of an empty string is a contract violation; report it as
    // a regular validation failure instead.
    if ( label.length == 0 ) {
        throw new IDNAException("Empty label");
    }
    if ( combiningClass(label.front) != 0 ) {
        throw new IDNAException("Label begins with an illegal combining character");
    }
    return true;
}
57 
/**
 * Runs every label-level validation in sequence.
 *
 * Params:
 *   label = the label to validate
 * Returns: true when the label is non-empty and passes the hyphen, NFC and
 *          initial-combiner checks
 * Throws: IDNAException from the first check that fails, or for an empty label
 */
bool check_label(string label) @safe {
    if ( label.empty ) {
        throw new IDNAException("Empty label");
    }
    // Each check either returns true or throws, so the chain evaluates them
    // in order and yields true only when all of them pass.
    return check_hyphen_ok(label)
        && check_nfc(label)
        && check_initial_combiner(label);
}
69 
/**
 * Converts a U-label into its A-label form.
 *
 * The label is validated with check_label(), punycode-encoded and prefixed
 * with the A-label prefix.
 *
 * Params:
 *   label = the label to convert
 * Returns: the prefixed, punycode-encoded label
 * Throws: IDNAException when validation fails or the result exceeds the
 *         per-label length limit
 */
string alabel(string label) @safe {
    check_label(label);

    auto punycoded = requests.idna.punycode.encode(label);
    auto candidate = _alabel_prefix ~ punycoded;
    if ( valid_label_length(candidate) ) {
        return candidate;
    }
    throw new IDNAException("Label %s too long".format(candidate));
}
79 
/**
 * Encodes one label, leaving pure-ASCII labels untouched.
 *
 * Params:
 *   label = the label to encode
 * Returns: the label itself when it contains only ASCII characters,
 *          otherwise the result of alabel(label)
 * Throws: whatever alabel() throws for a non-ASCII label
 */
string encode_label(string label) @safe {
    immutable nonAsciiCount = label.count!(ch => !isASCII(ch));
    return nonAsciiCount == 0 ? label : alabel(label);
}
85 
/**
 * Encodes an internationalized domain name into its ASCII form.
 *
 * A domain that is already pure ASCII is returned unchanged (no lowercasing
 * and no validation is applied to it). Otherwise the domain is lowercased,
 * split on any of the dot characters in _unicode_dots_re, every label is
 * passed through encode_label() and the results are joined with '.'.
 *
 * Params:
 *   domain = the domain name to encode
 * Returns: the encoded domain name
 * Throws: IDNAException when a label fails validation or the encoded name
 *         is too long
 */
string idn_encode(string domain) @safe {
    if ( domain.count!(not!isASCII) == 0 )
        return domain;
    auto src = domain.toLower;
    auto ulabels = src.splitter(regex(_unicode_dots_re));
    string encoded = ulabels.map!encode_label.join(".");
    // A trailing separator in the input produces an empty last label, so the
    // joined result ends with '.'. Tell the length check about it so that a
    // fully-qualified name gets the extra character it is allowed.
    immutable trailing_dot = encoded.length > 0 && encoded[$-1] == '.';
    if ( !valid_string_length(encoded, trailing_dot) ) {
        throw new IDNAException("Encoded domain name is too long");
    }
    return encoded;
}
97 
// Exercises the length helpers, the individual label checks, alabel() against
// a table of known TLD encodings, and idn_encode() on whole domain names.
unittest {
    import std.stdio;
    import std.array;

    // Each entry is [U-label, expected A-label].
    immutable tldPairs = [
        ["\u6d4b\u8bd5", "xn--0zwm56d"],
        ["\u092a\u0930\u0940\u0915\u094d\u0937\u093e", "xn--11b5bs3a9aj6g"],
        ["\ud55c\uad6d", "xn--3e0b707e"],
        ["\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435", "xn--80akhbyknj4f"],
        ["\u0441\u0440\u0431", "xn--90a3ac"],
        ["\ud14c\uc2a4\ud2b8", "xn--9t4b11yi5a"],
        ["\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd", "xn--clchc0ea0b2g2a9gcd"],
        ["\u05d8\u05e2\u05e1\u05d8", "xn--deba0ad"],
        ["\u4e2d\u56fd", "xn--fiqs8s"],
        ["\u4e2d\u570b", "xn--fiqz9s"],
        ["\u0c2d\u0c3e\u0c30\u0c24\u0c4d", "xn--fpcrj9c3d"],
        ["\u6e2c\u8a66", "xn--g6w251d"],
        ["\u0aad\u0abe\u0ab0\u0aa4", "xn--gecrj9c"],
        ["\u092d\u093e\u0930\u0924", "xn--h2brj9c"],
        ["\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc", "xn--hgbk6aj7f53bba"],
        ["\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8", "xn--hlcj6aya9esc7a"],
        ["\u0443\u043a\u0440", "xn--j1amh"],
        ["\u9999\u6e2f", "xn--j6w193g"],
        ["\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae", "xn--jxalpdlp"],
        ["\u0625\u062e\u062a\u0628\u0627\u0631", "xn--kgbechtv"],
        ["\u53f0\u6e7e", "xn--kprw13d"],
        ["\u53f0\u7063", "xn--kpry57d"],
        ["\u0627\u0644\u062c\u0632\u0627\u0626\u0631", "xn--lgbbat1ad8j"],
        ["\u0639\u0645\u0627\u0646", "xn--mgb9awbf"],
        ["\u0627\u06cc\u0631\u0627\u0646", "xn--mgba3a4f16a"],
        ["\u0627\u0645\u0627\u0631\u0627\u062a", "xn--mgbaam7a8h"],
        ["\u067e\u0627\u06a9\u0633\u062a\u0627\u0646", "xn--mgbai9azgqp6j"],
        ["\u0627\u0644\u0627\u0631\u062f\u0646", "xn--mgbayh7gpa"],
        ["\u0628\u06be\u0627\u0631\u062a", "xn--mgbbh1a71e"],
        ["\u0627\u0644\u0645\u063a\u0631\u0628", "xn--mgbc0a9azcg"],
        ["\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629", "xn--mgberp4a5d4ar"],
        ["\u10d2\u10d4", "xn--node"],
        ["\u0e44\u0e17\u0e22", "xn--o3cw4h"],
        ["\u0633\u0648\u0631\u064a\u0629", "xn--ogbpf8fl"],
        ["\u0440\u0444", "xn--p1ai"],
        ["\u062a\u0648\u0646\u0633", "xn--pgbs0dh"],
        ["\u0645\u0635\u0631", "xn--wgbh1c"],
        ["\u0642\u0637\u0631", "xn--wgbl6a"],
        ["\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8", "xn--xkc2al3hye2a"],
        ["\u65b0\u52a0\u5761", "xn--yfro4i67o"],
        ["\u0641\u0644\u0633\u0637\u064a\u0646", "xn--ygbi2ammx"],
        ["\u30c6\u30b9\u30c8", "xn--zckzah"],
        ["\u049b\u0430\u0437", "xn--80ao21a"],
        ["\u0645\u0644\u064a\u0633\u064a\u0627", "xn--mgbx4cd0ab"],
        ["\u043c\u043e\u043d", "xn--l1acc"],
        ["\u0633\u0648\u062f\u0627\u0646", "xn--mgbpl2fh"]
        //
        // these strings do not pass normalization test
        //
        //["\u0dbd\u0d82\u0d9a\u0dcf", "xn--fzc2c9e2c"],
        //["\u09ad\u09be\u09b0\u09a4", "xn--45brj9c"],
        //["\u09ac\u09be\u0982\u09b2\u09be", "xn--54b7fta0cc"],
        //["\u0a2d\u0a3e\u0a30\u0a24", "xn--s9brj9c"],
        //["\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe", "xn--xkc2dl3a5ee0h"],
    ];

    // Per-label length limit.
    assert(valid_label_length("abc"));
    assert(!valid_label_length("a".replicate(64)));

    // Whole-name length limit, with and without the trailing-dot allowance.
    assert(valid_string_length("a".replicate(253)));
    assert(!valid_string_length("a".replicate(254)));
    assert(valid_string_length("a".replicate(254), true));

    // Hyphen placement rules.
    assert(check_hyphen_ok("ab"));
    assertThrown!IDNAException(check_hyphen_ok("-abcd"));
    assertThrown!IDNAException(check_hyphen_ok("abcd-"));
    assertThrown!IDNAException(check_hyphen_ok("ab--cd"));

    // Normalization check.
    assert(check_nfc("привіт"));
    assert(check_nfc("\u03D3"));
    assertThrown!IDNAException(check_nfc("\u03D2\u0301"));

    // A combining mark is fine after a base character but not at the start.
    assert(check_initial_combiner("n\u0303"));
    assertThrown!IDNAException(check_initial_combiner("\u0303n"));

    // Every table entry must encode to its expected A-label.
    foreach (pair; tldPairs) {
        string unicodeForm = pair[0];
        string asciiForm = pair[1];
        assert(alabel(unicodeForm) == asciiForm);
    }

    // Whole-domain encoding.
    assert(toLower("Тест") == "тест");
    assert(idn_encode("abc.de") == "abc.de");
    assert(idn_encode("тест") != "тест");
    assert(idn_encode("\u30c6\u30b9\u30c8.xn--zckzah") == "xn--zckzah.xn--zckzah");
    assert(idn_encode("\u30c6\u30b9\u30c8\uff0e\u30c6\u30b9\u30c8") == "xn--zckzah.xn--zckzah");
    assert(idn_encode("\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa") == "xn---------90gglbagaar.aa");
}