The OpenD Programming Language

1 /++
2 $(H1 @nogc and nothrow Parsing Utilities)
3 
4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
5 Authors: Ilia Ki
6 Copyright: 2020 Ilia Ki, Kaleidic Associates Advisory Limited, Symmetry Investments
7 +/
8 module mir.parse;
9 
/++
Position inside parsed input: a `file` string plus optional line and column numbers.
+/
struct ParsePosition
{
    /// File the position refers to.
    string file;
    /// Line number; 0 is for unknown.
    uint line;
    /// Column number; 0 is for unknown.
    uint column;

    /++
    Writes the position to `w` as `file`, `file(line)` or `file(line,column)`.

    The parenthesised part is written only when `line` is non-zero; the column
    is added only when `column` is non-zero as well.

    Params:
        w = output sink; must provide `put` and be usable with `mir.format.print`
    +/
    void toString(W)(scope ref W w) scope const
    {
        import mir.format: print;

        w.put(file);

        // Nothing more to print when the line is unknown.
        if (line == 0)
            return;

        w.put("(");
        w.print(line);
        if (column != 0)
        {
            w.put(",");
            w.print(column);
        }
        w.put(")");
    }
}
40 
/++
Key describing what kind of exponent marker (if any) was met while parsing a decimal.

The character-based members are stored as the distance of the character from `'0'`.
+/
enum DecimalExponentKey
{
    /// No key; numeric value 0.
    none = 0,
    /// Infinity marker; numeric value 1.
    infinity = 1,
    /// NaN marker; numeric value 2.
    nan = 2,
    /// Decimal dot, stored as `'.' - '0'`.
    dot = '.' - '0',
    /// Lower-case `d`, stored as `'d' - '0'`.
    d = 'd' - '0',
    /// Lower-case `e`, stored as `'e' - '0'`.
    e = 'e' - '0',
    /// Upper-case `D`, stored as `'D' - '0'`.
    D = 'D' - '0',
    /// Upper-case `E`, stored as `'E' - '0'`.
    E = 'E' - '0',
}
61 
/++
Pairs a decimal exponent value with the `DecimalExponentKey` that accompanies it.
+/
struct DecimalExponentInfo
{
    /// Exponent value.
    long exponent;
    /// Key associated with the exponent.
    DecimalExponentKey key;
}
70 
/// `mir.conv: to` extension.
version(mir_bignum_test)
@safe pure @nogc
unittest
{
    import mir.conv: to;
    import mir.small_string;
    import mir.test: should;

    // plain string literals
    to!double("123.0").should == 123;
    to!int("123").should == 123;
    to!byte("123").should == 123;

    // the same conversion starting from a SmallString
    // (a local alias is not visible to UFCS, so it is called directly)
    alias S = SmallString!32;
    S("123.0").to!double.should == 123;
}
87 
88 import std.traits: isMutable, isFloatingPoint, isSomeChar, isSigned, isUnsigned, Unsigned;
89 
/++
Performs `nothrow` and `@nogc` string to native type conversion.

Returns:
    parsed value
Throws:
    `nogc` Exception in case of parse error or non-empty remaining input.
    When `D_Exceptions` is not defined the failure paths are `assert(0)` instead.

Floating_point:
    Mir parsing supports up-to quadruple precision.
    The conversion error is 0 ULP for normal numbers.
    Subnormal numbers with an exponent greater than or equal to -512 have upper error bound equal to 1 ULP.
+/
template fromString(T)
    if (isMutable!T)
{
    /++
    Params:
        str = text to convert; the whole text has to be consumed
    +/
    T fromString(C)(scope const(C)[] str)
        if (isSomeChar!C)
    {
        import mir.utility: _expect;
        static immutable excfp = new Exception("fromString failed to parse " ~ T.stringof);

        T result;

        static if (isFloatingPoint!T)
        {
            // The module-level overload reports success as a boolean.
            if (_expect(.fromString(str, result), true))
                return result;
        }
        else
        {
            static immutable excne = new Exception("fromString: remaining input is not empty after parsing " ~ T.stringof);

            if (_expect(parse!T(str, result), true))
            {
                if (_expect(str.length == 0, true))
                    return result;

                // A value was parsed, but something follows it.
                version (D_Exceptions)
                    { import mir.exception : toMutable; throw excne.toMutable; }
                else
                    assert(0);
            }
        }

        // Shared failure path: nothing could be parsed.
        version (D_Exceptions)
            { import mir.exception : toMutable; throw excfp.toMutable; }
        else
            assert(0);
    }
}
146 
version(unittest)
{
    import core.stdc.stdlib: strtof, strtod, strtold;

    /// Test helper: casts the callable `t` to the same type with the `pure` attribute added.
    private auto _assumePure(T)(scope return T t)
    {
        import std.traits;
        alias PureT = SetFunctionAttributes!(
            T,
            functionLinkage!T,
            functionAttributes!T | FunctionAttribute.pure_);
        return cast(PureT) t;
    }

    /// Test helper: parses `str` as `float` through C `strtof`.
    /// NOTE(review): `str.ptr` is handed to C as-is, so callers presumably pass zero-terminated literals.
    private static @trusted float _stdc_parse(T : float)(string str)
    {
        auto endPtr = str.ptr + str.length;
        auto parser = _assumePure(&strtof);
        return parser(str.ptr, &endPtr);
    }

    /// Test helper: parses `str` as `double` through C `strtod`.
    private static @trusted double _stdc_parse(T : double)(string str)
    {
        auto endPtr = str.ptr + str.length;
        auto parser = _assumePure(&strtod);
        return parser(str.ptr, &endPtr);
    }

    /// Test helper: parses `str` as `real` through C `strtold`.
    private static @trusted real _stdc_parse(T : real)(string str)
    {
        auto endPtr = str.ptr + str.length;
        auto parser = _assumePure(&strtold);
        return parser(str.ptr, &endPtr);
    }
}
160 
///
version(mir_bignum_test)
@safe pure @nogc unittest
{
    import mir.test;

    // integer target
    fromString!int("123").should == 123;

    // floating point targets
    fromString!float(".5").should == .5;
    fromString!double("12.3").should == 12.3;
    fromString!float("12.3").should == 12.3f;
    fromString!real("12.3").should == 12.3L;
    fromString!double("-12.3e-30").should == -12.3e-30;
    fromString!double("2.9802322387695312E-8").should == 2.9802322387695312E-8;

    // default support of underscores
    fromString!double("123_456.789_012").should == 123_456.789_012;
    fromString!double("12_34_56_78_90_12e-6").should == 123_456.789_012;

    // default support of leading zeros
    fromString!double("010").should == 10.0;
    fromString!double("000010").should == 10.0;
    fromString!double("0000.10").should == 0.1;
    fromString!double("0000e10").should == 0;

    // `version(all) {} else` keeps the CTFE checks below switched off.
    version(all) {} else
    version (TeslAlgoM) {} else
    {
        /// Test CTFE support
        static assert(fromString!int("-123") == -123);

        static assert(fromString!double("-12.3e-30") == -0x1.f2f280b2414d5p-97);
        static assert(fromString!double("+12.3e+30") == 0x1.367ee3119d2bap+103);

        static assert(fromString!double("1.448997445238699") == 0x1.72f17f1f49aadp0);
        static if (real.mant_dig >= 64)
            static assert(fromString!real("1.448997445238699") == 1.448997445238699L);

        static assert(fromString!float("3.518437208883201171875") == 0x1.c25c26p+1);
        static assert(fromString!double("3.518437208883201171875") == 0x1.c25c268497684p+1);
        static if (real.mant_dig >= 64)
            static assert(fromString!real("3.518437208883201171875") == 0xe.12e13424bb4232fp-2L);
    }

    // Compares Mir's result with the C runtime's strtof/strtod/strtold.
    void test(string str)
    {
        version(CRuntime_DigitalMars)
        {
            // No precise parsing at all
        }
        else
        {
            fromString!float(str).should == _stdc_parse!float(str);
            fromString!double(str).should == _stdc_parse!double(str);
            version (Windows)
            {
                // No precise real parsing on windows
            }
            else
            {
                fromString!real(str).should == _stdc_parse!real(str);
            }
        }
    }

    test("2.5e-324");

    // large
    test("1e300");
    test("123456789.34567e250");
    test("943794359898089732078308743689303290943794359843568973207830874368930329.");

    // min normal
    test("2.2250738585072014e-308");

    // subnormals
    test("5e-324");
    test("91e-324");
    test("1e-322");
    test("13245643e-320");
    test("2.22507385851e-308");
    test("2.1e-308");
    test("4.9406564584124654e-324");

    // infinity
    test("1e400");
    test("1e309");
    test("2e308");
    test("1.7976931348624e308");

    // zero
    test("0.0");
    test("1e-325");
    test("1e-326");
    test("1e-500");

    // Triggers the tricky underflow case in AlgorithmM (for f32)
    test("101e-33");
    // Triggers AlgorithmR
    test("1e23");
    // Triggers another path through AlgorithmR
    test("2075e23");
    // ... and yet another.
    test("8713e-23");

    // 2^65 - 3, triggers half-to-even with even significand
    test("36893488147419103229.0");
    test("36893488147419103229");

    test("18446744073709551615.");
    test("-18446744073709551615.");
    test("18446744073709551616.");
    test("-18446744073709551616.");

    // Related DMD Issues:
    // https://issues.dlang.org/show_bug.cgi?id=20951
    // https://issues.dlang.org/show_bug.cgi?id=20952
    // https://issues.dlang.org/show_bug.cgi?id=20953
    // https://issues.dlang.org/show_bug.cgi?id=20967
}
276 
version(mir_bignum_test)
@safe pure unittest
{
    import std.exception: assertThrown;

    // Every input below contains a misplaced underscore and must make `fromString` throw.
    static immutable string[] malformed = [
        "1_",
        "1__2",
        "_1",
        "123_.456",
        "123_e0",
        "123._456",
        "12__34.56",
        "123.456_",
        "-_123.456",
        "_123.456",
    ];

    foreach (input; malformed)
        assertThrown(fromString!float(input));
}
292 
/++
Performs `nothrow` and `@nogc` string to native type conversion.

For floating point `T` the text is parsed into a `Decimal!128` and `value` is
assigned only on success. For every other `T` the work is delegated to `parse`
and the whole input has to be consumed; `value` may be overwritten even when
`false` is returned.

Params:
    str = text to convert
    value = receives the converted value
Returns: true if success and false otherwise.
+/
bool fromString(T, C)(scope const(C)[] str, ref T value)
    if (isSomeChar!C)
{
    static if (!isFloatingPoint!T)
    {
        if (!parse!T(str, value))
            return false;
        // Trailing characters after the value count as a failure.
        return str.length == 0;
    }
    else
    {
        import mir.bignum.decimal: Decimal, DecimalExponentKey;
        import mir.utility: _expect;

        DecimalExponentKey key;
        Decimal!128 decimal = void;
        auto succeeded = decimal.fromStringImpl(str, key);
        if (_expect(succeeded, true))
            value = cast(T) decimal;
        return succeeded;
    }
}
320 
///
version(mir_test)
@safe pure nothrow @nogc unittest
{
    int parsed;
    assert(fromString("123", parsed) && parsed == 123);
}
328 
///
version(mir_test)
@safe pure nothrow @nogc unittest
{
    double parsed = 0;
    // signed infinity
    assert(fromString("+Inf", parsed) && parsed == double.infinity);
    // NaN never compares equal to itself
    assert(fromString("-nan", parsed) && parsed != parsed);
}
337 
/++
Single character parsing utilities.

Pops the first character of `str` into `value`. When `str` is empty neither
`str` nor `value` is touched.

Params:
    str = input; advanced by one character on success
    value = receives the popped character
Returns: true if success and false otherwise.
+/
bool parse(T, C)(ref scope inout(C)[] str, ref scope T value)
    if (isSomeChar!C && isSomeChar!T && T.sizeof == C.sizeof)
{
    if (str.length != 0)
    {
        value = str[0];
        str = str[1 .. $];
        return true;
    }
    return false;
}
352 
///
version(mir_test) @safe pure nothrow @nogc
unittest
{
    string rest = "str";
    char first;
    assert(parse(rest, first));
    assert(first == 's');
    assert(rest == "tr");
}
363 
/++
Integer parsing utilities.

Parses a `byte` or `short` by delegating to the `int` overload and then
range-checking the result.

Params:
    str = input; advanced past whatever the `int` overload consumed
    value = receives the parsed number on success. Being an `out` parameter it
        is reset to 0 on entry and stays 0 on every failure, including the
        case where the parsed number does not fit into `T`.
Returns: true if success and false otherwise.
+/
bool parse(T, C)(ref scope inout(C)[] str, out scope T value)
    if ((is(T == byte) || is(T == short)) && isSomeChar!C)
{
    int wide;
    if (!.parse!(int, C)(str, wide))
        return false;
    // Reject out-of-range numbers without storing a truncated value.
    if (wide < T.min || wide > T.max)
        return false;
    value = cast(T) wide;
    return true;
}
377 
/++
Parses a `ubyte` or `ushort` by delegating to the `uint` overload and then
range-checking the result.

Params:
    str = input; advanced past whatever the `uint` overload consumed
    value = receives the parsed number on success. Being an `out` parameter it
        is reset to 0 on entry and stays 0 on every failure, including the
        case where the parsed number does not fit into `T`.
Returns: true if success and false otherwise.
+/
bool parse(T, C)(ref scope inout(C)[] str, out scope T value)
    if ((is(T == ubyte) || is(T == ushort)) && isSomeChar!C)
{
    uint wide;
    if (!.parse!(uint, C)(str, wide))
        return false;
    // Reject out-of-range numbers without storing a truncated value.
    if (wide > T.max)
        return false;
    value = cast(T) wide;
    return true;
}
386 
///
version (mir_test) unittest
{
    import mir.test: should;
    import std.meta: AliasSeq;

    alias IntegerTypes = AliasSeq!(
        byte, ubyte, short, ushort,
        int, uint, long, ulong);

    foreach (T; IntegerTypes)
    {
        T val;

        // well-formed numbers
        string str = "123";
        assert(parse(str, val));
        val.should == 123;

        str = "0";
        assert(parse(str, val));
        val.should == 0;

        str = "9";
        assert(parse(str, val));
        val.should == 9;

        // failures leave the `out` value at zero
        str = "";
        assert(!parse(str, val));
        val.should == 0;

        str = "text";
        assert(!parse(str, val));
        val.should == 0;
    }
}
414 
///
version (mir_test) unittest
{
    import mir.conv: to;
    import mir.test: should;
    import std.meta: AliasSeq;

    alias SignedTypes = AliasSeq!(byte, short, int, long);

    foreach (T; SignedTypes)
    {
        T val;

        string str = "-123";
        assert(parse(str, val));
        val.should == -123;

        str = "-0";
        assert(parse(str, val));
        val.should == 0;

        // parsing stops at the first non-digit and leaves the rest in `str`
        str = "-9text";
        assert(parse(str, val));
        val.should == -9;
        assert(str == "text");

        // the most negative value round-trips; `+ 0` promotes small types before formatting
        enum m = T.min + 0;
        str = m.to!string;
        assert(parse(str, val));
        val.should == T.min;
    }
}
440 
/++
Parses a decimal number for targets at least as large as `uint`.

Accepts an optional leading `+` (and `-` for signed `T`) followed by at least
one digit, and stops at the first non-digit character. The digits are
accumulated in `Unsigned!T` with overflow-checked arithmetic.

Params:
    str = input; advanced past every character that was consumed, including
        on some failure paths
    value = receives the parsed number on success; as an `out` parameter it is
        0 whenever `false` is returned
Returns: true if success and false otherwise.
+/
bool parse(T, C)(ref scope inout(C)[] str, scope out T value)
    if ((isSigned!T || isUnsigned!T) && T.sizeof >= uint.sizeof && isSomeChar!C)
{
    version(LDC) pragma(inline, true);
    import mir.checkedint: addu, mulu;

    alias U = Unsigned!T;

    if (str.length == 0)
        return false;

    static if (isSigned!T)
        bool negative;

    // Distance from '0'; after conversion to unsigned any non-digit is >= 10.
    U acc = str[0] - C('0');

    if (acc >= 10)
    {
        // Only a sign may precede the digits.
        bool signSeen = acc == C('+') - C('0');
        static if (isSigned!T)
        {
            if (acc == C('-') - C('0'))
            {
                negative = true;
                signSeen = true;
            }
        }
        if (!signSeen)
            return false;

        // Drop the sign; a digit has to follow.
        str = str[1 .. $];
        if (str.length == 0)
            return false;
        acc = str[0] - C('0');
        if (acc >= 10)
            return false;
    }

    // Drop the first digit, then fold in the remaining ones.
    str = str[1 .. $];

    while (str.length != 0)
    {
        const uint digit = str[0] - C('0');
        if (digit >= 10)
            break;
        str = str[1 .. $];

        bool overflow;
        acc = mulu(acc, 10u, overflow);
        if (overflow)
            return false;
        acc = addu(acc, digit, overflow);
        if (overflow)
            return false;
    }

    static if (isSigned!T)
    {
        // Magnitude limit is T.max, or T.max + 1 (wrapping) for negative numbers.
        const U limit = U(T.max + negative);
        if (acc > limit)
            return false;
        if (negative)
            acc = -acc;
    }

    value = acc;
    return true;
}