/++
$(H1 @nogc and nothrow Parsing Utilities)

License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
Authors: Ilia Ki
Copyright: 2020 Ilia Ki, Kaleidic Associates Advisory Limited, Symmetry Investments
+/
module mir.parse;

/++
Position inside a parsed input: a file plus optional line and column numbers.
+/
struct ParsePosition
{
    /// File identifier; written as-is by `toString`.
    string file;
    /// Line number; 0 is for unknown.
    uint line;
    /// Column number; 0 is for unknown.
    uint column;

    /++
    Writes the position to the output sink `w`.

    The output is `file`, followed by `(line)` when `line` is non-zero,
    or `(line,column)` when both `line` and `column` are non-zero.
    +/
    void toString(W)(scope ref W w) scope const
    {
        import mir.format: print;

        w.put(file);

        // An unknown line means there is nothing more to print.
        if (line == 0)
            return;

        w.put("(");
        w.print(line);
        if (column != 0)
        {
            w.put(",");
            w.print(column);
        }
        w.put(")");
    }
}

/++
Keys used together with a decimal exponent.

The character-based members are stored as the offset of the character from `'0'`.
+/
enum DecimalExponentKey
{
    /// Zero value.
    none = 0,
    ///
    infinity = 1,
    ///
    nan = 2,
    /// `'.'` relative to `'0'`.
    dot = '.' - '0',
    /// `'d'` relative to `'0'`.
    d = 'd' - '0',
    /// `'e'` relative to `'0'`.
    e = 'e' - '0',
    /// `'D'` relative to `'0'`.
    D = 'D' - '0',
    /// `'E'` relative to `'0'`.
    E = 'E' - '0',
}

/// Pairs an exponent value with its $(LREF DecimalExponentKey).
struct DecimalExponentInfo
{
    ///
    long exponent;
    ///
    DecimalExponentKey key;
}

/// `mir.conv: to` extension.
version(mir_bignum_test)
@safe pure @nogc
unittest
{
    import mir.test: should;
    import mir.conv: to;

    "123.0".to!double.should == 123;
    "123".to!int.should == 123;
    "123".to!byte.should == 123;

    import mir.small_string;
    alias S = SmallString!32;
    "123.0".SmallString!32.to!double.should == 123;
}

import std.traits: isMutable, isFloatingPoint, isSomeChar, isSigned, isUnsigned, Unsigned;

/++
Performs `@nogc` string to native type conversion.

Params:
    str = the text to convert; it has to be consumed completely
Returns:
    parsed value
Throws:
    `nogc` Exception in case of parse error or non-empty remaining input.
    When exceptions are not available (`D_Exceptions` is not set) the failure is `assert(0)`.

Floating_point:
    Mir parsing supports up-to quadruple precision.
    The conversion error is 0 ULP for normal numbers.
    Subnormal numbers with an exponent greater than or equal to -512 have upper error bound equal to 1 ULP.
+/
template fromString(T)
    if (isMutable!T)
{
    ///
    T fromString(C)(scope const(C)[] str)
        if (isSomeChar!C)
    {
        import mir.utility: _expect;

        // Preallocated so that throwing does not need the GC.
        static immutable excfp = new Exception("fromString failed to parse " ~ T.stringof);

        static if (isFloatingPoint!T)
        {
            T parsed;
            // The non-throwing overload already rejects trailing input for floating point types.
            if (!_expect(.fromString(str, parsed), true))
            {
                version (D_Exceptions)
                {
                    import mir.exception : toMutable;
                    throw excfp.toMutable;
                }
                else
                    assert(0);
            }
            return parsed;
        }
        else
        {
            static immutable excne = new Exception("fromString: remaining input is not empty after parsing " ~ T.stringof);

            T parsed;

            // First failure mode: the prefix is not a valid value of type T.
            if (!_expect(parse!T(str, parsed), true))
            {
                version (D_Exceptions)
                {
                    import mir.exception : toMutable;
                    throw excfp.toMutable;
                }
                else
                    assert(0);
            }

            // Second failure mode: `parse` stopped before the end of the input.
            if (!_expect(str.length == 0, true))
            {
                version (D_Exceptions)
                {
                    import mir.exception : toMutable;
                    throw excne.toMutable;
                }
                else
                    assert(0);
            }

            return parsed;
        }
    }
}

version(unittest)
{
    import core.stdc.stdlib: strtof, strtod, strtold;

    // Casts a function pointer to the same type with `pure` added, so that
    // the C runtime parsers can be called from `pure` unittests.
    private auto _assumePure(T)(scope return T t)
    {
        import std.traits;
        enum pureAttrs = functionAttributes!T | FunctionAttribute.pure_;
        alias PureT = SetFunctionAttributes!(T, functionLinkage!T, pureAttrs);
        return cast(PureT) t;
    }

    // Reference parsers backed by the C runtime, used to cross-check `fromString`.
    // NOTE(review): `str.ptr` is handed to C as-is, so callers are expected to pass
    // zero-terminated data such as string literals.
    private static @trusted float _stdc_parse(T : float)(string str)
    {
        auto tail = str.ptr + str.length;
        return _assumePure(&strtof)(str.ptr, &tail);
    }

    private static @trusted double _stdc_parse(T : double)(string str)
    {
        auto tail = str.ptr + str.length;
        return _assumePure(&strtod)(str.ptr, &tail);
    }

    private static @trusted real _stdc_parse(T : real)(string str)
    {
        auto tail = str.ptr + str.length;
        return _assumePure(&strtold)(str.ptr, &tail);
    }
}

///
version(mir_bignum_test)
@safe pure @nogc unittest
{
    import mir.test;
    "123".fromString!int.should == 123;

    ".5".fromString!float.should == .5;
    "12.3".fromString!double.should == 12.3;
    "12.3".fromString!float.should == 12.3f;
    "12.3".fromString!real.should == 12.3L;
    "-12.3e-30".fromString!double.should == -12.3e-30;
    "2.9802322387695312E-8".fromString!double.should == 2.9802322387695312E-8;

    // default support of underscores
    "123_456.789_012".fromString!double.should == 123_456.789_012;
    "12_34_56_78_90_12e-6".fromString!double.should == 123_456.789_012;

    // default support of leading zeros
    "010".fromString!double.should == 10.0;
    "000010".fromString!double.should == 10.0;
    "0000.10".fromString!double.should == 0.1;
    "0000e10".fromString!double.should == 0;

    version(all) {} else
    version (TeslAlgoM) {} else
    {
        /// Test CTFE support
        static assert("-123".fromString!int == -123);

        static assert("-12.3e-30".fromString!double == -0x1.f2f280b2414d5p-97);
        static assert("+12.3e+30".fromString!double == 0x1.367ee3119d2bap+103);

        static assert("1.448997445238699".fromString!double == 0x1.72f17f1f49aadp0);
        static if (real.mant_dig >= 64)
            static assert("1.448997445238699".fromString!real == 1.448997445238699L);

        static assert("3.518437208883201171875".fromString!float == 0x1.c25c26p+1);
        static assert("3.518437208883201171875".fromString!double == 0x1.c25c268497684p+1);
        static if (real.mant_dig >= 64)
            static assert("3.518437208883201171875".fromString!real == 0xe.12e13424bb4232fp-2L);
    }

    // Compares Mir parsing with the C runtime for every supported precision.
    void test(string input)
    {
        version(CRuntime_DigitalMars) // No precise parsing at all
        {
        }
        else
        {
            fromString!float(input).should == _stdc_parse!float(input);
            fromString!double(input).should == _stdc_parse!double(input);
            version (Windows) // No precise real parsing on windows
            {
            }
            else
                fromString!real(input).should == _stdc_parse!real(input);
        }
    }

    static immutable string[] cases = [
        "2.5e-324",

        // large
        "1e300",
        "123456789.34567e250",
        "943794359898089732078308743689303290943794359843568973207830874368930329.",

        // min normal
        "2.2250738585072014e-308",

        // subnormals
        "5e-324",
        "91e-324",
        "1e-322",
        "13245643e-320",
        "2.22507385851e-308",
        "2.1e-308",
        "4.9406564584124654e-324",

        // infinity
        "1e400",
        "1e309",
        "2e308",
        "1.7976931348624e308",

        // zero
        "0.0",
        "1e-325",
        "1e-326",
        "1e-500",

        // Triggers the tricky underflow case in AlgorithmM (for f32)
        "101e-33",
        // Triggers AlgorithmR
        "1e23",
        // Triggers another path through AlgorithmR
        "2075e23",
        // ... and yet another.
        "8713e-23",

        // 2^65 - 3, triggers half-to-even with even significand
        "36893488147419103229.0",
        "36893488147419103229",

        "18446744073709551615.",
        "-18446744073709551615.",
        "18446744073709551616.",
        "-18446744073709551616.",
    ];

    foreach (input; cases)
        test(input);

    // Related DMD Issues:
    // https://issues.dlang.org/show_bug.cgi?id=20951
    // https://issues.dlang.org/show_bug.cgi?id=20952
    // https://issues.dlang.org/show_bug.cgi?id=20953
    // https://issues.dlang.org/show_bug.cgi?id=20967
}

// Misplaced underscores have to be rejected.
version(mir_bignum_test)
@safe pure unittest
{
    import std.exception: assertThrown;

    static immutable string[] malformed = [
        "1_",
        "1__2",
        "_1",
        "123_.456",
        "123_e0",
        "123._456",
        "12__34.56",
        "123.456_",
        "-_123.456",
        "_123.456",
    ];

    foreach (input; malformed)
        assertThrown(input.fromString!float);
}

/++
Performs `nothrow` and `@nogc` string to native type conversion.

The whole input has to be consumed for the conversion to succeed.

Params:
    str = the text to convert
    value = receives the converted value
Returns: true if success and false otherwise.
+/
bool fromString(T, C)(scope const(C)[] str, ref T value)
    if (isSomeChar!C)
{
    static if (isFloatingPoint!T)
    {
        import mir.bignum.decimal: Decimal, DecimalExponentKey;
        import mir.utility: _expect;

        // The decimal is filled by `fromStringImpl`, so it is left uninitialized here.
        Decimal!128 decimal = void;
        DecimalExponentKey key;
        auto ret = decimal.fromStringImpl(str, key);
        if (_expect(ret, true))
        {
            // `value` is written only when parsing succeeded.
            value = cast(T) decimal;
        }
        return ret;
    }
    else
    {
        // `parse` advances `str`; any leftover characters make the conversion fail.
        return parse!T(str, value) && str.length == 0;
    }
}

///
version(mir_test)
@safe pure nothrow @nogc unittest
{
    int value;
    assert("123".fromString(value) && value == 123);
}

///
version(mir_test)
@safe pure nothrow @nogc unittest
{
    double value = 0;
    assert("+Inf".fromString(value) && value == double.infinity);
    assert("-nan".fromString(value) && value != value);
}

/++
Single character parsing utilities.

Takes the first character of `str` into `value` and advances `str` by one.
On empty input neither `str` nor `value` is modified.

Returns: true if success and false otherwise.
+/
bool parse(T, C)(ref scope inout(C)[] str, ref scope T value)
    if (isSomeChar!C && isSomeChar!T && T.sizeof == C.sizeof)
{
    if (str.length == 0)
        return false;
    value = str[0];
    str = str[1 .. $];
    return true;
}

///
version(mir_test) @safe pure nothrow @nogc
unittest
{
    auto s = "str";
    char c;
    assert(parse(s, c));
    assert(c == 's');
    assert(s == "tr");
}

/++
Integer parsing utilities.

Parses an optionally signed decimal integer from the front of `str` and
advances `str` past the characters that were read. `value` is an `out`
parameter: it is reset on entry and assigned only when parsing succeeds.

Returns: true if success and false otherwise.
+/
bool parse(T, C)(ref scope inout(C)[] str, out scope T value)
    if ((is(T == byte) || is(T == short)) && isSomeChar!C)
{
    // Parse with `int` precision, then narrow.
    int lvalue;
    if (!.parse!(int, C)(str, lvalue))
        return false;
    // Out-of-range input is a failure; `value` keeps its initial 0 instead of
    // receiving a truncated number, matching the wide integer overload.
    if (lvalue < T.min || lvalue > T.max)
        return false;
    value = cast(T) lvalue;
    return true;
}

/// ditto
bool parse(T, C)(ref scope inout(C)[] str, out scope T value)
    if ((is(T == ubyte) || is(T == ushort)) && isSomeChar!C)
{
    // Parse with `uint` precision, then narrow.
    uint lvalue;
    if (!.parse!(uint, C)(str, lvalue))
        return false;
    // Out-of-range input is a failure; `value` keeps its initial 0.
    if (lvalue > T.max)
        return false;
    value = cast(T) lvalue;
    return true;
}

///
version (mir_test) unittest
{
    import mir.test: should;
    import std.meta: AliasSeq;
    foreach (T; AliasSeq!(
        byte, ubyte, short, ushort,
        int, uint, long, ulong))
    {
        auto str = "123";
        T val;
        assert(parse(str, val));
        val.should == 123;
        str = "0";
        assert(parse(str, val));
        val.should == 0;
        str = "9";
        assert(parse(str, val));
        val.should == 9;
        str = "";
        assert(!parse(str, val));
        val.should == 0;
        str = "text";
        assert(!parse(str, val));
        val.should == 0;
    }
}

///
version (mir_test) unittest
{
    import mir.test: should;
    import mir.conv: to;
    import std.meta: AliasSeq;
    foreach (T; AliasSeq!(byte, short, int, long))
    {
        auto str = "-123";
        T val;
        assert(parse(str, val));
        val.should == -123;
        str = "-0";
        assert(parse(str, val));
        val.should == 0;
        str = "-9text";
        assert(parse(str, val));
        val.should == -9;
        assert(str == "text");
        enum m = T.min + 0;
        str = m.to!string;
        assert(parse(str, val));
        val.should == T.min;
    }
}

// Out-of-range input for the narrow integer types fails and leaves the value at 0.
version (mir_test) unittest
{
    auto str = "300";
    byte b = 1;
    assert(!parse(str, b));
    assert(b == 0);

    str = "-129";
    b = 1;
    assert(!parse(str, b));
    assert(b == 0);

    str = "256";
    ubyte ub = 1;
    assert(!parse(str, ub));
    assert(ub == 0);

    str = "-32769";
    short s = 1;
    assert(!parse(str, s));
    assert(s == 0);

    str = "65536";
    ushort us = 1;
    assert(!parse(str, us));
    assert(us == 0);

    // The boundaries themselves are still accepted.
    str = "-128";
    assert(parse(str, b));
    assert(b == byte.min);
    str = "255";
    assert(parse(str, ub));
    assert(ub == ubyte.max);
}

/// ditto
bool parse(T, C)(ref scope inout(C)[] str, scope out T value)
    // `isSigned` is also true for floating point types, which this overload
    // cannot handle (`Unsigned!T` does not exist for them), so they are excluded.
    if ((isSigned!T || isUnsigned!T) && !isFloatingPoint!T && T.sizeof >= uint.sizeof && isSomeChar!C)
{
    version(LDC) pragma(inline, true);
    import mir.checkedint: addu, mulu;

    if (str.length == 0)
        return false;

    // Subtracting '0' maps the digits to 0 .. 9; stored as unsigned, every
    // other character ends up >= 10.
    Unsigned!T x = str[0] - C('0');

    static if (isSigned!T)
        bool sign;

    if (x >= 10)
    {
        // Not a digit: only a sign is allowed here ('-' for signed types only).
        static if (isSigned!T)
        {
            if (x == C('-') - C('0'))
            {
                sign = true;
                goto S;
            }
        }

        if (x != C('+') - C('0'))
            return false;
    S:
        // Skip the sign; at least one digit has to follow it.
        str = str[1 .. $];
        if (str.length == 0)
            return false;
        x = str[0] - C('0');
        if (x >= 10)
            return false;
    }

    // Consume the first digit, which is already stored in `x`.
    str = str[1 .. $];

    while (str.length)
    {
        uint c = str[0] - C('0');
        if (c >= 10)
            break; // the first non-digit ends the number and stays in `str`
        str = str[1 .. $];
        // x = x * 10 + c, failing when the unsigned accumulator overflows.
        bool overflow;
        x = x.mulu(10u, overflow);
        if (overflow)
            return false;
        x = x.addu(c, overflow);
        if (overflow)
            return false;
    }

    static if (isSigned!T)
    {
        // The largest allowed magnitude is T.max, or T.max + 1 for negative numbers.
        if (x > Unsigned!T(T.max + sign))
            return false;
        x = sign ? -x : x;
    }

    value = x;
    return true;
}