1 /++ 2 $(H2 CSV/TSV parsing) 3 4 $(LREF CsvProxy) can be serialized to Ion, JSON, MsgPack, or YAML 5 and then deserialized to a specified type. 6 That approachs allows to use the same mir deserialization 7 pattern like for other data types. 8 $(IONREF conv, serde) unifies this two steps throught binary Ion format, 9 which serves as an efficient DOM representation for all other formats. 10 11 Macros: 12 IONREF = $(REF_ALTTEXT $(TT $2), $2, mir, ion, $1)$(NBSP) 13 AlgorithmREF = $(GREF_ALTTEXT mir-algorithm, $(TT $2), $2, mir, $1)$(NBSP) 14 NDSLICEREF = $(GREF_ALTTEXT mir-algorithm, $(TT $2), $2, mir, ndslice, $1)$(NBSP) 15 AAREF = $(REF_ALTTEXT $(TT $2), $2, mir, algebraic_alias, $1)$(NBSP) 16 +/ 17 18 module mir.csv; 19 20 import mir.primitives: isOutputRange; 21 import mir.serde: SerdeTarget; 22 import mir.ndslice.slice: Slice, SliceKind; 23 import mir.string_map: StringMap; 24 import std.traits: isImplicitlyConvertible; 25 26 /// 27 public import mir.algebraic_alias.csv: CsvAlgebraic; 28 29 30 /++ 31 Rapid CSV reader represented as a range of rows. 32 33 The structure isn't copyable. Please use it's pointer with range modifiers. 34 35 Exactly one call of `empty` has to be preciding each call of `front`. 36 Exactly one call of `popFront` has to be following each call of `front`. 37 Some Phobos functions doesn't follow this rule. 38 39 All elements of the each row have to be accessed exactly once before 40 the next row can be processed. 41 +/ 42 struct CsvReader 43 { 44 import mir.appender: ScopedBuffer, scopedBuffer; 45 import mir.utility: _expect; 46 import mir.string: scanLeftAny; 47 48 /// An input CSV text. BOM isn't supported. 49 const(char)[] text; 50 /// 51 uint nColumns; 52 /// 53 uint rowIndex; 54 /// Scalar separator 55 char separator = ','; 56 /// Symbol to quote scalars 57 char quote = '"'; 58 /// 59 bool fill = true; 60 /// 61 bool skipEmptyLines = true; 62 63 private ScopedBuffer!(char, 128) buffer; 64 65 /++ 66 +/ 67 enum Error 68 { 69 /// 70 none, 71 // /// 72 // missingLeftQuote, 73 /// 74 unexpectedSeparator, 75 /// 76 unexpectedRowEnd, 77 } 78 79 /++ 80 CSV cell element 81 +/ 82 struct Scalar 83 { 84 /++ 85 Unquoted string. 86 87 $(LREF .CsvReader.Scalar.wasQuoted) is set, then the value refers 88 $(LREF .CsvRow.buffer) and valid only until the next quoted string is produced. 89 +/ 90 const(char)[] value; 91 92 bool wasQuoted; 93 /++ 94 If the flag is true the $(LREF .CsvReader.Scalar.value) member refers the $(LREF .CsvRow.buffer) the original text, 95 otherwise it . 96 +/ 97 bool isScopeAllocated; 98 99 /++ 100 +/ 101 Error error; 102 } 103 104 /++ 105 CSV Row Input Range 106 107 Exactly one call of `empty` has to be preciding each call of `front`. 108 Exactly one call of `popFront` has to be following each call of `front`. 109 Some Phobos functions doesn't follow this rule. 110 +/ 111 struct Row 112 { 113 private CsvReader* root; 114 /// 115 uint length; 116 117 /++ 118 Throws: IonMirException if the $(LREF CsvReader.Error) is set. 119 Returns: `void` 120 +/ 121 auto validateCsvError(CsvReader.Error error) 122 scope const @safe pure 123 { 124 import mir.ion.exception: IonMirException; 125 126 final switch (error) 127 { 128 case CsvReader.Error.none: break; 129 // case CsvReader.Error.missingLeftQuote: throw new IonMirException("mir.csv: missing left quote when parsing element at index [", root.rowIndex, ", ", columnIndex, "]"); 130 case CsvReader.Error.unexpectedSeparator: throw new IonMirException("mir.csv: unexpected separator when parsing element at index [", root.rowIndex, ", ", columnIndex, "]"); 131 case CsvReader.Error.unexpectedRowEnd: throw new IonMirException("mir.csv: unexpected row end when parsing element at index [", root.rowIndex, ", ", columnIndex, "]"); 132 } 133 } 134 135 /// 136 bool empty()() scope const pure nothrow @nogc @property 137 in (root) 138 { 139 return length == 0; 140 } 141 142 /++ 143 The function has be called after the front value is precessed. 144 +/ 145 void popFront()() scope pure nothrow @nogc 146 in (root) 147 in (length) 148 { 149 length--; 150 } 151 152 /// 153 Scalar front()() return scope pure nothrow @nogc @property 154 in (root) 155 in (length) 156 // in (length == 1 || root.text.length) 157 { 158 auto scalar = root.readCell(); 159 // if (_expect(!scalar.error, true)) 160 with (root) 161 { 162 if (text.length && text[0] == separator) 163 { 164 text = text.ptr[1 .. text.length]; 165 if (_expect(length == 1, false)) 166 { 167 for(;;) 168 { 169 auto ignored = root.readCell; 170 if (!text.length) 171 break; 172 if (text[0] != separator) 173 goto StripLineEnd; 174 text = text.ptr[1 .. text.length]; 175 } 176 } 177 } 178 else 179 { 180 if (_expect(length != 1, false)) 181 { 182 if (!fill) 183 scalar.error = Error.unexpectedRowEnd; 184 } 185 else 186 if (text.length) 187 { 188 StripLineEnd: 189 text = text.ptr[1 + (text.length > 1 && text[0] == '\r' && text[1] == '\n') .. text.length]; 190 } 191 } 192 } 193 return scalar; 194 } 195 196 uint columnIndex()() scope const @safe pure nothrow @nogc 197 in (root) 198 { 199 return root.nColumns - length; 200 } 201 } 202 203 /// 204 bool empty()() scope pure nothrow @nogc @property 205 { 206 if (skipEmptyLines) 207 { 208 if (text.length) for (;;) 209 { 210 if (text[0] != '\n' && text[0] != '\r') 211 return false; 212 text = text[1 .. $]; 213 if (text.length == 0) 214 return true; 215 } 216 else 217 return true; 218 } 219 else 220 return text.length == 0; 221 } 222 223 /++ 224 The function has be called after the all row cell values have been precessed. 225 +/ 226 void popFront()() scope pure nothrow @nogc 227 { 228 rowIndex++; 229 } 230 231 /// 232 Row front()() scope return pure nothrow @nogc @property 233 { 234 return typeof(return)(&this, nColumns); 235 } 236 237 /++ 238 Throws: throws an exception if the first row is exists and invalid. 239 +/ 240 this( 241 return scope const(char)[] text, 242 char separator = ',', 243 char quote = '"', 244 char comment = '\0', 245 uint skipRows = 0, 246 bool fill = true, 247 bool skipEmptyLines = true, 248 uint nColumns = 0, 249 ) @trusted pure @nogc 250 { 251 pragma(inline, false); 252 253 while (text.length && (skipRows-- || text[0] == comment)) 254 { 255 auto next = text.scanLeftAny('\r', '\n'); 256 text = text[$ - next.length + (next.length >= 1) + (next.length > 1 && next[0] == '\r' && next[1] == '\n') .. $]; 257 } 258 259 this.text = text; 260 this.separator = separator; 261 this.quote = quote; 262 263 if (this.text.length == 0) 264 return; 265 266 if (!nColumns) for (;;) 267 { 268 nColumns++; 269 auto scalar = readCell(); 270 if (scalar.error) 271 { 272 import mir.exception: toMutable; 273 import mir.ion.exception: IonException; 274 static immutable exc = new IonException("mir.csv: left double quote is missing in the first row"); 275 throw exc.toMutable; 276 277 } 278 if (this.text.length && this.text[0] == separator) 279 { 280 this.text = this.text[1 .. $]; 281 continue; 282 } 283 if (this.text.length) 284 this.text = this.text[1 + (this.text.length > 1 && this.text[0] == '\r' && this.text[1] == '\n') .. $]; 285 break; 286 } 287 288 this.nColumns = nColumns; 289 this.text = text; 290 } 291 292 private Scalar readCell() scope return @trusted pure nothrow @nogc 293 { 294 // if skipLeftSpaces// TODO then stripLeft csv 295 auto quoted = text.length && text[0] == quote; 296 if (!quoted) 297 { 298 auto next = text.scanLeftAny(separator, '\r', '\n'); 299 auto ret = text[0 .. text.length - next.length]; 300 text = text.ptr[text.length - next.length .. text.length]; 301 return Scalar(ret); 302 } 303 buffer.reset; 304 305 assert(text.length); 306 assert(text[0] == quote); 307 text = text.ptr[1 .. text.length]; 308 309 for (;;) 310 { 311 auto next = text.scanLeftAny(quote); 312 313 auto isQuote = next.length > 1 && next[1] == quote; 314 auto ret = text[0 .. text.length - next.length + isQuote]; 315 text = text.ptr[text.length - next.length + isQuote + (next.length != 0) .. text.length]; 316 317 if (!isQuote && buffer.data.length == 0) 318 return Scalar(ret, true); 319 320 buffer.put(ret); 321 322 if (!isQuote) 323 return Scalar(buffer.data, true, true); 324 } 325 } 326 } 327 328 /++ 329 Returns: $(NDSLICEREF slice, Slice)`!(string*, 2)`. 330 See_also: $(LREF matrixAsDataFrame) 331 +/ 332 Slice!(string*, 2) csvToStringMatrix( 333 return scope string text, 334 char separator = ',', 335 char quote = '"', 336 char comment = '\0', 337 ubyte skipRows = 0, 338 bool fill = true, 339 bool skipEmptyLines = true, 340 ) @trusted pure 341 { 342 pragma(inline, false); 343 344 import mir.ndslice.slice: Slice; 345 import mir.utility: _expect; 346 import std.array: appender; 347 348 auto app = appender!(string[]); 349 app.reserve(text.length / 32); 350 351 auto table = CsvReader( 352 text, 353 separator, 354 quote, 355 comment, 356 skipRows, 357 fill, 358 skipEmptyLines, 359 ); 360 361 auto wip = new string[table.nColumns]; 362 363 while (!table.empty) 364 { 365 auto row = table.front; 366 do 367 { 368 auto elem = row.front; 369 if (_expect(elem.error, false)) 370 row.validateCsvError(elem.error); 371 372 auto value = cast(string) elem.value; 373 if (_expect(elem.isScopeAllocated, false)) 374 value = value.idup; 375 376 wip[row.columnIndex] = value; 377 row.popFront; 378 } 379 while(!row.empty); 380 app.put(wip); 381 table.popFront; 382 } 383 384 import mir.ndslice: sliced; 385 assert (app.data.length == table.rowIndex * table.nColumns); 386 return app.data.sliced(table.rowIndex, table.nColumns); 387 } 388 389 /// 390 version (mir_ion_test) 391 @safe pure 392 unittest 393 { 394 // empty lines are allowed by default 395 auto data = `012,abc,"mno pqr",0` ~ "\n\n" ~ `982,def,"stuv wx",1` 396 ~ "\n" ~ `78,ghijk,"yx",2`; 397 398 auto matrix = data.csvToStringMatrix(); 399 400 import mir.ndslice.slice: Slice, SliceKind; 401 402 static assert(is(typeof(matrix) == Slice!(string*, 2))); 403 404 import mir.test: should; 405 matrix.should == 406 [[`012`, `abc`, `mno pqr`, `0`], [`982`, `def`, `stuv wx`, `1`], [`78`, `ghijk`, `yx`, `2`]]; 407 408 import mir.ndslice.dynamic: transposed; 409 auto transp = matrix.transposed; 410 static assert(is(typeof(transp) == Slice!(string*, 2, SliceKind.universal))); 411 412 transp.should == 413 [[`012`, `982`, `78`], [`abc`, `def`, `ghijk`], [`mno pqr`, `stuv wx`, `yx`], [`0`, `1`, `2`]]; 414 } 415 416 version (mir_ion_test) 417 @safe pure 418 unittest 419 { 420 // Optional parameters to csvToStringMatrix 421 auto data = `012;abc;"mno pqr";0` ~ "\n" ~ `982;def;"stuv wx";1` 422 ~ "\n" ~ `78;ghijk;"yx";2`; 423 424 import mir.test: should; 425 data.csvToStringMatrix(';', '"').should == 426 [["012", "abc", "mno pqr", "0"], ["982", "def", "stuv wx", "1"], ["78", "ghijk", "yx", "2"]]; 427 } 428 429 version (mir_ion_test) 430 @safe pure 431 unittest 432 { 433 auto data = `012,aa,bb,cc` ~ "\r\n" ~ `982,dd,ee,ff` ~ "\r\n" 434 ~ `789,gg,hh,ii` ~ "\r\n"; 435 436 import mir.test: should; 437 data.csvToStringMatrix.should == 438 [["012", "aa", "bb", "cc"], ["982", "dd", "ee", "ff"], ["789", "gg", "hh", "ii"]]; 439 } 440 441 version (mir_ion_test) 442 @safe pure 443 unittest 444 { 445 // Optional parameters here too 446 auto data = `012;aa;bb;cc` ~ "\r\n" ~ `982;dd;ee;ff` ~ "\r\n" 447 ~ `789;gg;hh;ii` ~ "\r\n"; 448 449 import mir.test: should; 450 data.csvToStringMatrix(';', '"').should == 451 [["012", "aa", "bb", "cc"], ["982", "dd", "ee", "ff"], ["789", "gg", "hh", "ii"]]; 452 } 453 454 version (mir_ion_test) 455 @safe pure 456 unittest 457 { 458 // Quoted fields that contains newlines and delimiters 459 auto data = `012,abc,"ha ha ` ~ "\n" ~ `ha this is a split value",567` 460 ~ "\n" ~ `321,"a,comma,b",def,111` ~ "\n"; 461 462 import mir.test: should; 463 data.csvToStringMatrix.should == 464 [["012", "abc", "ha ha \nha this is a split value", "567"], ["321", "a,comma,b", "def", "111"]]; 465 } 466 467 version (mir_ion_test) 468 @safe pure 469 unittest 470 { 471 // Quoted fields that contains newlines and delimiters, optional parameters for csvToStringMatrix 472 auto data = `012;abc;"ha ha ` ~ "\n" ~ `ha this is a split value";567` 473 ~ "\n" ~ `321;"a,comma,b";def;111` ~ "\n"; 474 475 import mir.test: should; 476 data.csvToStringMatrix(';', '"').should == 477 [["012", "abc", "ha ha \nha this is a split value", "567"], ["321", "a,comma,b", "def", "111"]]; 478 } 479 480 version (mir_ion_test) 481 @safe pure 482 unittest 483 { 484 // Quoted fields that contain quotes 485 // (Note: RFC-4180 does not allow doubled quotes in unquoted fields) 486 auto data = `012,"a b ""haha"" c",982` ~ "\n"; 487 488 import mir.test: should; 489 data.csvToStringMatrix.should == [["012", `a b "haha" c`, "982"]]; 490 } 491 492 version (mir_ion_test) 493 @safe pure 494 unittest 495 { 496 // Quoted fields that contain quotes, optional parameters for csvToStringMatrix 497 // (Note: RFC-4180 does not allow doubled quotes in unquoted fields) 498 auto data = `012;"a b ""haha"" c";982` ~ "\n"; 499 500 import mir.test: should; 501 data.csvToStringMatrix(';', '"').should == [["012", `a b "haha" c`, "982"]]; 502 } 503 504 version (mir_ion_test) 505 @safe pure 506 unittest 507 { 508 // Trailing empty fields (bug#1522) 509 import mir.test: should; 510 511 auto data = `,` ~ "\n"; 512 data.csvToStringMatrix.should == [["", ""]]; 513 514 data = `,,` ~ "\n"; 515 data.csvToStringMatrix.should == [["", "", ""]]; 516 517 data = "a,b,c,d" ~ "\n" ~ ",,," ~ "\n" ~ ",,," ~ "\n"; 518 data.csvToStringMatrix.should == 519 [["a", "b", "c", "d"], ["", "", "", ""], ["", "", "", ""]]; 520 521 data = "\"a\",b,c,\"d\","; 522 data.csvToStringMatrix.should == [["a", "b", "c", "d", ""]]; 523 524 data = "\"\",\"\","; 525 data.csvToStringMatrix.should == [["", "", ""]]; 526 } 527 528 // Boundary condition checks 529 version (mir_ion_test) 530 @safe pure 531 unittest 532 { 533 import mir.test: should; 534 535 auto data = `012,792,"def""`; 536 data.csvToStringMatrix.should == [[`012`, `792`, `def"`]]; 537 538 data = `012,792,"def""012`; 539 data.csvToStringMatrix.should == [[`012`, `792`, `def"012`]]; 540 541 data = `012,792,"a"`; 542 data.csvToStringMatrix.should == [[`012`, `792`, `a`]]; 543 544 data = `012,792,"`; 545 data.csvToStringMatrix.should == [[`012`, `792`, ``]]; 546 547 data = `012;;311`; 548 data.csvToStringMatrix(';').should == [[`012`, ``, `311`]]; 549 } 550 551 /++ 552 Returns: $(NDSLICEREF slice, Slice)`!(string*, 2)`. 553 See_also: $(LREF matrixAsDataFrame) 554 +/ 555 Slice!(CsvAlgebraic*, 2) csvToAlgebraicMatrix( 556 return scope string text, 557 char separator = ',', 558 char quote = '"', 559 scope const CsvProxy.Conversion[] conversions = CsvProxy.init.conversions, 560 char comment = '\0', 561 ubyte skipRows = 0, 562 bool fill = true, 563 bool skipEmptyLines = true, 564 bool parseNumbers = true, 565 bool parseTimestamps = true, 566 CsvAlgebraic delegate( 567 return scope const(char)[] unquotedString, 568 CsvAlgebraic scalar, 569 bool quoted, 570 size_t columnIndex 571 ) @safe pure conversionFinalizer = null 572 ) @trusted pure 573 { 574 pragma(inline, false); 575 576 import mir.bignum.decimal: Decimal, DecimalExponentKey; 577 import mir.ndslice.slice: Slice; 578 import mir.timestamp: Timestamp; 579 import mir.utility: _expect; 580 import std.array: appender; 581 582 auto app = appender!(CsvAlgebraic[]); 583 app.reserve(text.length / 32); 584 585 auto table = CsvReader( 586 text, 587 separator, 588 quote, 589 comment, 590 skipRows, 591 fill, 592 skipEmptyLines, 593 ); 594 595 auto wip = new CsvAlgebraic[table.nColumns]; 596 597 DecimalExponentKey decimalKey; 598 Decimal!128 decimal = void; 599 Timestamp timestamp; 600 601 while (!table.empty) 602 { 603 auto row = table.front; 604 do 605 { 606 auto elem = row.front; 607 if (_expect(elem.error, false)) 608 row.validateCsvError(elem.error); 609 610 CsvAlgebraic scalar; 611 612 enum bool allowSpecialValues = true; 613 enum bool allowDotOnBounds = true; 614 enum bool allowDExponent = true; 615 enum bool allowStartingPlus = true; 616 enum bool allowUnderscores = false; 617 enum bool allowLeadingZeros = false; 618 enum bool allowExponent = true; 619 enum bool checkEmpty = true; 620 621 if (_expect(elem.wasQuoted, false)) 622 { 623 auto value = cast(string) elem.value; 624 if (_expect(elem.isScopeAllocated, false)) 625 value = value.idup; 626 scalar = value; 627 } 628 else 629 if (parseNumbers && decimal.fromStringImpl!( 630 char, 631 allowSpecialValues, 632 allowDotOnBounds, 633 allowDExponent, 634 allowStartingPlus, 635 allowUnderscores, 636 allowLeadingZeros, 637 allowExponent, 638 checkEmpty) 639 (elem.value, decimalKey)) 640 { 641 if (decimalKey) 642 scalar = cast(double) decimal; 643 else 644 scalar = cast(long) decimal.coefficient; 645 } 646 else 647 if (parseTimestamps && Timestamp.fromISOExtString(elem.value, timestamp)) 648 { 649 scalar = timestamp; 650 } 651 else 652 { 653 foreach(ref target; conversions) 654 { 655 if (elem.value == target.from) 656 { 657 scalar = target.to; 658 goto Finalizer; 659 } 660 } 661 scalar = cast(string) elem.value; 662 } 663 664 Finalizer: 665 if (_expect(conversionFinalizer !is null, false)) 666 { 667 scalar = conversionFinalizer(elem.value, scalar, elem.wasQuoted, row.columnIndex); 668 } 669 670 wip[row.columnIndex] = scalar; 671 row.popFront; 672 } 673 while(!row.empty); 674 app.put(wip); 675 table.popFront; 676 } 677 678 import mir.ndslice: sliced; 679 assert (app.data.length == table.rowIndex * table.nColumns); 680 return app.data.sliced(table.rowIndex, table.nColumns); 681 } 682 683 /// 684 version(mir_ion_test) 685 unittest 686 { 687 import mir.csv; 688 import mir.ion.conv: serde; // to convert CsvProxy to D types 689 import mir.serde: serdeKeys, serdeIgnoreUnexpectedKeys, serdeOptional; 690 // mir.date and std.datetime are supported as well 691 import mir.timestamp: Timestamp;//mir-algorithm package 692 import mir.test: should; 693 694 auto text = 695 `Date,Open,High,Low,Close,Volume 696 2021-01-21 09:30:00,133.8,134.43,133.59,134.0,9166695,ignoreNoHeader 697 2021-01-21 09:35:00,134.25,135.0,134.19,134.5`;// fill the Volume with '0' 698 699 // If you don't have a header, 700 // `mir.functional.Tuple` instead of MyDataFrame. 701 @serdeIgnoreUnexpectedKeys //ignore all other columns 702 static struct MyDataFrame 703 { 704 // Few keys are allowed 705 @serdeKeys(`Date`, `date`, `timestamp`) 706 Timestamp[] timestamp; 707 708 @serdeKeys(`Open`) double[] open; 709 @serdeKeys(`High`) double[] high; 710 @serdeKeys(`Low`) double[] low; 711 @serdeKeys(`Close`) double[] close; 712 713 @serdeOptional // if we don't have Volume 714 @serdeKeys(`Volume`) 715 long[]volume; 716 } 717 718 MyDataFrame testValue = { 719 timestamp: [`2021-01-21 09:30:00`.Timestamp, `2021-01-21 09:35:00`.Timestamp], 720 volume: [9166695, 0], 721 open: [133.8, 134.25], 722 high: [134.43, 135], 723 low: [133.59, 134.19], 724 close: [134.0, 134.5], 725 }; 726 727 auto table = text // fill the missing and empty fields with '0' 728 .csvToAlgebraicMatrix(',', '"', [CsvProxy.Conversion("", 0.CsvAlgebraic)]) 729 .matrixAsDataFrame; 730 731 table["Volume"][0].should == 9166695; 732 table["Volume"][1].should == 0; 733 734 table.serde!MyDataFrame.should == testValue; 735 } 736 737 /++ 738 Represent CSV data as dictionary of columns. 739 Uses the first row as header. 740 Returns: a string map that refers the same header and the same data. 741 +/ 742 StringMap!(Slice!(T*, 1, SliceKind.universal)) 743 matrixAsDataFrame(T)(return scope Slice!(T*, 2) matrix) 744 @trusted pure 745 { 746 import mir.algebraic: isVariant; 747 import mir.array.allocation: array; 748 import mir.ion.exception: IonException; 749 import mir.ndslice.topology: byDim, map, as; 750 751 if (matrix.length == 0) 752 throw new IonException("mir.csv: Matrix should have at least a single row to get the header"); 753 754 static if (is(T == string)) 755 auto keys = matrix[0].field; 756 else 757 static if (isVariant!T) 758 auto keys = matrix[0].map!((ref x) => x.get!string).array; 759 else 760 auto keys = matrix[0].as!string.array; 761 762 auto data = matrix[1 .. $].byDim!1.array; 763 764 return typeof(return)(keys, data); 765 } 766 767 /// 768 version (mir_ion_test) 769 @safe pure 770 unittest 771 { 772 import mir.test: should; 773 774 auto data = "a,b,c\n1,2,3\n4,5,6\n7,8,9\n10,11,12"; 775 776 import mir.ndslice.topology: as, map; 777 auto table = data 778 .csvToStringMatrix // see also csvToAlgebraicMatrix 779 .matrixAsDataFrame; 780 781 782 table["a"].should == ["1", "4", "7", "10"]; 783 784 table.keys.should == ["a", "b", "c"]; 785 table.values 786 .map!(column => column[].as!double) 787 .should == [ 788 [1, 4, 7, 10], // first column 789 [2, 5, 8, 11], // ... 790 [3, 6, 9, 12]]; 791 } 792 793 /++ 794 +/ 795 auto objectsAsTable(bool allowMissingFields = true, T)(return scope const(StringMap!T)[] objects, return scope const(string)[] header) 796 @safe pure nothrow @nogc 797 if (isImplicitlyConvertible!(const T, T)) 798 { 799 import mir.algebraic: Variant; 800 import mir.ndslice.concatenation: concatenation; 801 import mir.ndslice.slice: Slice, sliced; 802 import mir.ndslice.topology: as, repeat; 803 804 auto rows = objectsAsRows!allowMissingFields(objects, header); 805 806 alias V = Variant!(typeof(rows[0]), Slice!(const(string)*)); 807 808 return V(header.sliced).repeat(1).concatenation(rows.as!V); 809 } 810 811 /// 812 version (mir_ion_test) 813 @safe pure 814 unittest 815 { 816 import mir.algebraic_alias.csv: T = CsvAlgebraic; 817 import mir.algebraic: Nullable; 818 import mir.date: Date; 819 import mir.test: should; 820 821 auto o1 = ["a" : 1.T, 822 "b" : 2.0.T] 823 .StringMap!T; 824 auto o2 = ["b" : true.T, 825 "c" : false.T] 826 .StringMap!T; 827 auto o3 = ["c" : Date(2021, 12, 12).T, 828 "d" : 3.T] 829 .StringMap!T; 830 831 import mir.ser.text: serializeText; 832 [o1, o2, o3].objectsAsTable(["b", "c"]).serializeText.should 833 == `[["b","c"],[2.0,null],[true,false],[null,2021-12-12]]`; 834 835 [o1, o2].objectsAsTable!false(["b"]).serializeText.should 836 == `[["b"],[2.0],[true]]`; 837 838 import std.exception: assertThrown; 839 import mir.ion.exception: IonException; 840 [o1, o2, o3].objectsAsTable!false(["b", "c"]).serializeText 841 .assertThrown!IonException; 842 } 843 844 /++ 845 Contruct a lazy random-access-range (ndslice) 846 Returns: 847 a lazy 1-dimensional slice of lazy 1-dimensionalal slices 848 +/ 849 auto objectsAsRows(bool allowMissingFields = true, T)(return scope const(StringMap!T)[] objects, return scope const(string)[] header) 850 @safe pure nothrow @nogc 851 if (isImplicitlyConvertible!(const T, T)) 852 { 853 import mir.ndslice.topology: repeat, map, zip, iota; 854 855 static if (allowMissingFields) 856 { 857 return header 858 .repeat(objects.length) 859 .zip(objects) 860 .map!( 861 (header, object) => object 862 .repeat(header.length) 863 .zip(header) 864 .map!( 865 (object, name) 866 { 867 import mir.algebraic: Nullable; 868 if (auto ptr = name in object) 869 return Nullable!T(*ptr); 870 return Nullable!T.init; 871 } 872 ) 873 ); 874 } 875 else 876 { 877 return header 878 .repeat(objects.length) 879 .zip(objects, objects.length.iota) 880 .map!( 881 (header, object, row) => object 882 .repeat(header.length) 883 .zip(header, row.repeat(header.length)) 884 .map!( 885 (object, name, row) 886 { 887 if (auto ptr = name in object) 888 return *ptr; 889 import mir.ion.exception: IonMirException; 890 throw new IonMirException("mir.csv: row ", row + 1, ": missing field '", name, "'"); 891 } 892 ) 893 ); 894 } 895 } 896 897 /// 898 version (mir_ion_test) 899 @safe pure 900 unittest 901 { 902 import mir.algebraic_alias.csv: T = CsvAlgebraic; 903 import mir.algebraic: Nullable; 904 import mir.date: Date; 905 import mir.test: should; 906 907 auto o1 = ["a" : 1.T, 908 "b" : 2.0.T] 909 .StringMap!T; 910 auto o2 = ["b" : true.T, 911 "c" : false.T] 912 .StringMap!T; 913 auto o3 = ["c" : Date(2021, 12, 12).T, 914 "d" : 3.T] 915 .StringMap!T; 916 917 alias NCA = Nullable!T; 918 919 auto rows = [o1, o2, o3].objectsAsRows(["b", "c"]); 920 rows.should == [ 921 // a b 922 [NCA(2.0.T), NCA(null)], 923 [NCA(true.T), NCA(false.T)], 924 [NCA(null), NCA(Date(2021, 12, 12))], 925 ]; 926 927 static assert(is(typeof(rows[0][0]) == NCA)); 928 929 // evaluate 930 import mir.ndslice.fuse: fuse; 931 static assert(is(typeof(rows.fuse) == Slice!(NCA*, 2))); 932 } 933 934 /++ 935 Returns: 936 all keys of all the objects in the observed order. 937 Params: 938 objects = array of objects (string maps) 939 +/ 940 string[] inclusiveHeader(T)(return scope const(StringMap!T)[] objects) 941 @safe pure nothrow 942 { 943 if (objects.length == 0) 944 return null; 945 946 auto map = StringMap!bool( 947 objects[0].keys.dup, 948 new bool[objects[0].keys.length]); 949 950 foreach (object; objects[1 .. $]) 951 foreach (key; object.keys) 952 map[key] = false; 953 954 return (()@trusted => cast(string[]) map.keys)(); 955 } 956 957 /// 958 version (mir_ion_test) 959 @safe pure 960 unittest 961 { 962 import mir.test: should; 963 964 auto o1 = ["a", "b"].StringMap!int([8, 8]); 965 auto o2 = ["b", "c"].StringMap!int([8, 8]); 966 auto o3 = ["c", "d"].StringMap!int([8, 8]); 967 [o1, o2, o3].inclusiveHeader.should = ["a", "b", "c", "d"]; 968 [o3, o2, o1].inclusiveHeader.should = ["c", "d", "b", "a"]; 969 } 970 971 /++ 972 Returns: 973 common keys of all the objects in the observed order. 974 Params: 975 objects = array of objects (string maps) 976 +/ 977 string[] intersectionHeader(T)(return scope const(StringMap!T)[] objects) 978 @safe pure nothrow 979 { 980 if (objects.length == 0) 981 return null; 982 983 auto map = StringMap!bool( 984 objects[0].keys.dup, 985 new bool[objects[0].keys.length]); 986 987 foreach (object; objects[1 .. $]) 988 foreach (key; map.keys) 989 if (key !in object) 990 map.remove(key); 991 992 return (()@trusted => cast(string[]) map.keys)(); 993 } 994 995 /// 996 version (mir_ion_test) 997 @safe pure 998 unittest 999 { 1000 import mir.test: should; 1001 1002 auto o1 = ["a", "b"].StringMap!int([8, 8]); 1003 auto o2 = ["b", "c"].StringMap!int([8, 8]); 1004 auto o3 = ["c", "d"].StringMap!int([8, 8]); 1005 [o1, o2].intersectionHeader.should = ["b"]; 1006 [o3, o2].intersectionHeader.should = ["c"]; 1007 } 1008 1009 /++ 1010 CSV serialization function. 1011 +/ 1012 string serializeCsv(V)( 1013 auto scope ref const V value, 1014 char separator = ',', 1015 char quote = '"', 1016 bool quoteAll = false, 1017 string naValue = "", 1018 string trueValue = "TRUE", 1019 string falseValue = "FALSE", 1020 int serdeTarget = SerdeTarget.csv) 1021 { 1022 import std.array: appender; 1023 auto app = appender!(char[]); 1024 .serializeCsv!(typeof(app), V)(app, value, 1025 separator, 1026 quote, 1027 quoteAll, 1028 naValue, 1029 trueValue, 1030 falseValue, 1031 serdeTarget); 1032 return (()@trusted => cast(string) app.data)(); 1033 } 1034 1035 /// 1036 version(mir_ion_test) 1037 @safe pure 1038 unittest 1039 { 1040 import mir.timestamp: Timestamp; 1041 import mir.format: stringBuf; 1042 import mir.test; 1043 auto someMatrix = [ 1044 [3.0.CsvAlgebraic, 2.CsvAlgebraic, true.CsvAlgebraic, ], 1045 ["str".CsvAlgebraic, "2022-12-12".Timestamp.CsvAlgebraic, "".CsvAlgebraic, null.CsvAlgebraic], 1046 [double.nan.CsvAlgebraic, double.infinity.CsvAlgebraic, 0.0.CsvAlgebraic] 1047 ]; 1048 1049 someMatrix.serializeCsv.should == "3.0,2,TRUE\nstr,2022-12-12,\"\",\nNAN,+INF,0.0\n"; 1050 } 1051 1052 /++ 1053 Ion serialization for custom outputt range. 1054 +/ 1055 void serializeCsv(Appender, V)( 1056 scope ref Appender appender, 1057 auto scope ref const V value, 1058 char separator = ',', 1059 char quote = '"', 1060 bool quoteAll = false, 1061 string naValue = "", 1062 string trueValue = "TRUE", 1063 string falseValue = "FALSE", 1064 int serdeTarget = SerdeTarget.csv) 1065 if (isOutputRange!(Appender, const(char)[]) && isOutputRange!(Appender, char)) 1066 { 1067 auto serializer = CsvSerializer!Appender((()@trusted => &appender)()); 1068 serializer.serdeTarget = serdeTarget; 1069 serializer.separator = separator; 1070 serializer.quote = quote; 1071 serializer.quoteAll = quoteAll; 1072 serializer.naValue = naValue; 1073 serializer.trueValue = trueValue; 1074 serializer.falseValue = falseValue; 1075 import mir.ser: serializeValue; 1076 serializeValue(serializer, value); 1077 } 1078 1079 /// 1080 @safe pure // nothrow @nogc 1081 unittest 1082 { 1083 import mir.timestamp: Timestamp; 1084 import mir.format: stringBuf; 1085 import mir.test; 1086 1087 auto someMatrix = [ 1088 ["str".CsvAlgebraic, 2.CsvAlgebraic, true.CsvAlgebraic], 1089 [3.0.CsvAlgebraic, "2022-12-12".Timestamp.CsvAlgebraic, null.CsvAlgebraic] 1090 ]; 1091 1092 auto buffer = stringBuf; 1093 buffer.serializeCsv(someMatrix); 1094 buffer.data.should == "str,2,TRUE\n3.0,2022-12-12,\n"; 1095 } 1096 1097 /// 1098 struct CsvSerializer(Appender) 1099 { 1100 import mir.bignum.decimal: Decimal; 1101 import mir.bignum.integer: BigInt; 1102 import mir.format: print, stringBuf, printReplaced; 1103 import mir.internal.utility: isFloatingPoint; 1104 import mir.ion.type_code; 1105 import mir.lob; 1106 import mir.string: containsAny; 1107 import mir.timestamp; 1108 import std.traits: isNumeric; 1109 1110 /++ 1111 CSV string buffer 1112 +/ 1113 Appender* appender; 1114 1115 /// Scalar separator 1116 char separator = ','; 1117 /// Symbol to quote scalars 1118 char quote = '"'; 1119 /// 1120 bool quoteAll; 1121 1122 /// 1123 string naValue = ""; 1124 /// 1125 string trueValue = "TRUE"; 1126 /// 1127 string falseValue = "FALSE"; 1128 1129 /// Mutable value used to choose format specidied or user-defined serialization specializations 1130 int serdeTarget = SerdeTarget.csv; 1131 1132 private uint level, row, column; 1133 1134 1135 @safe scope: 1136 1137 /// 1138 size_t stringBegin() 1139 { 1140 appender.put('"'); 1141 return 0; 1142 } 1143 1144 /++ 1145 Puts string part. The implementation allows to split string unicode points. 1146 +/ 1147 void putStringPart(scope const(char)[] value) 1148 { 1149 printReplaced(appender, value, '"', `""`); 1150 } 1151 1152 /// 1153 void stringEnd(size_t) 1154 { 1155 appender.put('"'); 1156 } 1157 1158 /// 1159 size_t structBegin(size_t length = size_t.max) 1160 { 1161 throw new Exception("mir.csv: structure serialization isn't supported: "); 1162 } 1163 1164 /// 1165 void structEnd(size_t state) 1166 { 1167 throw new Exception("mir.csv: structure serialization isn't supported"); 1168 } 1169 1170 /// 1171 size_t listBegin(size_t length = size_t.max) 1172 { 1173 assert(level <= 2); 1174 if (level++ >= 2) 1175 throw new Exception("mir.csv: arrays can't be serialized as scalar values"); 1176 return 0; 1177 } 1178 1179 /// 1180 void listEnd(size_t state) 1181 { 1182 if (level-- == 2) 1183 { 1184 column = 0; 1185 appender.put('\n'); 1186 } 1187 else 1188 { 1189 row = 0; 1190 } 1191 } 1192 1193 /// 1194 alias sexpBegin = listBegin; 1195 1196 /// 1197 alias sexpEnd = listEnd; 1198 1199 /// 1200 void putSymbol(scope const char[] symbol) 1201 { 1202 putValue(symbol); 1203 } 1204 1205 /// 1206 void putAnnotation(scope const(char)[] annotation) 1207 { 1208 assert(0); 1209 } 1210 1211 /// 1212 auto annotationsEnd(size_t state) 1213 { 1214 assert(0); 1215 } 1216 1217 /// 1218 size_t annotationWrapperBegin(size_t length = size_t.max) 1219 { 1220 throw new Exception("mir.csv: annotation serialization isn't supported"); 1221 } 1222 1223 /// 1224 void annotationWrapperEnd(size_t annotationsState, size_t state) 1225 { 1226 assert(0); 1227 } 1228 1229 /// 1230 void nextTopLevelValue() 1231 { 1232 appender.put('\n'); 1233 } 1234 1235 /// 1236 void putKey(scope const char[] key) 1237 { 1238 assert(0); 1239 } 1240 1241 /// 1242 void putValue(Num)(const Num value) 1243 if (isNumeric!Num && !is(Num == enum)) 1244 { 1245 auto buf = stringBuf; 1246 static if (isFloatingPoint!Num) 1247 { 1248 import mir.math.common: fabs; 1249 1250 if (value.fabs < value.infinity) 1251 print(buf, value); 1252 else if (value == Num.infinity) 1253 buf.put(`+INF`); 1254 else if (value == -Num.infinity) 1255 buf.put(`-INF`); 1256 else 1257 buf.put(`NAN`); 1258 } 1259 else 1260 print(buf, value); 1261 putValue(buf.data); 1262 } 1263 1264 /// 1265 void putValue(size_t size)(auto ref const BigInt!size num) 1266 { 1267 auto buf = stringBuf; 1268 num.toString(buf); 1269 putValue(buf.data); 1270 } 1271 1272 /// 1273 void putValue(size_t size)(auto ref const Decimal!size num) 1274 { 1275 auto buf = stringBuf; 1276 num.toString(buf); 1277 putValue(buf.data); 1278 } 1279 1280 /// 1281 void putValue(typeof(null)) 1282 { 1283 putValue(naValue, true); 1284 } 1285 1286 /// ditto 1287 void putNull(IonTypeCode code) 1288 { 1289 putValue(null); 1290 } 1291 1292 /// 1293 void putValue(bool b) 1294 { 1295 putValue(b ? trueValue : falseValue, true); 1296 } 1297 1298 /// 1299 void putValue(scope const char[] value, bool noQuote = false) 1300 { 1301 import mir.ion.exception: IonMirException; 1302 import mir.utility: _expect; 1303 1304 if (_expect(level != 2, false)) 1305 throw new IonMirException( 1306 "mir.csv: expected ", 1307 level ? "row" : "table", 1308 " value, got scalar value '", value, "'"); 1309 1310 if (!quoteAll 1311 && (noQuote || !value.containsAny(separator, quote, '\n')) 1312 && ((value == naValue || value == trueValue || value == falseValue) == noQuote) 1313 ) 1314 { 1315 appender.put(value); 1316 } 1317 else 1318 { 1319 auto state = stringBegin; 1320 putStringPart(value); 1321 stringEnd(state); 1322 } 1323 } 1324 1325 /// 1326 void putValue(scope Clob value) 1327 { 1328 import mir.format: printEscaped, EscapeFormat; 1329 1330 auto buf = stringBuf; 1331 1332 buf.put(`{{"`); 1333 1334 printEscaped!(char, EscapeFormat.ionClob)(buf, value.data); 1335 1336 buf.put(`"}}`); 1337 1338 putValue(buf.data); 1339 } 1340 1341 /// 1342 void putValue(scope Blob value) 1343 { 1344 import mir.base64 : encodeBase64; 1345 1346 auto buf = stringBuf; 1347 1348 buf.put("{{"); 1349 1350 encodeBase64(value.data, buf); 1351 1352 buf.put("}}"); 1353 1354 putValue(buf.data); 1355 } 1356 1357 /// 1358 void putValue(Timestamp value) 1359 { 1360 auto buf = stringBuf; 1361 value.toISOExtString(buf); 1362 putValue(buf.data); 1363 } 1364 1365 /// 1366 void elemBegin() 1367 { 1368 if (level == 2) 1369 { 1370 if (column++) 1371 appender.put(separator); 1372 } 1373 else 1374 { 1375 row++; 1376 } 1377 } 1378 1379 /// 1380 alias sexpElemBegin = elemBegin; 1381 } 1382 1383 /++ 1384 A proxy that allows to converty CSV to a table in another data format. 1385 +/ 1386 struct CsvProxy 1387 { 1388 import mir.algebraic_alias.csv: CsvAlgebraic; 1389 import mir.ion.exception: IonMirException; 1390 /// An input CSV text. BOM isn't supported. 1391 const(char)[] text; 1392 /// If true the elements in the first row are symbolised. 1393 bool hasHeader; 1394 /// Scalar separator 1395 char separator = ','; 1396 /// Symbol to quote scalars 1397 char quote = '"'; 1398 /// Skips rows the first consequent lines, which starts with this character. 1399 char comment = '\0'; 1400 /// Skips a number of rows 1401 ubyte skipRows; 1402 /// 1403 bool fill = true; 1404 /// 1405 bool skipEmptyLines = true; 1406 /// If true the parser tries to recognsise and parse numbers. 1407 bool parseNumbers = true; 1408 /// If true the parser tries to recognsise and parse 1409 // ISO timestamps in the extended form. 1410 bool parseTimestamps = true; 1411 1412 /// A number of conversion conventions. 1413 struct Conversion 1414 { 1415 /// 1416 string from; 1417 /// 1418 CsvAlgebraic to; 1419 } 1420 1421 /++ 1422 The conversion map represented as array of `from->to` pairs. 1423 1424 Note: 1425 automated number recognition works with values like `NaN` and `+Inf` already. 1426 +/ 1427 const(Conversion)[] conversions = [ 1428 Conversion("", null.CsvAlgebraic), 1429 Conversion("TRUE", true.CsvAlgebraic), 1430 Conversion("FALSE", false.CsvAlgebraic), 1431 ]; 1432 1433 /++ 1434 N/A and NULL patterns are converted to Ion `null` when exposed to arrays 1435 and skipped when exposed to objects 1436 +/ 1437 const(string)[] naStrings = [ 1438 ``, 1439 ]; 1440 1441 const(string)[] trueStrings = [ 1442 `TRUE`, 1443 ]; 1444 1445 const(string)[] falseStrings = [ 1446 `FALSE`, 1447 ]; 1448 1449 /// File name for berrer error messages 1450 string fileName = "<unknown>"; 1451 1452 // /++ 1453 // +/ 1454 // bool delegate(size_t columnIndex, scope const(char)[] columnName) useColumn; 1455 1456 /++ 1457 Conversion callback to finish conversion resolution 1458 Params: 1459 unquotedString = string after unquoting 1460 kind = currently recognized path 1461 columnIndex = column index starting from 0 1462 +/ 1463 CsvAlgebraic delegate( 1464 return scope const(char)[] unquotedString, 1465 CsvAlgebraic scalar, 1466 bool quoted, 1467 size_t columnIndex 1468 ) @safe pure @nogc conversionFinalizer; 1469 1470 /++ 1471 +/ 1472 static bool defaultIsSymbolHandler(scope const(char)[] symbol, bool quoted) @safe pure @nogc nothrow 1473 { 1474 import mir.algorithm.iteration: all; 1475 return !quoted && symbol.length && symbol.all!( 1476 c => 1477 'a' <= c && c <= 'z' || 1478 'A' <= c && c <= 'Z' || 1479 c == '_' 1480 ); 1481 } 1482 1483 /++ 1484 A function used to determine if a string should be passed 1485 to a serializer as a symbol instead of strings. 1486 That may help to reduce memory allocation for data with 1487 a huge amount of equal cell values.`` 1488 The default pattern follows regular expression `[a-zA-Z_]+` 1489 and requires symbol to be presented without double quotes. 1490 +/ 1491 bool function(scope const(char)[] symbol, bool quoted) @safe pure @nogc isSymbolHandler = &defaultIsSymbolHandler; 1492 1493 void serialize(S)(scope ref S serializer) scope const @trusted 1494 { 1495 import mir.bignum.decimal: Decimal, DecimalExponentKey; 1496 import mir.exception: MirException; 1497 import mir.ser: serializeValue; 1498 import mir.timestamp: Timestamp; 1499 import mir.utility: _expect; 1500 1501 auto table = CsvReader( 1502 text, 1503 separator, 1504 quote, 1505 comment, 1506 skipRows, 1507 fill, 1508 skipEmptyLines, 1509 ); 1510 1511 if (hasHeader && table.empty) 1512 { 1513 serializer.putValue(null); 1514 return; 1515 } 1516 1517 DecimalExponentKey decimalKey; 1518 Decimal!128 decimal = void; 1519 Timestamp timestamp; 1520 1521 size_t outerState = serializer.listBegin; 1522 1523 if (hasHeader) 1524 { 1525 serializer.elemBegin; 1526 auto state = serializer.listBegin; 1527 foreach (elem; table.front) 1528 { 1529 assert(!elem.error); 1530 serializer.elemBegin; 1531 serializer.putSymbol(elem.value); 1532 } 1533 serializer.listEnd(state); 1534 table.popFront; 1535 } 1536 1537 do 1538 { 1539 serializer.elemBegin; 1540 auto state = serializer.listBegin; 1541 auto row = table.front; 1542 do 1543 { 1544 auto elem = row.front; 1545 1546 if (_expect(elem.error, false)) 1547 row.validateCsvError(elem.error); 1548 1549 CsvAlgebraic scalar; 1550 1551 enum bool allowSpecialValues = true; 1552 enum bool allowDotOnBounds = true; 1553 enum bool allowDExponent = true; 1554 enum bool allowStartingPlus = true; 1555 enum bool allowUnderscores = false; 1556 enum bool allowLeadingZeros = false; 1557 enum bool allowExponent = true; 1558 enum bool checkEmpty = true; 1559 1560 if (_expect(elem.wasQuoted, false)) 1561 { 1562 scalar = cast(string) elem.value; 1563 } 1564 else 1565 if (parseNumbers && decimal.fromStringImpl!( 1566 char, 1567 allowSpecialValues, 1568 allowDotOnBounds, 1569 allowDExponent, 1570 allowStartingPlus, 1571 allowUnderscores, 1572 allowLeadingZeros, 1573 allowExponent, 1574 checkEmpty) 1575 (elem.value, decimalKey)) 1576 { 1577 if (decimalKey) 1578 scalar = cast(double) decimal; 1579 else 1580 scalar = cast(long) decimal.coefficient; 1581 } 1582 else 1583 if (parseTimestamps && Timestamp.fromISOExtString(elem.value, timestamp)) 1584 { 1585 scalar = timestamp; 1586 } 1587 else 1588 { 1589 foreach(ref target; conversions) 1590 { 1591 if (elem.value == target.from) 1592 { 1593 scalar = target.to; 1594 goto Finalizer; 1595 } 1596 } 1597 scalar = cast(string) elem.value; 1598 } 1599 1600 Finalizer: 1601 if (_expect(conversionFinalizer !is null, false)) 1602 { 1603 scalar = conversionFinalizer(elem.value, scalar, elem.wasQuoted, row.columnIndex); 1604 } 1605 serializer.elemBegin; 1606 serializer.serializeValue(scalar); 1607 row.popFront; 1608 } 1609 while(!row.empty); 1610 serializer.listEnd(state); 1611 table.popFront; 1612 } 1613 while (!table.empty); 1614 serializer.listEnd(outerState); 1615 } 1616 } 1617 1618 /// Matrix 1619 version (mir_ion_test) 1620 @safe pure 1621 unittest 1622 { 1623 import mir.test: should; 1624 import mir.ndslice.slice: Slice; 1625 import mir.ion.conv: serde; 1626 import mir.ser.text; 1627 1628 alias Matrix = Slice!(double*, 2); 1629 1630 auto text = "1,2\n3,4\r\n5,6\n"; 1631 1632 auto matrix = text.CsvProxy.serde!Matrix; 1633 matrix.should == [[1, 2], [3, 4], [5, 6]]; 1634 } 1635 1636 /++ 1637 Type resolution is performed for types defined in $(MREF mir,algebraic_alias,csv): 1638 1639 $(UL 1640 $(LI `typeof(null)` - used for N/A values) 1641 $(LI `bool`) 1642 $(LI `long`) 1643 $(LI `double`) 1644 $(LI `string`) 1645 $(LI $(AlgorithmREF timestamp, Timestamp)) 1646 ) 1647 +/ 1648 version (mir_ion_test) 1649 @safe pure 1650 unittest 1651 { 1652 import mir.ion.conv: serde; 1653 import mir.ndslice.slice: Slice; 1654 import mir.ser.text: serializeTextPretty; 1655 import mir.test: should; 1656 import std.string: join; 1657 1658 // alias Matrix = Slice!(CsvAlgebraic*, 2); 1659 1660 CsvProxy csv = { 1661 conversionFinalizer : ( 1662 unquotedString, 1663 scalar, 1664 wasQuoted, 1665 columnIndex) 1666 { 1667 // Do we want to symbolize the data? 1668 return !wasQuoted && unquotedString == `Billion` ? 1669 1000000000.CsvAlgebraic : 1670 scalar; 1671 }, 1672 text : join([ 1673 // User-defined conversion 1674 `Billion` 1675 // `long` patterns 1676 , `100`, `+200`, `-200` 1677 // `double` pattern 1678 , `+1.0`, `-.2`, `3.`, `3e-10`, `3d20` 1679 // also `double` pattern 1680 , `inf`, `+Inf`, `-INF`, `+NaN`, `-nan`, `NAN` 1681 // `bool` patterns 1682 , `TRUE`, `FALSE` 1683 // `Timestamp` patterns 1684 , `2021-02-03` // iso8601 extended 1685 , `2001-12-15T02:59:43.1Z` //canonical 1686 // Default NA patterns are converted to Ion `null` when exposed to arrays 1687 // and skipped when exposed to objects 1688 , `` 1689 // strings 1690 , `100_000` 1691 , `_ab0` 1692 , `_abc` 1693 , `Str` 1694 , `Value100` 1695 , `iNF` 1696 , `Infinity` 1697 , `+Infinity` 1698 , `.Infinity` 1699 , `""` 1700 , ` ` 1701 ], `,`) 1702 }; 1703 1704 // Serializing CsvProxy to Amazon Ion (text version) 1705 csv.serializeTextPretty!" ".should == 1706 `[ 1707 [ 1708 1000000000, 1709 100, 1710 200, 1711 -200, 1712 1.0, 1713 -0.2, 1714 3.0, 1715 3e-10, 1716 3e+20, 1717 +inf, 1718 +inf, 1719 -inf, 1720 nan, 1721 nan, 1722 nan, 1723 true, 1724 false, 1725 2021-02-03, 1726 2001-12-15T02:59:43.1Z, 1727 null, 1728 "100_000", 1729 "_ab0", 1730 "_abc", 1731 "Str", 1732 "Value100", 1733 "iNF", 1734 "Infinity", 1735 "+Infinity", 1736 ".Infinity", 1737 "", 1738 " " 1739 ] 1740 ]`; 1741 } 1742 1743 /++ 1744 Transposed Matrix & Tuple support 1745 +/ 1746 version (mir_ion_test) 1747 @safe pure 1748 unittest 1749 { 1750 import mir.ion.conv: serde; 1751 import mir.date: Date; //also wotks with mir.timestamp and std.datetime 1752 import mir.functional: Tuple; 1753 import mir.ser.text: serializeText; 1754 import mir.test: should; 1755 import mir.ndslice.dynamic: transposed; 1756 1757 auto text = "str,2022-10-12,3.4\nb,2022-10-13,2\n"; 1758 1759 auto matrix = text.CsvProxy.serde!(Slice!(CsvAlgebraic*, 2)); 1760 matrix.transposed.serializeText.should 1761 == q{[["str","b"],[2022-10-12,2022-10-13],[3.4,2]]}; 1762 1763 alias T = Tuple!(string[], Date[], double[]); 1764 1765 matrix.transposed.serde!T.should == T( 1766 [`str`, `b`], 1767 [Date(2022, 10, 12), Date(2022, 10, 13)], 1768 [3.4, 2], 1769 ); 1770 } 1771 1772 /// Converting NA to NaN 1773 version (mir_ion_test) 1774 @safe pure 1775 unittest 1776 { 1777 import mir.csv; 1778 import mir.algebraic: Nullable, visit; 1779 import mir.ion.conv: serde; 1780 import mir.ndslice: Slice, map, slice; 1781 import mir.ser.text: serializeText; 1782 import mir.test: should; 1783 1784 auto text = "1,2\n3,4\n5,\n"; 1785 auto matrix = text 1786 .CsvProxy 1787 .serde!(Slice!(Nullable!double*, 2)) 1788 .map!(visit!((double x) => x, (_) => double.nan)) 1789 .slice; 1790 1791 matrix.serializeText.should == q{[[1.0,2.0],[3.0,4.0],[5.0,nan]]}; 1792 }