The OpenD Programming Language

1 /++
2 $(H2 CSV/TSV parsing)
3 
4 $(LREF CsvProxy) can be serialized to Ion, JSON, MsgPack, or YAML
5 and then deserialized to a specified type.
6 That approach allows using the same mir deserialization
7 pattern as for other data types.
8 $(IONREF conv, serde) unifies these two steps through the binary Ion format,
9 which serves as an efficient DOM representation for all other formats.
10 
11 Macros:
12     IONREF = $(REF_ALTTEXT $(TT $2), $2, mir, ion, $1)$(NBSP)
13     AlgorithmREF = $(GREF_ALTTEXT mir-algorithm, $(TT $2), $2, mir, $1)$(NBSP)
14     NDSLICEREF = $(GREF_ALTTEXT mir-algorithm, $(TT $2), $2, mir, ndslice, $1)$(NBSP)
15     AAREF = $(REF_ALTTEXT $(TT $2), $2, mir, algebraic_alias, $1)$(NBSP)
16 +/
17 
18 module mir.csv;
19 
20 import mir.primitives: isOutputRange;
21 import mir.serde: SerdeTarget;
22 import mir.ndslice.slice: Slice, SliceKind;
23 import mir.string_map: StringMap;
24 import std.traits: isImplicitlyConvertible;
25 
26 ///
27 public import mir.algebraic_alias.csv: CsvAlgebraic;
28 
29 
/++
Rapid CSV reader represented as a range of rows.

The structure isn't copyable. Please use its pointer with range modifiers.

Exactly one call of `empty` has to precede each call of `front`.
Exactly one call of `popFront` has to follow each call of `front`.
Some Phobos functions don't follow this rule.

All elements of each row have to be accessed exactly once before
the next row can be processed.
+/
struct CsvReader
{
    import mir.appender: ScopedBuffer, scopedBuffer;
    import mir.utility: _expect;
    import mir.string: scanLeftAny;

    /// An input CSV text. BOM isn't supported.
    const(char)[] text;
    /// Number of cells each row yields: either passed to the constructor or counted from the first row.
    uint nColumns;
    /// Number of rows passed so far; incremented by `popFront`.
    uint rowIndex;
    /// Scalar separator
    char separator = ',';
    /// Symbol to quote scalars
    char quote = '"';
    /++
    If set, the missing cells of a row that ends early are produced as empty scalars.
    Otherwise a scalar that is followed by a premature row end carries
    `Error.unexpectedRowEnd`.
    +/
    bool fill = true;
    /// If set, `empty` drops leading `'\n'` and `'\r'` characters before testing for the end of the text.
    bool skipEmptyLines = true;

    // Storage for quoted cells that contain escaped (doubled) quotes.
    private ScopedBuffer!(char, 128) buffer;

    /++
    Error code attached to a $(LREF CsvReader.Scalar).
    +/
    enum Error
    {
        ///
        none,
        // ///
        // missingLeftQuote,
        ///
        unexpectedSeparator,
        ///
        unexpectedRowEnd,
    }

    /++
    CSV cell element
    +/
    struct Scalar
    {
        /++
        Unquoted string.

        If `isScopeAllocated` is set, the value refers to the reader's internal
        buffer and is valid only until the next quoted cell is read.
        Otherwise it is a slice of the original text.
        +/
        const(char)[] value;

        /// Set if the cell was enclosed in quotes in the source text.
        bool wasQuoted;
        /++
        If the flag is true, `value` refers to the reader's internal buffer;
        otherwise it refers to the original text.
        +/
        bool isScopeAllocated;

        /++
        Error detected while reading the cell, `Error.none` otherwise.
        +/
        Error error;
    }

    /++
    CSV Row Input Range

    Exactly one call of `empty` has to precede each call of `front`.
    Exactly one call of `popFront` has to follow each call of `front`.
    Some Phobos functions don't follow this rule.
    +/
    struct Row
    {
        private CsvReader* root;
        /// Number of cells that are left in the row.
        uint length;

        /++
        Throws: IonMirException if the $(LREF CsvReader.Error) is set.
        Returns: `void`
        +/
        auto validateCsvError(CsvReader.Error error)
            scope const @safe pure
        {
            import mir.ion.exception: IonMirException;

            final switch (error)
            {
                case CsvReader.Error.none: break;
                // case CsvReader.Error.missingLeftQuote: throw new IonMirException("mir.csv: missing left quote when parsing element at index [", root.rowIndex, ", ", columnIndex, "]");
                case CsvReader.Error.unexpectedSeparator: throw new IonMirException("mir.csv: unexpected separator when parsing element at index [", root.rowIndex, ", ", columnIndex, "]");
                case CsvReader.Error.unexpectedRowEnd: throw new IonMirException("mir.csv: unexpected row end when parsing element at index [", root.rowIndex, ", ", columnIndex, "]");
            }
        }

        ///
        bool empty()() scope const pure nothrow @nogc @property
            in (root)
        {
            return length == 0;
        }

        /++
        The function has to be called after the front value is processed.
        +/
        void popFront()() scope pure nothrow @nogc
            in (root)
            in (length)
        {
            length--;
        }

        /++
        Reads the next cell from the reader's text and advances the text
        past the following separator or line end.
        +/
        Scalar front()() return scope pure nothrow @nogc @property
            in (root)
            in (length)
            // in (length == 1 || root.text.length)
        {
            auto scalar = root.readCell();
            // if (_expect(!scalar.error, true))
            with (root)
            {
                if (text.length && text[0] == separator)
                {
                    text = text.ptr[1 .. text.length];
                    // The last expected cell is followed by a separator:
                    // read and drop the extra cells up to the line end.
                    if (_expect(length == 1, false))
                    {
                        for(;;)
                        {
                            auto ignored = root.readCell;
                            if (!text.length)
                                break;
                            if (text[0] != separator)
                                goto StripLineEnd;
                            text = text.ptr[1 .. text.length];
                        }
                    }
                }
                else
                {
                    // The row ended (line end or end of text) before its last expected cell.
                    if (_expect(length != 1, false))
                    {
                        if (!fill)
                            scalar.error = Error.unexpectedRowEnd;
                    }
                    else
                    if (text.length)
                    {
                    StripLineEnd:
                        // Drops one line-end character, or two for "\r\n".
                        text = text.ptr[1 + (text.length > 1 && text[0] == '\r' && text[1] == '\n') .. text.length];
                    }
                }
            }
            return scalar;
        }

        /// Zero-based index of the current cell in the row.
        uint columnIndex()() scope const @safe pure nothrow @nogc
            in (root)
        {
            return root.nColumns - length;
        }
    }

    /++
    Returns: `true` if no text is left. If `skipEmptyLines` is set,
    leading line-end characters are dropped first.
    +/
    bool empty()() scope pure nothrow @nogc @property
    {
        if (skipEmptyLines)
        {
            if (text.length) for (;;)
            {
                if (text[0] != '\n' && text[0] != '\r')
                    return false;
                text = text[1 .. $];
                if (text.length == 0)
                    return true;
            }
            else
                return true;
        }
        else
            return text.length == 0;
    }

    /++
    The function has to be called after all the row cell values have been processed.
    +/
    void popFront()() scope pure nothrow @nogc
    {
        rowIndex++;
    }

    /// Returns: a row range of `nColumns` cells that reads from this reader.
    Row front()() scope return pure nothrow @nogc @property
    {
        return typeof(return)(&this, nColumns);
    }

    /++
    Params:
        text = CSV text
        separator = scalar separator
        quote = symbol to quote scalars
        comment = leading lines that start with this symbol are skipped
        skipRows = number of leading lines to skip
        fill = see $(LREF CsvReader.fill)
        skipEmptyLines = see $(LREF CsvReader.skipEmptyLines)
        nColumns = number of columns; if zero, it is counted from the first row
    Throws: an exception if the first row exists and is invalid.
    +/
    this(
        return scope const(char)[] text,
        char separator = ',',
        char quote = '"',
        char comment = '\0',
        uint skipRows = 0,
        bool fill = true,
        bool skipEmptyLines = true,
        uint nColumns = 0,
    ) @trusted pure @nogc
    {
        pragma(inline, false);

        // Skip `skipRows` leading lines, then any further lines that start with `comment`.
        // The counter is decremented only while it is positive, so it cannot wrap around
        // and make every remaining line look like a line to skip.
        while (text.length)
        {
            if (skipRows)
                skipRows--;
            else
            if (text[0] != comment)
                break;
            auto next = text.scanLeftAny('\r', '\n');
            text = text[$ - next.length + (next.length >= 1) + (next.length > 1 && next[0] == '\r' && next[1] == '\n') .. $];
        }

        this.text = text;
        this.separator = separator;
        this.quote = quote;
        this.fill = fill;
        this.skipEmptyLines = skipEmptyLines;

        if (this.text.length == 0)
            return;

        // Count the cells of the first row when the number of columns is not given.
        if (!nColumns) for (;;)
        {
            nColumns++;
            auto scalar = readCell();
            if (scalar.error)
            {
                import mir.exception: toMutable;
                import mir.ion.exception: IonException;
                static immutable exc = new IonException("mir.csv: left double quote is missing in the first row");
                throw exc.toMutable;

            }
            if (this.text.length && this.text[0] == separator)
            {
                this.text = this.text[1 .. $];
                continue;
            }
            if (this.text.length)
                this.text = this.text[1 + (this.text.length > 1 && this.text[0] == '\r' && this.text[1] == '\n') .. $];
            break;
        }

        this.nColumns = nColumns;
        // Rewind: counting the columns has consumed the first row.
        this.text = text;
    }

    /+
    Reads a single cell starting at the beginning of `text`.
    An unquoted cell ends at the separator or a line-end character, neither of which is consumed.
    A quoted cell ends at the closing quote, which is consumed; a doubled quote stands for one quote.
    +/
    private Scalar readCell() scope return @trusted pure nothrow @nogc
    {
        // if skipLeftSpaces// TODO then stripLeft csv
        auto quoted = text.length && text[0] == quote;
        if (!quoted)
        {
            auto next = text.scanLeftAny(separator, '\r', '\n');
            auto ret = text[0 .. text.length - next.length];
            text = text.ptr[text.length - next.length .. text.length];
            return Scalar(ret);
        }
        buffer.reset;

        assert(text.length);
        assert(text[0] == quote);
        text = text.ptr[1 .. text.length];

        for (;;)
        {
            auto next = text.scanLeftAny(quote);

            // A quote followed by another quote is an escaped quote:
            // keep the first one in the chunk and skip both.
            auto isQuote = next.length > 1 && next[1] == quote;
            auto ret = text[0 .. text.length - next.length + isQuote];
            text = text.ptr[text.length - next.length + isQuote + (next.length != 0) .. text.length];

            // No escaped quotes were met: the value is a slice of the original text.
            if (!isQuote && buffer.data.length == 0)
                return Scalar(ret, true);

            buffer.put(ret);

            if (!isQuote)
                return Scalar(buffer.data, true, true);
        }
    }
}
327 
/++
Reads the whole CSV text into a matrix of strings.

Cell values that are stored in the reader's temporary buffer are duplicated;
all the other values are slices of `text`.

Returns: $(NDSLICEREF slice, Slice)`!(string*, 2)`.
See_also: $(LREF matrixAsDataFrame)
+/
Slice!(string*, 2) csvToStringMatrix(
    return scope string text,
    char separator = ',',
    char quote = '"',
    char comment = '\0',
    ubyte skipRows = 0,
    bool fill = true,
    bool skipEmptyLines = true,
) @trusted pure
{
    pragma(inline, false);

    import mir.ndslice: sliced;
    import mir.ndslice.slice: Slice;
    import mir.utility: _expect;
    import std.array: appender;

    auto cells = appender!(string[]);
    cells.reserve(text.length / 32);

    auto reader = CsvReader(text, separator, quote, comment, skipRows, fill, skipEmptyLines);

    // A single row that is filled cell by cell and then appended as a whole.
    auto rowCells = new string[reader.nColumns];

    for (; !reader.empty; reader.popFront)
    {
        auto row = reader.front;
        do
        {
            auto cell = row.front;
            if (_expect(cell.error, false))
                row.validateCsvError(cell.error);

            // Buffered values have to be copied, the others refer to the immutable input.
            rowCells[row.columnIndex] = _expect(cell.isScopeAllocated, false)
                ? cell.value.idup
                : cast(string) cell.value;
            row.popFront;
        }
        while (!row.empty);
        cells.put(rowCells);
    }

    assert (cells.data.length == reader.rowIndex * reader.nColumns);
    return cells.data.sliced(reader.rowIndex, reader.nColumns);
}
388 
///
version (mir_ion_test)
@safe pure
unittest
{
    import mir.ndslice.dynamic: transposed;
    import mir.ndslice.slice: Slice, SliceKind;
    import mir.test: should;

    // empty lines are allowed by default
    auto csv = `012,abc,"mno pqr",0` ~ "\n\n" ~ `982,def,"stuv wx",1`
        ~ "\n" ~ `78,ghijk,"yx",2`;

    auto matrix = csv.csvToStringMatrix();
    static assert(is(typeof(matrix) == Slice!(string*, 2)));

    matrix.should ==
    [[`012`, `abc`, `mno pqr`, `0`], [`982`, `def`, `stuv wx`, `1`], [`78`, `ghijk`, `yx`, `2`]];

    // the transposed view lists the columns
    auto columns = matrix.transposed;
    static assert(is(typeof(columns) == Slice!(string*, 2, SliceKind.universal)));

    columns.should ==
    [[`012`, `982`, `78`], [`abc`, `def`, `ghijk`], [`mno pqr`, `stuv wx`, `yx`], [`0`, `1`, `2`]];
}
415 
version (mir_ion_test)
@safe pure
unittest
{
    import mir.test: should;

    // Optional parameters to csvToStringMatrix
    auto csv = `012;abc;"mno pqr";0` ~ "\n" ~ `982;def;"stuv wx";1`
        ~ "\n" ~ `78;ghijk;"yx";2`;

    auto matrix = csv.csvToStringMatrix(';', '"');
    matrix.should ==
    [["012", "abc", "mno pqr", "0"], ["982", "def", "stuv wx", "1"], ["78", "ghijk", "yx", "2"]];
}
428 
version (mir_ion_test)
@safe pure
unittest
{
    import mir.test: should;

    // "\r\n" line ends, including the trailing one
    auto csv = `012,aa,bb,cc` ~ "\r\n" ~ `982,dd,ee,ff` ~ "\r\n"
        ~ `789,gg,hh,ii` ~ "\r\n";

    auto matrix = csv.csvToStringMatrix;
    matrix.should ==
    [["012", "aa", "bb", "cc"], ["982", "dd", "ee", "ff"], ["789", "gg", "hh", "ii"]];
}
440 
version (mir_ion_test)
@safe pure
unittest
{
    import mir.test: should;

    // Optional parameters here too
    auto csv = `012;aa;bb;cc` ~ "\r\n" ~ `982;dd;ee;ff` ~ "\r\n"
        ~ `789;gg;hh;ii` ~ "\r\n";

    auto matrix = csv.csvToStringMatrix(';', '"');
    matrix.should ==
    [["012", "aa", "bb", "cc"], ["982", "dd", "ee", "ff"], ["789", "gg", "hh", "ii"]];
}
453 
version (mir_ion_test)
@safe pure
unittest
{
    import mir.test: should;

    // Quoted fields that contain newlines and delimiters
    auto csv = `012,abc,"ha ha ` ~ "\n" ~ `ha this is a split value",567`
        ~ "\n" ~ `321,"a,comma,b",def,111` ~ "\n";

    auto matrix = csv.csvToStringMatrix;
    matrix.should ==
    [["012", "abc", "ha ha \nha this is a split value", "567"], ["321", "a,comma,b", "def", "111"]];
}
466 
version (mir_ion_test)
@safe pure
unittest
{
    import mir.test: should;

    // Quoted fields that contain newlines and delimiters, optional parameters for csvToStringMatrix
    auto csv = `012;abc;"ha ha ` ~ "\n" ~ `ha this is a split value";567`
        ~ "\n" ~ `321;"a,comma,b";def;111` ~ "\n";

    auto matrix = csv.csvToStringMatrix(';', '"');
    matrix.should ==
    [["012", "abc", "ha ha \nha this is a split value", "567"], ["321", "a,comma,b", "def", "111"]];
}
479 
version (mir_ion_test)
@safe pure
unittest
{
    import mir.test: should;

    // Quoted fields that contain quotes
    // (Note: RFC-4180 does not allow doubled quotes in unquoted fields)
    auto csv = `012,"a b ""haha"" c",982` ~ "\n";

    auto matrix = csv.csvToStringMatrix;
    matrix.should == [["012", `a b "haha" c`, "982"]];
}
491 
version (mir_ion_test)
@safe pure
unittest
{
    import mir.test: should;

    // Quoted fields that contain quotes, optional parameters for csvToStringMatrix
    // (Note: RFC-4180 does not allow doubled quotes in unquoted fields)
    auto csv = `012;"a b ""haha"" c";982` ~ "\n";

    auto matrix = csv.csvToStringMatrix(';', '"');
    matrix.should == [["012", `a b "haha" c`, "982"]];
}
503 
version (mir_ion_test)
@safe pure
unittest
{
    // Trailing empty fields (bug#1522)
    import mir.test: should;

    string oneSeparator = `,` ~ "\n";
    oneSeparator.csvToStringMatrix.should == [["", ""]];

    string twoSeparators = `,,` ~ "\n";
    twoSeparators.csvToStringMatrix.should == [["", "", ""]];

    string emptyRows = "a,b,c,d" ~ "\n" ~ ",,," ~ "\n" ~ ",,," ~ "\n";
    emptyRows.csvToStringMatrix.should ==
    [["a", "b", "c", "d"], ["", "", "", ""], ["", "", "", ""]];

    string quotedWithTail = "\"a\",b,c,\"d\",";
    quotedWithTail.csvToStringMatrix.should == [["a", "b", "c", "d", ""]];

    string emptyQuotedWithTail = "\"\",\"\",";
    emptyQuotedWithTail.csvToStringMatrix.should == [["", "", ""]];
}
527 
// Boundary condition checks
version (mir_ion_test)
@safe pure
unittest
{
    import mir.test: should;

    // the text ends right after an escaped quote
    string escapedQuoteAtEnd = `012,792,"def""`;
    escapedQuoteAtEnd.csvToStringMatrix.should == [[`012`, `792`, `def"`]];

    // the closing quote is missing
    string unclosedQuote = `012,792,"def""012`;
    unclosedQuote.csvToStringMatrix.should == [[`012`, `792`, `def"012`]];

    // a quoted value at the end of the text
    string quotedAtEnd = `012,792,"a"`;
    quotedAtEnd.csvToStringMatrix.should == [[`012`, `792`, `a`]];

    // a lone opening quote at the end of the text
    string loneQuote = `012,792,"`;
    loneQuote.csvToStringMatrix.should == [[`012`, `792`, ``]];

    // an empty field in the middle
    string emptyMiddle = `012;;311`;
    emptyMiddle.csvToStringMatrix(';').should == [[`012`, ``, `311`]];
}
/++
Reads the whole CSV text into a matrix of $(LREF CsvAlgebraic) values.

Quoted cells are stored as strings. An unquoted cell is tried, in order, as
a number (if `parseNumbers`), as a timestamp (if `parseTimestamps`), and as
one of the `conversions`; if nothing matches it is stored as a string.
If `conversionFinalizer` is not null, it is called for each cell and its
result is stored instead.

Returns: $(NDSLICEREF slice, Slice)`!(CsvAlgebraic*, 2)`.
See_also: $(LREF matrixAsDataFrame)
+/
Slice!(CsvAlgebraic*, 2) csvToAlgebraicMatrix(
    return scope string text,
    char separator = ',',
    char quote = '"',
    scope const CsvProxy.Conversion[] conversions = CsvProxy.init.conversions,
    char comment = '\0',
    ubyte skipRows = 0,
    bool fill = true,
    bool skipEmptyLines = true,
    bool parseNumbers = true,
    bool parseTimestamps = true,
    CsvAlgebraic delegate(
        return scope const(char)[] unquotedString,
        CsvAlgebraic scalar,
        bool quoted,
        size_t columnIndex
    ) @safe pure conversionFinalizer = null
) @trusted pure
{
    pragma(inline, false);

    import mir.bignum.decimal: Decimal, DecimalExponentKey;
    import mir.ndslice: sliced;
    import mir.ndslice.slice: Slice;
    import mir.timestamp: Timestamp;
    import mir.utility: _expect;
    import std.array: appender;

    // Number parsing options
    enum bool allowSpecialValues = true;
    enum bool allowDotOnBounds = true;
    enum bool allowDExponent = true;
    enum bool allowStartingPlus = true;
    enum bool allowUnderscores = false;
    enum bool allowLeadingZeros = false;
    enum bool allowExponent = true;
    enum bool checkEmpty = true;

    auto cells = appender!(CsvAlgebraic[]);
    cells.reserve(text.length / 32);

    auto reader = CsvReader(text, separator, quote, comment, skipRows, fill, skipEmptyLines);

    // A single row that is filled cell by cell and then appended as a whole.
    auto rowCells = new CsvAlgebraic[reader.nColumns];

    DecimalExponentKey decimalKey;
    Decimal!128 decimal = void;
    Timestamp timestamp;

    for (; !reader.empty; reader.popFront)
    {
        auto row = reader.front;
        do
        {
            auto cell = row.front;
            if (_expect(cell.error, false))
                row.validateCsvError(cell.error);

            CsvAlgebraic scalar;

            if (_expect(cell.wasQuoted, false))
            {
                // Buffered values have to be copied, the others refer to the immutable input.
                scalar = _expect(cell.isScopeAllocated, false)
                    ? cell.value.idup
                    : cast(string) cell.value;
            }
            else
            if (parseNumbers && decimal.fromStringImpl!(
                char,
                allowSpecialValues,
                allowDotOnBounds,
                allowDExponent,
                allowStartingPlus,
                allowUnderscores,
                allowLeadingZeros,
                allowExponent,
                checkEmpty)
                (cell.value, decimalKey))
            {
                // A non-zero key selects `double`, a zero key selects `long`.
                if (decimalKey)
                    scalar = cast(double) decimal;
                else
                    scalar = cast(long) decimal.coefficient;
            }
            else
            if (parseTimestamps && Timestamp.fromISOExtString(cell.value, timestamp))
            {
                scalar = timestamp;
            }
            else
            {
                // The first matching conversion wins; a plain string is the fallback.
                bool converted;
                foreach (ref target; conversions)
                {
                    if (cell.value == target.from)
                    {
                        scalar = target.to;
                        converted = true;
                        break;
                    }
                }
                if (!converted)
                    scalar = cast(string) cell.value;
            }

            if (_expect(conversionFinalizer !is null, false))
                scalar = conversionFinalizer(cell.value, scalar, cell.wasQuoted, row.columnIndex);

            rowCells[row.columnIndex] = scalar;
            row.popFront;
        }
        while (!row.empty);
        cells.put(rowCells);
    }

    assert (cells.data.length == reader.rowIndex * reader.nColumns);
    return cells.data.sliced(reader.rowIndex, reader.nColumns);
}
682 
///
version(mir_ion_test)
unittest
{
    import mir.csv;
    import mir.ion.conv: serde; // to convert CsvProxy to D types
    import mir.serde: serdeKeys, serdeIgnoreUnexpectedKeys, serdeOptional;
    // mir.date and std.datetime are supported as well
    import mir.timestamp: Timestamp;//mir-algorithm package
    import mir.test: should;

    // The first data row has an extra cell, the second one lacks the Volume.
    auto csv =
`Date,Open,High,Low,Close,Volume
2021-01-21 09:30:00,133.8,134.43,133.59,134.0,9166695,ignoreNoHeader
2021-01-21 09:35:00,134.25,135.0,134.19,134.5`;// fill the Volume with '0'

    // If you don't have a header,
    // `mir.functional.Tuple` instead of MyDataFrame.
    @serdeIgnoreUnexpectedKeys //ignore all other columns
    static struct MyDataFrame
    {
        // Few keys are allowed
        @serdeKeys(`Date`, `date`, `timestamp`)
        Timestamp[] timestamp;

        @serdeKeys(`Open`)  double[]    open;
        @serdeKeys(`High`)  double[]    high;
        @serdeKeys(`Low`)   double[]    low;
        @serdeKeys(`Close`) double[]    close;

        @serdeOptional // if we don't have Volume
        @serdeKeys(`Volume`)
        long[]volume;
    }

    MyDataFrame expected = {
        timestamp:  [`2021-01-21 09:30:00`.Timestamp, `2021-01-21 09:35:00`.Timestamp],
        open:       [133.8,  134.25],
        high:       [134.43, 135],
        low:        [133.59, 134.19],
        close:      [134.0,  134.5],
        volume:     [9166695, 0],
    };

    // fill the missing and empty fields with '0'
    auto matrix = csv.csvToAlgebraicMatrix(',', '"', [CsvProxy.Conversion("", 0.CsvAlgebraic)]);
    auto table = matrix.matrixAsDataFrame;

    table["Volume"][0].should == 9166695;
    table["Volume"][1].should == 0;

    table.serde!MyDataFrame.should == expected;
}
736 
/++
Represents a matrix as a dictionary of columns.
The first row is used as the header; each of the other rows contributes
one element to every column.

Returns: a string map that refers to the same header and the same data.
Throws: `IonException` if the matrix has no rows.
+/
StringMap!(Slice!(T*, 1, SliceKind.universal))
    matrixAsDataFrame(T)(return scope Slice!(T*, 2) matrix)
    @trusted pure
{
    import mir.algebraic: isVariant;
    import mir.array.allocation: array;
    import mir.ion.exception: IonException;
    import mir.ndslice.topology: byDim, map, as;

    if (!matrix.length)
        throw new IonException("mir.csv: Matrix should have at least a single row to get the header");

    auto headerRow = matrix[0];

    // String headers are used in place, other element types are converted to strings.
    static if (is(T == string))
        auto keys = headerRow.field;
    else
    static if (isVariant!T)
        auto keys = headerRow.map!((ref x) => x.get!string).array;
    else
        auto keys = headerRow.as!string.array;

    // One universal 1-dimensional slice per column, without the header row.
    auto columns = matrix[1 .. $].byDim!1.array;

    return typeof(return)(keys, columns);
}
766 
///
version (mir_ion_test)
@safe pure
unittest
{
    import mir.ndslice.topology: as, map;
    import mir.test: should;

    auto csv = "a,b,c\n1,2,3\n4,5,6\n7,8,9\n10,11,12";

    // see also csvToAlgebraicMatrix
    auto matrix = csv.csvToStringMatrix;
    auto table = matrix.matrixAsDataFrame;

    table["a"].should == ["1", "4", "7", "10"];

    table.keys.should == ["a", "b", "c"];

    auto numbers = table.values.map!(column => column[].as!double);
    numbers.should == [
        [1, 4, 7, 10], // first column
        [2, 5, 8, 11], // ...
        [3, 6, 9, 12]];
}
792 
/++
Represents objects as a lazy table: the header row followed by
the rows produced by $(LREF objectsAsRows).

Params:
    objects = array of objects (string maps)
    header = keys to select, in the column order
Returns:
    a lazy concatenation of the header row and the data rows;
    each row is a variant of either a data row or the header slice
+/
auto objectsAsTable(bool allowMissingFields = true, T)(return scope const(StringMap!T)[] objects, return scope const(string)[] header)
    @safe pure nothrow @nogc
    if (isImplicitlyConvertible!(const T, T))
{
    import mir.algebraic: Variant;
    import mir.ndslice.concatenation: concatenation;
    import mir.ndslice.slice: Slice, sliced;
    import mir.ndslice.topology: as, repeat;

    auto rows = objectsAsRows!allowMissingFields(objects, header);

    // A common element type for the header row and the data rows.
    alias V = Variant!(typeof(rows[0]), Slice!(const(string)*));

    auto headerRows = V(header.sliced).repeat(1);
    auto dataRows = rows.as!V;

    return concatenation(headerRows, dataRows);
}
810 
///
version (mir_ion_test)
@safe pure
unittest
{
    import mir.algebraic_alias.csv: T = CsvAlgebraic;
    import mir.algebraic: Nullable;
    import mir.date: Date;
    import mir.ion.exception: IonException;
    import mir.ser.text: serializeText;
    import mir.test: should;
    import std.exception: assertThrown;

    auto o1 = ["a" : 1.T,
               "b" : 2.0.T]
        .StringMap!T;
    auto o2 = ["b" : true.T,
               "c" : false.T]
        .StringMap!T;
    auto o3 = ["c" : Date(2021, 12, 12).T,
               "d" : 3.T]
        .StringMap!T;

    // missing fields are allowed by default and serialized as null
    auto nullableTable = [o1, o2, o3].objectsAsTable(["b", "c"]);
    nullableTable.serializeText.should
    == `[["b","c"],[2.0,null],[true,false],[null,2021-12-12]]`;

    // strict mode, all the fields are present
    auto strictTable = [o1, o2].objectsAsTable!false(["b"]);
    strictTable.serializeText.should
         == `[["b"],[2.0],[true]]`;

    // strict mode, a field is missing
    [o1, o2, o3].objectsAsTable!false(["b", "c"]).serializeText
        .assertThrown!IonException;
}
843 
/++
Constructs a lazy random-access range (ndslice) of rows.

If `allowMissingFields` is set, each element is a `Nullable!T` that is null
when the object lacks the field. Otherwise each element is a `T`, and
accessing a missing field throws `IonMirException`.

Params:
    objects = array of objects (string maps)
    header = keys to select, in the column order
Returns:
    a lazy 1-dimensional slice of lazy 1-dimensional slices
+/
auto objectsAsRows(bool allowMissingFields = true, T)(return scope const(StringMap!T)[] objects, return scope const(string)[] header)
    @safe pure nothrow @nogc
    if (isImplicitlyConvertible!(const T, T))
{
    import mir.ndslice.topology: repeat, map, zip, iota;

    static if (allowMissingFields)
    {
        return header
            .repeat(objects.length)
            .zip(objects)
            .map!(
                (names, entry) => entry
                    .repeat(names.length)
                    .zip(names)
                    .map!(
                        (source, fieldName)
                        {
                            import mir.algebraic: Nullable;
                            auto found = fieldName in source;
                            return found ? Nullable!T(*found) : Nullable!T.init;
                        }
                    )
            );
    }
    else
    {
        // The row index is zipped in only to be reported in the error message.
        return header
            .repeat(objects.length)
            .zip(objects, objects.length.iota)
            .map!(
                (names, entry, rowIndex) => entry
                    .repeat(names.length)
                    .zip(names, rowIndex.repeat(names.length))
                    .map!(
                        (source, fieldName, rowNumber)
                        {
                            import mir.ion.exception: IonMirException;
                            auto found = fieldName in source;
                            if (found is null)
                                throw new IonMirException("mir.csv: row ", rowNumber + 1, ": missing field '", fieldName, "'");
                            return *found;
                        }
                    )
            );
    }
}
896 
///
version (mir_ion_test)
@safe pure
unittest
{
    import mir.algebraic_alias.csv: T = CsvAlgebraic;
    import mir.algebraic: Nullable;
    import mir.date: Date;
    import mir.ndslice.fuse: fuse;
    import mir.test: should;

    alias NCA = Nullable!T;

    auto o1 = ["a" : 1.T,
               "b" : 2.0.T]
        .StringMap!T;
    auto o2 = ["b" : true.T,
               "c" : false.T]
        .StringMap!T;
    auto o3 = ["c" : Date(2021, 12, 12).T,
               "d" : 3.T]
        .StringMap!T;

    auto rows = [o1, o2, o3].objectsAsRows(["b", "c"]);
    static assert(is(typeof(rows[0][0]) == NCA));

    rows.should == [
        // b                           c
        [NCA(2.0.T),  NCA(null)],
        [NCA(true.T), NCA(false.T)],
        [NCA(null),   NCA(Date(2021, 12, 12))],
    ];

    // evaluate
    static assert(is(typeof(rows.fuse) == Slice!(NCA*, 2)));
}
933 
/++
Collects the union of the keys of all the objects.

Returns:
    all keys of all the objects in the observed order.
Params:
    objects = array of objects (string maps)
+/
string[] inclusiveHeader(T)(return scope const(StringMap!T)[] objects)
    @safe pure nothrow
{
    if (!objects.length)
        return null;

    // The map is used as an ordered set: only its keys matter.
    auto firstKeys = objects[0].keys;
    auto map = StringMap!bool(firstKeys.dup, new bool[firstKeys.length]);

    foreach (index; 1 .. objects.length)
    {
        foreach (key; objects[index].keys)
            map[key] = false;
    }

    return (()@trusted => cast(string[]) map.keys)();
}
956 
///
version (mir_ion_test)
@safe pure
unittest
{
    import mir.test: should;

    auto o1 = ["a", "b"].StringMap!int([8, 8]);
    auto o2 = ["b", "c"].StringMap!int([8, 8]);
    auto o3 = ["c", "d"].StringMap!int([8, 8]);
    // `==` performs the check; the keys come in the order they are first observed
    [o1, o2, o3].inclusiveHeader.should == ["a", "b", "c", "d"];
    [o3, o2, o1].inclusiveHeader.should == ["c", "d", "b", "a"];
}
970 
/++
Collects the keys that are present in every object.

Returns:
    common keys of all the objects in the observed order.
Params:
    objects = array of objects (string maps)
+/
string[] intersectionHeader(T)(return scope const(StringMap!T)[] objects)
    @safe pure nothrow
{
    if (objects.length == 0)
        return null;

    // Only the keys of the first object can be common to all the objects.
    auto candidates = objects[0].keys;

    // The map is used as an ordered set: only its keys matter.
    auto map = StringMap!bool(
        candidates.dup,
        new bool[candidates.length]);

    // Iterate over the untouched keys of the first object rather than `map.keys`:
    // removing from `map` while iterating its own keys may skip elements if the
    // removal compacts the key array in place (TODO confirm against StringMap.remove).
    // Removing a key that is already gone has no effect.
    foreach (object; objects[1 .. $])
        foreach (key; candidates)
            if (key !in object)
                map.remove(key);

    return (()@trusted => cast(string[]) map.keys)();
}
994 
995 ///
996 version (mir_ion_test)
997 @safe pure
998 unittest
999 {
1000     import mir.test: should;
1001 
1002     auto o1 = ["a", "b"].StringMap!int([8, 8]);
1003     auto o2 = ["b", "c"].StringMap!int([8, 8]);
1004     auto o3 = ["c", "d"].StringMap!int([8, 8]);
1005     [o1, o2].intersectionHeader.should = ["b"];
1006     [o3, o2].intersectionHeader.should = ["c"];
1007 }
1008 
/++
CSV serialization function.

Serializes `value` into a freshly allocated string by forwarding to the
output-range overload of `serializeCsv`.

Params:
    value = value to serialize
    separator = scalar separator
    quote = symbol used to quote scalars
    quoteAll = if true, every scalar is quoted
    naValue = text written for null values
    trueValue = text written for `true`
    falseValue = text written for `false`
    serdeTarget = serialization target passed on to the serializer
Returns:
    the CSV text
+/
string serializeCsv(V)(
    auto scope ref const V value,
    char separator = ',',
    char quote = '"',
    bool quoteAll = false,
    string naValue = "",
    string trueValue = "TRUE",
    string falseValue = "FALSE",
    int serdeTarget = SerdeTarget.csv)
{
    import std.array: appender;

    auto sink = appender!(char[]);
    alias Sink = typeof(sink);

    // The template arguments are spelled out so that the output-range overload
    // is selected rather than this one.
    .serializeCsv!(Sink, V)(
        sink,
        value,
        separator,
        quote,
        quoteAll,
        naValue,
        trueValue,
        falseValue,
        serdeTarget);

    // The buffer is local to this call, so it can be handed out as immutable.
    return (() @trusted => cast(string) sink.data)();
}
1034 
///
version(mir_ion_test)
@safe pure
unittest
{
    import mir.format: stringBuf;
    import mir.test;
    import mir.timestamp: Timestamp;

    alias Cell = CsvAlgebraic;

    // Rows may have different lengths; each row is written as it is.
    auto someMatrix = [
        [Cell(3.0), Cell(2), Cell(true)],
        [Cell("str"), Cell(Timestamp("2022-12-12")), Cell(""), Cell(null)],
        [Cell(double.nan), Cell(double.infinity), Cell(0.0)],
    ];

    // An empty string is quoted to tell it apart from the N/A value,
    // which is written as an empty field.
    someMatrix.serializeCsv.should == "3.0,2,TRUE\nstr,2022-12-12,\"\",\nNAN,+INF,0.0\n";
}
1051 
/++
CSV serialization for a custom output range.

Params:
    appender = output range that receives the CSV text
    value = value to serialize
    separator = scalar separator
    quote = symbol used to quote scalars
    quoteAll = if true, every scalar is quoted
    naValue = text written for null values
    trueValue = text written for `true`
    falseValue = text written for `false`
    serdeTarget = serialization target stored in the serializer
+/
void serializeCsv(Appender, V)(
    scope ref Appender appender,
    auto scope ref const V value,
    char separator = ',',
    char quote = '"',
    bool quoteAll = false,
    string naValue = "",
    string trueValue = "TRUE",
    string falseValue = "FALSE",
    int serdeTarget = SerdeTarget.csv)
    if (isOutputRange!(Appender, const(char)[]) && isOutputRange!(Appender, char))
{
    import mir.ser: serializeValue;

    // The serializer keeps a pointer to the output range; the address of the
    // `scope ref` parameter is taken inside a @trusted lambda.
    auto csv = CsvSerializer!Appender((() @trusted => &appender)());

    // Formatting options.
    csv.separator = separator;
    csv.quote = quote;
    csv.quoteAll = quoteAll;

    // Spellings of the special values.
    csv.naValue = naValue;
    csv.trueValue = trueValue;
    csv.falseValue = falseValue;

    csv.serdeTarget = serdeTarget;

    serializeValue(csv, value);
}
1078 
///
version (mir_ion_test)
@safe pure // nothrow @nogc
unittest
{
    import mir.timestamp: Timestamp;
    import mir.format: stringBuf;
    import mir.test;

    auto someMatrix = [
        ["str".CsvAlgebraic, 2.CsvAlgebraic, true.CsvAlgebraic],
        [3.0.CsvAlgebraic, "2022-12-12".Timestamp.CsvAlgebraic, null.CsvAlgebraic]
    ];

    // Any output range of characters can be used as the destination.
    auto buffer = stringBuf;
    buffer.serializeCsv(someMatrix);
    buffer.data.should == "str,2,TRUE\n3.0,2022-12-12,\n";
}
1096 
/++
CSV serializer.

Accepts a list of rows, each row being a list of scalars, and writes it as CSV
text to the output range behind `appender`. Structures and annotations are
rejected, and scalar values are only accepted inside a row.
+/
struct CsvSerializer(Appender)
{
    import mir.bignum.decimal: Decimal;
    import mir.bignum.integer: BigInt;
    import mir.format: print, stringBuf;
    import mir.internal.utility: isFloatingPoint;
    import mir.ion.type_code;
    import mir.lob;
    import mir.string: containsAny;
    import mir.timestamp;
    import std.traits: isNumeric;

    /++
    CSV string buffer
    +/
    Appender* appender;

    /// Scalar separator
    char separator = ',';
    /// Symbol to quote scalars
    char quote = '"';
    /// If true, every scalar is written quoted.
    bool quoteAll;

    /// Text written for null values
    string naValue = "";
    /// Text written for `true`
    string trueValue = "TRUE";
    /// Text written for `false`
    string falseValue = "FALSE";

    /// Mutable value used to choose format specified or user-defined serialization specializations
    int serdeTarget = SerdeTarget.csv;

    // `level` is the current list nesting depth: 0 - outside the table,
    // 1 - inside the table, 2 - inside a row.
    // `row` and `column` count the elements started at the respective depth.
    private uint level, row, column;


@safe scope:

    /++
    Opens a quoted scalar by writing the `quote` symbol.
    Returns: a state value for `stringEnd`; always 0.
    +/
    size_t stringBegin()
    {
        appender.put(quote);
        return 0;
    }

    /++
    Puts string part. The implementation allows to split string unicode points.

    Each occurrence of the `quote` symbol is written twice.
    +/
    void putStringPart(scope const(char)[] value)
    {
        size_t start;
        foreach (i, char c; value)
        {
            if (c == quote)
            {
                // Flush the pending run including the quote itself, then repeat
                // the quote to escape it.
                appender.put(value[start .. i + 1]);
                appender.put(quote);
                start = i + 1;
            }
        }
        appender.put(value[start .. $]);
    }

    /// Closes a quoted scalar by writing the `quote` symbol.
    void stringEnd(size_t)
    {
        appender.put(quote);
    }

    /// Structures aren't supported. Throws: `Exception`, always.
    size_t structBegin(size_t length = size_t.max)
    {
        throw new Exception("mir.csv: structure serialization isn't supported: ");
    }

    /// Structures aren't supported. Throws: `Exception`, always.
    void structEnd(size_t state)
    {
        throw new Exception("mir.csv: structure serialization isn't supported");
    }

    /++
    Enters the table (first level) or a row (second level).

    Returns: a state value for `listEnd`; always 0.
    Throws: `Exception` on an attempt to open a list inside a row.
    +/
    size_t listBegin(size_t length = size_t.max)
    {
        assert(level <= 2);
        // Check before incrementing, so that a rejected call leaves
        // the nesting depth untouched.
        if (level >= 2)
            throw new Exception("mir.csv: arrays can't be serialized as scalar values");
        level++;
        return 0;
    }

    /++
    Leaves a row or the table. Leaving a row terminates the line with `'\n'`
    and resets the column counter; leaving the table resets the row counter.
    +/
    void listEnd(size_t state)
    {
        if (level-- == 2)
        {
            column = 0;
            appender.put('\n');
        }
        else
        {
            row = 0;
        }
    }

    ///
    alias sexpBegin = listBegin;

    ///
    alias sexpEnd = listEnd;

    /// Symbols are written like string scalars.
    void putSymbol(scope const char[] symbol)
    {
        putValue(symbol);
    }

    /// Annotations aren't supported; must not be called.
    void putAnnotation(scope const(char)[] annotation)
    {
        assert(0);
    }

    /// Annotations aren't supported; must not be called.
    auto annotationsEnd(size_t state)
    {
        assert(0);
    }

    /// Annotations aren't supported. Throws: `Exception`, always.
    size_t annotationWrapperBegin(size_t length = size_t.max)
    {
        throw new Exception("mir.csv: annotation serialization isn't supported");
    }

    /// Annotations aren't supported; must not be called.
    void annotationWrapperEnd(size_t annotationsState, size_t state)
    {
        assert(0);
    }

    /// Separates top level values with a line feed.
    void nextTopLevelValue()
    {
        appender.put('\n');
    }

    /// Structures aren't supported; must not be called.
    void putKey(scope const char[] key)
    {
        assert(0);
    }

    /++
    Puts a number. Non-finite floating point values are written
    as `+INF`, `-INF` and `NAN`.
    +/
    void putValue(Num)(const Num value)
        if (isNumeric!Num && !is(Num == enum))
    {
        auto buf = stringBuf;
        static if (isFloatingPoint!Num)
        {
            import mir.math.common: fabs;

            // The comparison is false for both infinities and for NaN.
            if (value.fabs < value.infinity)
                print(buf, value);
            else if (value == Num.infinity)
                buf.put(`+INF`);
            else if (value == -Num.infinity)
                buf.put(`-INF`);
            else
                buf.put(`NAN`);
        }
        else
            print(buf, value);
        putValue(buf.data);
    }

    /// Puts a big integer using its `toString` representation.
    void putValue(size_t size)(auto ref const BigInt!size num)
    {
        auto buf = stringBuf;
        num.toString(buf);
        putValue(buf.data);
    }

    /// Puts a decimal number using its `toString` representation.
    void putValue(size_t size)(auto ref const Decimal!size num)
    {
        auto buf = stringBuf;
        num.toString(buf);
        putValue(buf.data);
    }

    /// Puts `naValue`.
    void putValue(typeof(null))
    {
        putValue(naValue, true);
    }

    /// ditto 
    void putNull(IonTypeCode code)
    {
        putValue(null);
    }

    /// Puts `trueValue` or `falseValue`.
    void putValue(bool b)
    {
        putValue(b ? trueValue : falseValue, true);
    }

    /++
    Puts a scalar cell.

    The value is written as is when `quoteAll` is off and either
    $(UL
        $(LI `noQuote` is set and the value equals `naValue`, `trueValue` or `falseValue`, or)
        $(LI `noQuote` isn't set, the value contains none of `separator`, `quote` and `'\n'`,
            and it differs from `naValue`, `trueValue` and `falseValue`.)
    )
    Otherwise the value is quoted.

    Params:
        value = text of the cell
        noQuote = set by the serializer itself for null and boolean values
    Throws: `IonMirException` if the value isn't put inside a row.
    +/
    void putValue(scope const char[] value, bool noQuote = false)
    {
        import mir.ion.exception: IonMirException;
        import mir.utility: _expect;

        if (_expect(level != 2, false))
            throw new IonMirException(
                "mir.csv: expected ",
                level ? "row" : "table",
                " value, got scalar value '", value, "'");

        if (!quoteAll
         && (noQuote || !value.containsAny(separator, quote, '\n'))
         // A string that looks like one of the special values has to be quoted,
         // so that it isn't confused with them.
         && ((value == naValue || value == trueValue || value == falseValue) == noQuote)
        )
        {
            appender.put(value);
        }
        else
        {
            auto state = stringBegin;
            putStringPart(value);
            stringEnd(state);
        }
    }

    /// Puts a CLOB as a string scalar of the form `{{"..."}}` with the payload escaped.
    void putValue(scope Clob value)
    {
        import mir.format: printEscaped, EscapeFormat;

        auto buf = stringBuf;

        buf.put(`{{"`);

        printEscaped!(char, EscapeFormat.ionClob)(buf, value.data);

        buf.put(`"}}`);

        putValue(buf.data);
    }

    /// Puts a BLOB as a string scalar of the form `{{...}}` with the payload Base64-encoded.
    void putValue(scope Blob value)
    {
        import mir.base64 : encodeBase64;

        auto buf = stringBuf;

        buf.put("{{");

        encodeBase64(value.data, buf);

        buf.put("}}");

        putValue(buf.data);
    }

    /// Puts a timestamp in the ISO extended form.
    void putValue(Timestamp value)
    {
        auto buf = stringBuf;
        value.toISOExtString(buf);
        putValue(buf.data);
    }

    /++
    Starts an element. Inside a row it writes the separator before every
    cell but the first one; otherwise it counts the row.
    +/
    void elemBegin()
    {
        if (level == 2)
        {
            if (column++)
                appender.put(separator);
        }
        else
        {
            row++;
        }
    }

    ///
    alias sexpElemBegin = elemBegin;
}
1382 
/++
A proxy that allows to convert CSV to a table in another data format.

The proxy itself is serializable: `serialize` reads `text` with $(LREF CsvReader)
and emits a list of rows, each row being a list of scalars.
+/
struct CsvProxy
{
    import mir.algebraic_alias.csv: CsvAlgebraic;
    import mir.ion.exception: IonMirException;
    /// An input CSV text. BOM isn't supported.
    const(char)[] text;
    /// If true the elements in the first row are symbolised.
    bool hasHeader;
    /// Scalar separator
    char separator = ',';
    /// Symbol to quote scalars
    char quote = '"';
    /// Skips the first consecutive lines that start with this character.
    char comment = '\0';
    /// Skips a number of rows
    ubyte skipRows;
    ///
    bool fill = true;
    ///
    bool skipEmptyLines = true;
    /// If true the parser tries to recognise and parse numbers.
    bool parseNumbers = true;
    /// If true the parser tries to recognise and parse
    /// ISO timestamps in the extended form.
    bool parseTimestamps = true;

    /// A number of conversion conventions.
    struct Conversion
    {
        /// Unquoted cell text to match
        string from;
        /// Value the matching cell is converted to
        CsvAlgebraic to;
    }

    /++
    The conversion map represented as array of `from->to` pairs.

    The map is consulted only for unquoted cells that were recognised
    neither as a number nor as a timestamp.

    Note:
    automated number recognition works with values like `NaN` and `+Inf` already.
    +/
    const(Conversion)[] conversions = [
        Conversion("", null.CsvAlgebraic),
        Conversion("TRUE", true.CsvAlgebraic),
        Conversion("FALSE", false.CsvAlgebraic),
    ];

    /++
    N/A and NULL patterns are converted to Ion `null` when exposed to arrays
    and skipped when exposed to objects
    +/
    const(string)[] naStrings = [
        ``,
    ];

    const(string)[] trueStrings = [
        `TRUE`,
    ];

    const(string)[] falseStrings = [
        `FALSE`,
    ];

    /// File name for better error messages
    string fileName = "<unknown>";

    // /++
    // +/
    // bool delegate(size_t columnIndex, scope const(char)[] columnName) useColumn;

    /++
    Conversion callback to finish conversion resolution.

    When set, it is called for every data cell and its result replaces the
    automatically resolved scalar.

    Params:
        unquotedString = string after unquoting
        scalar = the value resolved by the proxy for this cell
        quoted = true if the cell was quoted in the input
        columnIndex = column index as reported by the row
    +/
    CsvAlgebraic delegate(
        return scope const(char)[] unquotedString,
        CsvAlgebraic scalar,
        bool quoted,
        size_t columnIndex
    ) @safe pure @nogc conversionFinalizer;

    /++
    Default symbol predicate: accepts non-empty unquoted strings
    that consist of ASCII letters and underscores only.
    +/
    static bool defaultIsSymbolHandler(scope const(char)[] symbol, bool quoted) @safe pure @nogc nothrow
    {
        import mir.algorithm.iteration: all;
        return !quoted && symbol.length && symbol.all!(
            c =>
                'a' <= c && c <= 'z' ||
                'A' <= c && c <= 'Z' ||
                c == '_'
        );
    }

    /++
    A function used to determine if a string should be passed
    to a serializer as a symbol instead of strings.
    That may help to reduce memory allocation for data with
    a huge amount of equal cell values.
    The default pattern follows regular expression `[a-zA-Z_]+`
    and requires symbol to be presented without double quotes.
    +/
    bool function(scope const(char)[] symbol, bool quoted) @safe pure @nogc isSymbolHandler = &defaultIsSymbolHandler;

    /++
    Serializes the CSV text as a list of rows.

    If `hasHeader` is set, the first row is emitted as a list of symbols,
    and an empty table is emitted as `null`.
    Each cell of the following rows is resolved in this order:
    quoted cells are strings; then, if enabled, numbers and timestamps are
    recognised; then `conversions` is applied; anything else is a string.
    Finally `conversionFinalizer`, if set, may replace the resolved value.

    Params:
        serializer = serializer to write the table to
    Throws: `IonMirException` if a header cell is malformed.
    +/
    void serialize(S)(scope ref S serializer) scope const @trusted
    {
        import mir.bignum.decimal: Decimal, DecimalExponentKey;
        import mir.exception: MirException;
        import mir.ser: serializeValue;
        import mir.timestamp: Timestamp;
        import mir.utility: _expect;

        auto table = CsvReader(
            text,
            separator,
            quote,
            comment,
            skipRows,
            fill,
            skipEmptyLines,
        );

        if (hasHeader && table.empty)
        {
            serializer.putValue(null);
            return;
        }

        DecimalExponentKey decimalKey;
        Decimal!128 decimal = void;
        Timestamp timestamp;

        size_t outerState = serializer.listBegin;

        if (hasHeader)
        {
            serializer.elemBegin;
            auto state = serializer.listBegin;
            foreach (elem; table.front)
            {
                // The header comes from the input text as well, so a malformed
                // cell has to be reported in release builds too.
                if (_expect(elem.error, false))
                    throw new IonMirException(
                        "mir.csv: malformed header row in ", fileName);
                serializer.elemBegin;
                serializer.putSymbol(elem.value);
            }
            serializer.listEnd(state);
            table.popFront;
        }

        // NOTE(review): the loop below reads `table.front` without a preceding
        // `table.empty` check, both when `hasHeader` is false and right after
        // the header has been consumed - confirm that CsvReader tolerates that
        // for an empty or header-only input.
        do
        {
            serializer.elemBegin;
            auto state = serializer.listBegin;
            auto row = table.front;
            do
            {
                auto elem = row.front;

                if (_expect(elem.error, false)) 
                    row.validateCsvError(elem.error);

                CsvAlgebraic scalar;

                // Number recognition options passed to `fromStringImpl`.
                enum bool allowSpecialValues = true;
                enum bool allowDotOnBounds = true;
                enum bool allowDExponent = true;
                enum bool allowStartingPlus = true;
                enum bool allowUnderscores = false;
                enum bool allowLeadingZeros = false;
                enum bool allowExponent = true;
                enum bool checkEmpty = true;

                if (_expect(elem.wasQuoted, false))
                {
                    // Quoted cells are always strings.
                    scalar = cast(string) elem.value;
                }
                else
                if (parseNumbers && decimal.fromStringImpl!(
                    char,
                    allowSpecialValues,
                    allowDotOnBounds,
                    allowDExponent,
                    allowStartingPlus,
                    allowUnderscores,
                    allowLeadingZeros,
                    allowExponent,
                    checkEmpty)
                    (elem.value, decimalKey))
                {
                    // A non-zero key selects `double`; otherwise the number
                    // is exposed as `long`.
                    if (decimalKey)
                        scalar = cast(double) decimal;
                    else
                        scalar = cast(long) decimal.coefficient;
                }
                else
                if (parseTimestamps && Timestamp.fromISOExtString(elem.value, timestamp))
                {
                    scalar = timestamp;
                }
                else
                {
                    foreach(ref target; conversions)
                    {
                        if (elem.value == target.from)
                        {
                            scalar = target.to;
                            goto Finalizer;
                        }
                    }
                    scalar = cast(string) elem.value;
                }

            Finalizer:
                if (_expect(conversionFinalizer !is null, false))
                {
                    scalar = conversionFinalizer(elem.value, scalar, elem.wasQuoted, row.columnIndex);
                }
                serializer.elemBegin;
                serializer.serializeValue(scalar);                
                row.popFront;
            }
            while(!row.empty);
            serializer.listEnd(state);
            table.popFront;
        }
        while (!table.empty);
        serializer.listEnd(outerState);
    }
}
1617 
/// Matrix
version (mir_ion_test)
@safe pure
unittest
{
    import mir.ion.conv: serde;
    import mir.ndslice.slice: Slice;
    import mir.ser.text;
    import mir.test: should;

    alias Matrix = Slice!(double*, 2);

    // Both "\n" and "\r\n" line endings appear in the input.
    auto text = "1,2\n3,4\r\n5,6\n";

    // The proxy is serialized and the result is deserialized as a matrix.
    auto proxy = CsvProxy(text);
    auto matrix = proxy.serde!Matrix;
    matrix.should == [[1, 2], [3, 4], [5, 6]];
}
1635 
/++
Type resolution is performed for types defined in $(MREF mir,algebraic_alias,csv):

$(UL 
    $(LI `typeof(null)` - used for N/A values)
    $(LI `bool`)
    $(LI `long`)
    $(LI `double`)
    $(LI `string`)
    $(LI $(AlgorithmREF timestamp, Timestamp))
)
+/
version (mir_ion_test)
@safe pure
unittest
{
    import mir.ion.conv: serde;
    import mir.ndslice.slice: Slice;
    import mir.ser.text: serializeTextPretty;
    import mir.test: should;
    import std.string: join;

    // alias Matrix = Slice!(CsvAlgebraic*, 2);

    // One row; every cell demonstrates a recognition rule.
    auto cells = [
        // User-defined conversion
        `Billion`
        // `long` patterns
        , `100`, `+200`, `-200`
        // `double` pattern
        , `+1.0`, `-.2`, `3.`, `3e-10`, `3d20`
        // also `double` pattern
        , `inf`, `+Inf`, `-INF`, `+NaN`, `-nan`, `NAN`
        // `bool` patterns
        , `TRUE`, `FALSE`
        // `Timestamp` patterns
        , `2021-02-03` // iso8601 extended
        , `2001-12-15T02:59:43.1Z` //canonical
        // Default NA patterns are converted to Ion `null` when exposed to arrays
        // and skipped when exposed to objects
        , ``
        // strings
        , `100_000`
        , `_ab0`
        , `_abc`
        , `Str`
        , `Value100`
        , `iNF`
        , `Infinity`
        , `+Infinity`
        , `.Infinity`
        , `""`
        , ` `
    ];

    CsvProxy csv = {
        conversionFinalizer : (
            cellText,
            resolved,
            quoted,
            column)
        {
            // Do we want to symbolize the data?
            if (!quoted && cellText == `Billion`)
                return CsvAlgebraic(1000000000);
            return resolved;
        },
        text : cells.join(`,`)
    };

    // Serializing CsvProxy to Amazon Ion (text version)
    csv.serializeTextPretty!"    ".should ==
`[
    [
        1000000000,
        100,
        200,
        -200,
        1.0,
        -0.2,
        3.0,
        3e-10,
        3e+20,
        +inf,
        +inf,
        -inf,
        nan,
        nan,
        nan,
        true,
        false,
        2021-02-03,
        2001-12-15T02:59:43.1Z,
        null,
        "100_000",
        "_ab0",
        "_abc",
        "Str",
        "Value100",
        "iNF",
        "Infinity",
        "+Infinity",
        ".Infinity",
        "",
        " "
    ]
]`;
}
1742 
/++
Transposed Matrix & Tuple support
+/
version (mir_ion_test)
@safe pure
unittest
{
    import mir.date: Date; // also works with mir.timestamp and std.datetime
    import mir.functional: Tuple;
    import mir.ion.conv: serde;
    import mir.ndslice.dynamic: transposed;
    import mir.ser.text: serializeText;
    import mir.test: should;

    alias Table = Slice!(CsvAlgebraic*, 2);
    alias T = Tuple!(string[], Date[], double[]);

    auto text = "str,2022-10-12,3.4\nb,2022-10-13,2\n";

    // Rows of the CSV become columns after the transposition.
    auto matrix = CsvProxy(text).serde!Table;
    matrix.transposed.serializeText.should
        == q{[["str","b"],[2022-10-12,2022-10-13],[3.4,2]]};

    // Each column can then be deserialized with its own element type.
    auto expected = T(
            [`str`, `b`],
            [Date(2022, 10, 12), Date(2022, 10, 13)],
            [3.4, 2],
    );
    matrix.transposed.serde!T.should == expected;
}
1771 
/// Converting NA to NaN
version (mir_ion_test)
@safe pure
unittest
{
    import mir.algebraic: Nullable, visit;
    import mir.csv;
    import mir.ion.conv: serde;
    import mir.ndslice: Slice, map, slice;
    import mir.ser.text: serializeText;
    import mir.test: should;

    alias NullableMatrix = Slice!(Nullable!double*, 2);

    // The last cell of the last row is empty, i.e. N/A.
    auto text = "1,2\n3,4\n5,\n";

    // Read the table with nullable cells first...
    auto cells = CsvProxy(text).serde!NullableMatrix;

    // ...then replace every null with NaN and allocate the result.
    auto matrix = cells
        .map!(visit!((double x) => x, (_) => double.nan))
        .slice;

    matrix.serializeText.should == q{[[1.0,2.0],[3.0,4.0],[5.0,nan]]};
}