The OpenD Programming Language

CsvProxy

A proxy that converts CSV to a table in another data format.

Members

Static functions

defaultIsSymbolHandler
bool defaultIsSymbolHandler(const(char)[] symbol, bool quoted)

Structs

Conversion
struct Conversion

A number of conversion conventions.

Variables

comment
char comment;

Skips the first consecutive lines that start with this character.

conversionFinalizer
CsvAlgebraic delegate(return scope const(char)[] unquotedString, CsvAlgebraic scalar, bool quoted, size_t columnIndex) @(safe) pure @(nogc) conversionFinalizer;

Conversion callback to finish conversion resolution

conversions
const(Conversion)[] conversions;

The conversion map represented as an array of from->to pairs.

fileName
string fileName;

File name for better error messages

fill
bool fill;
hasHeader
bool hasHeader;

If true the elements in the first row are symbolised.

isSymbolHandler
bool function(scope const(char)[] symbol, bool quoted) @(safe) pure @(nogc) isSymbolHandler;

A function used to determine whether a string should be passed to a serializer as a symbol instead of a string. That may help to reduce memory allocation for data with a huge number of equal cell values. The default pattern follows the regular expression [a-zA-Z_]+ and requires the symbol to be presented without double quotes.

naStrings
const(string)[] naStrings;

N/A and NULL patterns are converted to Ion null when exposed to arrays and skipped when exposed to objects

parseNumbers
bool parseNumbers;

If true the parser tries to recognise and parse numbers.

parseTimestamps
bool parseTimestamps;

If true the parser tries to recognise and parse timestamps.

quote
char quote;

Symbol to quote scalars

separator
char separator;

Scalar separator

skipEmptyLines
bool skipEmptyLines;
skipRows
ubyte skipRows;

Skips a number of rows

text
const(char)[] text;

An input CSV text. BOM isn't supported.

Examples

Matrix

import mir.test: should;
import mir.ndslice.slice: Slice;
import mir.ion.conv: serde;
import mir.ser.text;

alias Matrix = Slice!(double*, 2);

auto text = "1,2\n3,4\r\n5,6\n";

auto matrix = text.CsvProxy.serde!Matrix;
matrix.should == [[1, 2], [3, 4], [5, 6]];

Type resolution is performed for types defined in mir.algebraic_alias.csv:

  • typeof(null) - used for N/A values
  • bool
  • long
  • double
  • string
  • Timestamp (from mir.timestamp)
1     import mir.ion.conv: serde;
2     import mir.ndslice.slice: Slice;
3     import mir.ser.text: serializeTextPretty;
4     import mir.test: should;
5     import std.string: join;
6 
7     // alias Matrix = Slice!(CsvAlgebraic*, 2);
8 
9     CsvProxy csv = {
10         conversionFinalizer : (
11             unquotedString,
12             scalar,
13             wasQuoted,
14             columnIndex)
15         {
16             // Do we want to symbolize the data?
17             return !wasQuoted && unquotedString == `Billion` ?
18                 1000000000.CsvAlgebraic :
19                 scalar;
20         },
21         text : join([
22             // User-defined conversion
23             `Billion`
24             // `long` patterns
25             , `100`, `+200`, `-200`
26             // `double` pattern
27             , `+1.0`, `-.2`, `3.`, `3e-10`, `3d20`
28             // also `double` pattern
29             , `inf`, `+Inf`, `-INF`, `+NaN`, `-nan`, `NAN`
30             // `bool` patterns
31             , `TRUE`, `FALSE`
32             // `Timestamp` patterns
33             , `2021-02-03` // iso8601 extended
34             , `2001-12-15T02:59:43.1Z` //canonical
35             // Default NA patterns are converted to Ion `null` when exposed to arrays
36             // and skipped when exposed to objects
37             , ``
38             // strings
39             , `100_000`
40             , `_ab0`
41             , `_abc`
42             , `Str`
43             , `Value100`
44             , `iNF`
45             , `Infinity`
46             , `+Infinity`
47             , `.Infinity`
48             , `""`
49             , ` `
50         ], `,`)
51     };
52 
53     // Serializing CsvProxy to Amazon Ion (text version)
54     csv.serializeTextPretty!"    ".should ==
55 `[
56     [
57         1000000000,
58         100,
59         200,
60         -200,
61         1.0,
62         -0.2,
63         3.0,
64         3e-10,
65         3e+20,
66         +inf,
67         +inf,
68         -inf,
69         nan,
70         nan,
71         nan,
72         true,
73         false,
74         2021-02-03,
75         2001-12-15T02:59:43.1Z,
76         null,
77         "100_000",
78         "_ab0",
79         "_abc",
80         "Str",
81         "Value100",
82         "iNF",
83         "Infinity",
84         "+Infinity",
85         ".Infinity",
86         "",
87         " "
88     ]
89 ]`;

Transposed Matrix & Tuple support

import mir.ion.conv: serde;
import mir.date: Date; // also works with mir.timestamp and std.datetime
import mir.functional: Tuple;
import mir.ser.text: serializeText;
import mir.test: should;
import mir.ndslice.dynamic: transposed;

auto text = "str,2022-10-12,3.4\nb,2022-10-13,2\n";

auto matrix = text.CsvProxy.serde!(Slice!(CsvAlgebraic*, 2));
matrix.transposed.serializeText.should
    == q{[["str","b"],[2022-10-12,2022-10-13],[3.4,2]]};

alias T = Tuple!(string[], Date[], double[]);

matrix.transposed.serde!T.should == T(
        [`str`, `b`],
        [Date(2022, 10, 12), Date(2022, 10, 13)],
        [3.4, 2],
);

Converting NA to NaN

import mir.csv;
import mir.algebraic: Nullable, visit;
import mir.ion.conv: serde;
import mir.ndslice: Slice, map, slice;
import mir.ser.text: serializeText;
import mir.test: should;

auto text = "1,2\n3,4\n5,\n";
auto matrix = text
    .CsvProxy
    .serde!(Slice!(Nullable!double*, 2))
    .map!(visit!((double x) => x, (_) => double.nan))
    .slice;

matrix.serializeText.should == q{[[1.0,2.0],[3.0,4.0],[5.0,nan]]};

Meta