//Written in the D programming language

/**
 * Implements functionality to read Comma Separated Values and its variants
 * from an $(REF_ALTTEXT input range, isInputRange, std,range,primitives) of `dchar`.
 *
 * Comma Separated Values provide a simple means to transfer and store
 * tabular data. It has been common for programs to use their own
 * variant of the CSV format. This parser will loosely follow the
 * $(HTTP tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere
 * to the following criteria (differences from RFC-4180 in parentheses):
 *
 * $(UL
 *     $(LI A record is separated by a new line (CRLF,LF,CR))
 *     $(LI A final record may end with a new line)
 *     $(LI A header may be provided as the first record in input)
 *     $(LI A record has fields separated by a comma (customizable))
 *     $(LI A field containing new lines, commas, or double quotes
 *          should be enclosed in double quotes (customizable))
 *     $(LI Double quotes in a field are escaped with a double quote)
 *     $(LI Each record should contain the same number of fields)
 *   )
 *
 * Example:
 *
 * -------
 * import std.algorithm;
 * import std.array;
 * import std.csv;
 * import std.stdio;
 * import std.typecons;
 *
 * void main()
 * {
 *     auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
 *
 *     foreach (record; csvReader!(Tuple!(string, string, int))(text))
 *     {
 *         writefln("%s works as a %s and earns $%d per year",
 *                  record[0], record[1], record[2]);
 *     }
 *
 *     // To read the same string from the file "filename.csv":
 *
 *     auto file = File("filename.csv", "r");
 *     foreach (record;
 *         file.byLine.joiner("\n").csvReader!(Tuple!(string, string, int)))
 *     {
 *         writefln("%s works as a %s and earns $%d per year",
 *                  record[0], record[1], record[2]);
 *     }
 * }
 * -------
 *
 * When an input contains a header the `Contents` can be specified as an
 * associative array.
 * Pass `null` as the header to signify that a header is present.
 *
 * -------
 * auto text = "Name,Occupation,Salary\r" ~
 *     "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
 *
 * foreach (record; csvReader!(string[string])
 *         (text, null))
 * {
 *     writefln("%s works as a %s and earns $%s per year.",
 *              record["Name"], record["Occupation"],
 *              record["Salary"]);
 * }
 *
 * // To read the same string from the file "filename.csv":
 *
 * auto file = File("filename.csv", "r");
 *
 * foreach (record; csvReader!(string[string])
 *         (file.byLine.joiner("\n"), null))
 * {
 *     writefln("%s works as a %s and earns $%s per year.",
 *              record["Name"], record["Occupation"],
 *              record["Salary"]);
 * }
 * -------
 *
 * This module allows content to be iterated by record stored in a struct,
 * class, associative array, or as a range of fields. Upon detection of an
 * error a CSVException is thrown (can be disabled). csvNextToken has been
 * made public to allow for attempted recovery.
 *
 * Disabling exceptions will lift many restrictions specified above. A quote
 * can appear in a field if the field was not quoted. In a quoted field, any
 * quote by itself that is not at the end of the field will end processing
 * for that field. The field is ended when there is no input, even if the
 * quote was not closed.
 *
 * See_Also:
 *    $(HTTP en.wikipedia.org/wiki/Comma-separated_values, Wikipedia
 *    Comma-separated values)
 *
 * Copyright: Copyright 2011
 * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Jesse Phillips
 * Source:    $(PHOBOSSRC std/csv.d)
 */
module std.csv;

import std.conv;
import std.exception : basicExceptionCtors;
import std.range.primitives;
import std.traits;

/**
 * Exception containing the row and column for when an exception was thrown.
113 * 114 * Numbering of both row and col start at one and corresponds to the location 115 * in the file rather than any specified header. Special consideration should 116 * be made when there is failure to match the header see $(LREF 117 * HeaderMismatchException) for details. 118 * 119 * When performing type conversions, $(REF ConvException, std,conv) is stored in 120 * the `next` field. 121 */ 122 class CSVException : Exception 123 { 124 /// 125 size_t row, col; 126 127 // FIXME: Use std.exception.basicExceptionCtors here once 128 // https://issues.dlang.org/show_bug.cgi?id=11500 is fixed 129 130 this(string msg, string file = __FILE__, size_t line = __LINE__, 131 Throwable next = null) @nogc @safe pure nothrow 132 { 133 super(msg, file, line, next); 134 } 135 136 this(string msg, Throwable next, string file = __FILE__, 137 size_t line = __LINE__) @nogc @safe pure nothrow 138 { 139 super(msg, file, line, next); 140 } 141 142 this(string msg, size_t row, size_t col, Throwable next = null, 143 string file = __FILE__, size_t line = __LINE__) @nogc @safe pure nothrow 144 { 145 super(msg, next, file, line); 146 this.row = row; 147 this.col = col; 148 } 149 150 override string toString() @safe pure const 151 { 152 return "(Row: " ~ to!string(row) ~ 153 ", Col: " ~ to!string(col) ~ ") " ~ msg; 154 } 155 } 156 157 /// 158 @safe unittest 159 { 160 import std.exception : collectException; 161 import std.algorithm.searching : count; 162 string text = "a,b,c\nHello,65"; 163 auto ex = collectException!CSVException(csvReader(text).count); 164 assert(ex.toString == "(Row: 0, Col: 0) Row 2's length 2 does not match previous length of 3."); 165 } 166 167 /// 168 @safe unittest 169 { 170 import std.exception : collectException; 171 import std.algorithm.searching : count; 172 import std.typecons : Tuple; 173 string text = "a,b\nHello,65"; 174 auto ex = collectException!CSVException(csvReader!(Tuple!(string,int))(text).count); 175 assert(ex.toString == "(Row: 1, Col: 2) Unexpected 
'b' when converting from type string to type int"); 176 } 177 178 @safe pure unittest 179 { 180 import std.string; 181 auto e1 = new Exception("Foobar"); 182 auto e2 = new CSVException("args", e1); 183 assert(e2.next is e1); 184 185 size_t r = 13; 186 size_t c = 37; 187 188 auto e3 = new CSVException("argv", r, c); 189 assert(e3.row == r); 190 assert(e3.col == c); 191 192 auto em = e3.toString(); 193 assert(em.indexOf("13") != -1); 194 assert(em.indexOf("37") != -1); 195 } 196 197 /** 198 * Exception thrown when a Token is identified to not be completed: a quote is 199 * found in an unquoted field, data continues after a closing quote, or the 200 * quoted field was not closed before data was empty. 201 */ 202 class IncompleteCellException : CSVException 203 { 204 /** 205 * Data pulled from input before finding a problem 206 * 207 * This field is populated when using $(LREF csvReader) 208 * but not by $(LREF csvNextToken) as this data will have 209 * already been fed to the output range. 210 */ 211 dstring partialData; 212 213 mixin basicExceptionCtors; 214 } 215 216 /// 217 @safe unittest 218 { 219 import std.exception : assertThrown; 220 string text = "a,\"b,c\nHello,65,2.5"; 221 assertThrown!IncompleteCellException(text.csvReader(["a","b","c"])); 222 } 223 224 @safe pure unittest 225 { 226 auto e1 = new Exception("Foobar"); 227 auto e2 = new IncompleteCellException("args", e1); 228 assert(e2.next is e1); 229 } 230 231 /** 232 * Exception thrown under different conditions based on the type of $(D 233 * Contents). 
 *
 * Structure, Class, and Associative Array
 * $(UL
 *     $(LI When a header is provided but a matching column is not found)
 * )
 *
 * Other
 * $(UL
 *     $(LI When a header is provided but a matching column is not found)
 *     $(LI Order did not match that found in the input)
 * )
 *
 * Since a row and column is not meaningful when a column specified by the
 * header is not found in the data, both row and col will be zero. Otherwise
 * row is always one and col is the first instance found in header that
 * occurred before the previous starting at one.
 */
class HeaderMismatchException : CSVException
{
    // Only the standard exception constructors are provided; row and col
    // are public fields inherited from CSVException.
    mixin basicExceptionCtors;
}

///
@safe unittest
{
    import std.exception : assertThrown;
    string text = "a,b,c\nHello,65,2.5";
    // "invalid" is not a column of the input's header row.
    assertThrown!HeaderMismatchException(text.csvReader(["b","c","invalid"]));
}

// The (msg, next) constructor supplied by the mixin keeps the cause.
@safe pure unittest
{
    auto e1 = new Exception("Foobar");
    auto e2 = new HeaderMismatchException("args", e1);
    assert(e2.next is e1);
}

/**
 * Determines the behavior for when an error is detected.
 *
 * Disabling exception will follow these rules:
 * $(UL
 *     $(LI A quote can appear in a field if the field was not quoted.)
 *     $(LI If in a quoted field any quote by itself, not at the end of a
 *     field, will end processing for that field.)
 *     $(LI The field is ended when there is no input, even if the quote was
 *     not closed.)
 *     $(LI If the given header does not match the order in the input, the
 *     content will return as it is found in the input.)
 *     $(LI If the given header contains columns not found in the input they
 *     will be ignored.)
 * )
 */
enum Malformed
{
    ignore,        /// No exceptions are thrown due to incorrect CSV.
    throwException /// Use exceptions when input has incorrect CSV.
}

///
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.searching : count;
    import std.exception : assertThrown;

    // The quote opened before 2.5 is never closed.
    string text = "a,b,c\nHello,65,\"2.5";
    assertThrown!IncompleteCellException(text.csvReader.count);

    // ignore the exceptions and try to handle invalid CSV
    auto firstLine = text.csvReader!(string, Malformed.ignore)(null).front;
    assert(firstLine.equal(["Hello", "65", "2.5"]));
}

/**
Returns an $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
for iterating over records found in `input`.

An optional `header` can be provided. The first record will be read in
as the header. If `Contents` is a struct then the header provided is
expected to correspond to the fields in the struct. When `Contents` is
not a type which can contain the entire record, the `header` must be
provided in the same order as the input or an exception is thrown.

Params:
     Contents = the type of each record when it is a struct, class or
         associative array, otherwise the type of each field
         (defaults to `string`)
     ErrorLevel = whether incorrect CSV raises exceptions, see
         $(LREF Malformed) (defaults to `Malformed.throwException`)
     input = the range of `dchar` to read the records from
     header = the columns to read; when given, the first record of `input`
         is read as the header. Pass `null` to read a header row and
         iterate all columns
     delimiter = the character separating the fields (defaults to `','`)
     quote = the character enclosing quoted fields (defaults to `'"'`)
     allowInconsistentDelimiterCount = pass `true` to accept records whose
         number of fields differs from the first record
         (defaults to `false`)

Returns:
     An input range R as defined by
     $(REF isInputRange, std,range,primitives). When `Contents` is a
     struct, class, or an associative array, the element type of R is
     `Contents`, otherwise the element type of R is itself a range with
     element type `Contents`.

     If a `header` argument is provided,
     the returned range provides a `header` field for accessing the header
     from the input in array form.

Throws:
     $(LREF CSVException) When a quote is found in an unquoted field,
     data continues after a closing quote, the quoted field was not
     closed before data was empty, a conversion failed, or when the row's
     length does not match the previous length.

     $(LREF HeaderMismatchException) when a header is provided but a
     matching column is not found or the order did not match that found in
     the input. Read the exception documentation for specific details of
     when the exception is thrown for different types of `Contents`.
*/
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Separator = char)
              (Range input,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && !is(Contents T : T[U], U : string))
{
    // Header-less form: every record, including the first, is data.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);
    return Reader(input, delimiter, quote, allowInconsistentDelimiterCount);
}

/// ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && isForwardRange!Header
    && isSomeString!(ElementType!Header))
{
    // Explicit header: the caller names the columns it wants.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), Header);
    return Reader(input, header, delimiter, quote,
                  allowInconsistentDelimiterCount);
}

/// ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && is(Header : typeof(null)))
{
    // `null` header: a header row is present, and an empty column list is
    // forwarded in its place.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);
    string[] noColumns = null;
    return Reader(input, noColumns, delimiter, quote,
                  allowInconsistentDelimiterCount);
}


/**
The `Contents` of the input can be provided if all the records are the
same type such as all integer data:
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    string text = "76,26,22";
    auto records = text.csvReader!int;
    assert(records.equal!equal([[76, 26, 22]]));
}

/**
Using a struct with modified delimiter:
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    struct Layout
    {
        string name;
        int value;
        double other;
    }

    string text = "Hello;65;2.5\nWorld;123;7.5";
    auto records = text.csvReader!Layout(';');

    auto expected = [Layout("Hello", 65, 2.5), Layout("World", 123, 7.5)];
    assert(records.equal(expected));
}

/**
Specifying `ErrorLevel` as $(LREF Malformed.ignore) will lift restrictions
on the format. This example shows that an exception is not thrown when
finding a quote in a field not quoted.
*/
@safe unittest
{
    string text = "A \" is now part of the data";
    auto records = text.csvReader!(string, Malformed.ignore);
    auto firstRecord = records.front;

    assert(firstRecord.front == text);
}

/// Read only column "b"
@safe unittest
{
    import std.algorithm.comparison : equal;

    string text = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    auto records = text.csvReader!int(["b"]);

    assert(records.equal!equal([[65], [123]]));
}

/// Read while rearranging the columns by specifying a header with a different order
@safe unittest
{
    import std.algorithm.comparison : equal;

    struct Layout
    {
        int value;
        double other;
        string name;
    }

    string text = "a,b,c\nHello,65,2.5\nWorld,123,7.5";
    auto records = text.csvReader!Layout(["b","c","a"]);

    auto expected = [Layout(65, 2.5, "Hello"), Layout(123, 7.5, "World")];
    assert(records.equal(expected));
}

/**
The header can also be left empty if the input contains a header row
and all columns should be iterated.
The header from the input can always be accessed from the `header` field.
*/
@safe unittest
{
    string text = "a,b,c\nHello,65,63.63";
    // A null header means: read the header row and keep every column.
    auto records = text.csvReader(null);

    assert(records.header == ["a","b","c"]);
}

/**
Handcrafted csv files tend to have a variable number of columns.

By default `std.csv` will throw if the number of columns on a line
is unequal to the number of columns of the first line.
To allow, or disallow, a variable number of columns a `bool` can be passed to
all overloads of the `csvReader` function as shown below.
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    string text = "76,26,22\n1,2\n3,4,5,6";
    // The trailing `true` is allowInconsistentDelimiterCount.
    auto records = text.csvReader!int(',', '"', true);

    assert(records.equal!equal([
        [76, 26, 22],
        [1, 2],
        [3, 4, 5, 6]
    ]));
}

/// ditto
@safe unittest
{
    import std.algorithm.comparison : equal;

    static struct Three
    {
        int a;
        int b;
        int c;
    }

    string text = "76,26,22\n1,2\n3,4,5,6";
    auto records = text.csvReader!Three(',', '"', true);

    // Missing fields keep their default value, surplus fields are dropped.
    assert(records.equal([
        Three(76, 26, 22),
        Three(1, 2, 0),
        Three(3, 4, 5)
    ]));
}

/// ditto
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto text = "Name,Occupation,Salary\r" ~
        "Joe,Carpenter,300000\nFred,Blacksmith\r\n";

    auto r = csvReader!(string[string])(text, null, ',', '"', true);

    // A record that is short of a column simply has no entry for it.
    assert(r.equal([
        [ "Name" : "Joe", "Occupation" : "Carpenter", "Salary" : "300000" ],
        [ "Name" : "Fred", "Occupation" : "Blacksmith" ]
    ]));
}

// Test standard iteration over input.
@safe pure unittest
{
    // Three records of two fields each: escaped quotes, a quoted field
    // containing a new line, and an empty trailing field.
    string str = `one,"two ""quoted"""` ~ "\n\"three\nnew line\",\nfive,six";
    auto records = csvReader(str);

    int count;
    foreach (record; records)
    {
        foreach (cell; record)
        {
            count++;
        }
    }
    assert(count == 6);
}

// Test newline on last record
@safe pure unittest
{
    string str = "one,two\nthree,four\n";
    auto records = csvReader(str);
    // Two records only: the final new line must not produce a third one.
    records.popFront();
    records.popFront();
    assert(records.empty);
}

// Test shorter row length
@safe pure unittest
{
    wstring str = "one,1\ntwo\nthree"w;
    struct Layout
    {
        string name;
        int value;
    }

    // With Malformed.ignore a missing field leaves the member at its default.
    Layout[3] ans;
    ans[0].name = "one";
    ans[0].value = 1;
    ans[1].name = "two";
    ans[1].value = 0;
    ans[2].name = "three";
    ans[2].value = 0;

    auto records = csvReader!(Layout,Malformed.ignore)(str);

    // NOTE(review): the number of records is never asserted here, so this
    // test would also pass if nothing were iterated - consider checking
    // `count == ans.length` after the loop.
    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        count++;
    }
}

// Test shorter row length exception
@safe pure unittest
{
    import std.exception;

    struct A
    {
        string a,b,c;
    }

    // Every input contains records of differing length.
    auto strs = ["one,1\ntwo",
                 "one\ntwo,2,二\nthree,3,三",
                 "one\ntwo,2\nthree,3",
                 "one,1\ntwo\nthree,3"];

    foreach (str; strs)
    {
        auto records = csvReader!A(str);
        // The exception is raised while iterating, hence the delegate.
        assertThrown!CSVException((){foreach (record; records) { }}());
    }
}


// Test structure conversion interface with unicode.
@safe pure unittest
{
    import std.math.algebraic : abs;

    // \U00010143 lies outside the BMP, so the wstring holds a surrogate pair.
    wstring str = "\U00010143Hello,65,63.63\nWorld,123,3673.562"w;
    struct Layout
    {
        string name;
        int value;
        double other;
    }

    Layout[2] ans;
    ans[0].name = "\U00010143Hello";
    ans[0].value = 65;
    ans[0].other = 63.63;
    ans[1].name = "World";
    ans[1].value = 123;
    ans[1].other = 3673.562;

    auto records = csvReader!Layout(str);

    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        // Floating point: compare with a tolerance.
        assert(abs(ans[count].other - record.other) < 0.00001);
        count++;
    }
    assert(count == ans.length);
}

// Test input conversion interface
@safe pure unittest
{
    import std.algorithm;
    string str = `76,26,22`;
    int[] ans = [76,26,22];
    auto records = csvReader!int(str);

    foreach (record; records)
    {
        assert(equal(record, ans));
    }
}

// Test struct & header interface and same unicode
@safe unittest
{
    import std.math.algebraic : abs;

    string str = "a,b,c\nHello,65,63.63\n➊➋➂❹,123,3673.562";
    struct Layout
    {
        int value;
        double other;
        string name;
    }

    // The header lists the input columns in the order of Layout's fields.
    auto records = csvReader!Layout(str, ["b","c","a"]);

    Layout[2] ans;
    ans[0].name = "Hello";
    ans[0].value = 65;
    ans[0].other = 63.63;
    ans[1].name = "➊➋➂❹";
    ans[1].value = 123;
    ans[1].other = 3673.562;

    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        assert(abs(ans[count].other - record.other) < 0.00001);
        count++;
    }
    assert(count == ans.length);

}

// Test header interface
@safe unittest
{
    import std.algorithm;

    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    // Only column "b" is requested.
    auto records = csvReader!int(str, ["b"]);

    auto ans = [[65],[123]];
    foreach (record; records)
    {
        assert(equal(record, ans.front));
        ans.popFront();
    }

    // A header given in a different order than the input is an error when
    // the fields are returned as a range.
    try
    {
        csvReader(str, ["c","b"]);
        assert(0);
    }
    catch (HeaderMismatchException e)
    {
        assert(e.col == 2);
    }
    // With Malformed.ignore the columns come back in input order instead.
    auto records2 = csvReader!(string,Malformed.ignore)
       (str, ["b","a"], ',', '"');

    auto ans2 = [["Hello","65"],["World","123"]];
    foreach (record; records2)
    {
        assert(equal(record, ans2.front));
        ans2.popFront();
    }

    // Requested columns that are missing from the input ("b", "d") are
    // skipped when exceptions are disabled.
    str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
    records2 = csvReader!(string,Malformed.ignore)
       (str, ["a","b","c","d"], ',', '"');

    ans2 = [["Joe","Carpenter"],["Fred","Fly"]];
    foreach (record; records2)
    {
        assert(equal(record, ans2.front));
        ans2.popFront();
    }
}

// Test that `header` exposes the complete header row of the input even
// though only column "a" is requested
@safe unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    auto records = csvReader(str, ["a"]);

    assert(records.header == ["a","b","c"]);
}

// Test unchecked read
@safe pure unittest
{
    // Quotes inside an unquoted field are kept when errors are ignored.
    string str = "one \"quoted\"";
    foreach (record; csvReader!(string,Malformed.ignore)(str))
    {
        foreach (cell; record)
        {
            assert(cell == "one \"quoted\"");
        }
    }

    str = "one \"quoted\",two \"quoted\" end";
    struct Ans
    {
        string a,b;
    }
    foreach (record; csvReader!(Ans,Malformed.ignore)(str))
    {
        assert(record.a == "one \"quoted\"");
        assert(record.b == "two \"quoted\" end");
    }
}

// Test partial data returned
@safe pure unittest
{
    // The opening quote is never closed.
    string str = "\"one\nnew line";

    try
    {
        foreach (record; csvReader(str))
        {}
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        // What was read before the input ran out is reported.
        assert(ice.partialData == "one\nnew line");
    }
}

// Test Windows line break
@safe pure unittest
{
    string str = "one,two\r\nthree";

    auto records = csvReader(str);
    auto record = records.front;
    assert(record.front == "one");
    record.popFront();
    assert(record.front == "two");
    // "\r\n" is a single record separator.
    records.popFront();
    record = records.front;
    assert(record.front == "three");
}


// Test associative array support with unicode separator
@safe unittest
{
    string str = "1❁2❁3\n34❁65❁63\n34❁65❁63";

    // The header order does not matter for associative arrays.
    auto records = csvReader!(string[string])(str,["3","1"],'❁');
    int count;
    foreach (record; records)
    {
        count++;
        assert(record["1"] == "34");
        assert(record["3"] == "63");
    }
    assert(count == 2);
}

// Test restricted range
@safe unittest
{
    import std.typecons;
    // Offers the input range primitives (empty/front/popFront) and nothing
    // else.
    struct InputRange
    {
        dstring text;

        this(dstring txt)
        {
            text = txt;
        }

        @property auto empty()
        {
            return text.empty;
        }

        void popFront()
        {
            text.popFront();
        }

        @property dchar front()
        {
            return text[0];
        }
    }
    auto ir = InputRange("Name,Occupation,Salary\r"d~
          "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"d);

    // Nothing is asserted; each Contents kind just has to compile and run
    // with such a range.
    foreach (record; csvReader(ir, cast(string[]) null))
        foreach (cell; record) {}
    foreach (record; csvReader!(Tuple!(string, string, int))
            (ir,cast(string[]) null)) {}
    foreach (record; csvReader!(string[string])
            (ir,cast(string[]) null)) {}
}

@safe unittest // const/immutable dchars
{
    import std.algorithm.iteration : map;
    import std.array : array;
    const(dchar)[] c = "foo,bar\n";
    assert(csvReader(c).map!array.array == [["foo", "bar"]]);
    immutable(dchar)[] i = "foo,bar\n";
    assert(csvReader(i).map!array.array == [["foo", "bar"]]);
}

/*
 * Parsing state that a CsvReader hands, by pointer, to the record ranges
 * it creates.
 *
 * This struct is stored on the heap for when the structures
 * are passed around.
 */
private pure struct Input(Range, Malformed ErrorLevel)
{
    // The input being consumed.
    Range range;
    // Position counters: CsvReader increments `row` for every record it
    // starts and resets `col` to zero between records.
    size_t row, col;
    // Only present when exceptions are enabled: CsvReader stores `col` here
    // once, after the first record has been consumed.
    static if (ErrorLevel == Malformed.throwException)
        size_t rowLength;
}

/*
 * Range for iterating CSV records.
894 * 895 * This range is returned by the $(LREF csvReader) functions. It can be 896 * created in a similar manner to allow `ErrorLevel` be set to $(LREF 897 * Malformed).ignore if best guess processing should take place. 898 */ 899 private struct CsvReader(Contents, Malformed ErrorLevel, Range, Separator, Header) 900 if (isSomeChar!Separator && isInputRange!Range 901 && is(immutable ElementType!Range == immutable dchar) 902 && isForwardRange!Header && isSomeString!(ElementType!Header)) 903 { 904 private: 905 Input!(Range, ErrorLevel)* _input; 906 Separator _separator; 907 Separator _quote; 908 size_t[] indices; 909 bool _empty; 910 bool _allowInconsistentDelimiterCount; 911 static if (is(Contents == struct) || is(Contents == class)) 912 { 913 Contents recordContent; 914 CsvRecord!(string, ErrorLevel, Range, Separator) recordRange; 915 } 916 else static if (is(Contents T : T[U], U : string)) 917 { 918 Contents recordContent; 919 CsvRecord!(T, ErrorLevel, Range, Separator) recordRange; 920 } 921 else 922 CsvRecord!(Contents, ErrorLevel, Range, Separator) recordRange; 923 public: 924 /** 925 * Header from the input in array form. 926 * 927 * ------- 928 * string str = "a,b,c\nHello,65,63.63"; 929 * auto records = csvReader(str, ["a"]); 930 * 931 * assert(records.header == ["a","b","c"]); 932 * ------- 933 */ 934 string[] header; 935 936 /** 937 * Constructor to initialize the input, delimiter and quote for input 938 * without a header. 
939 * 940 * ------- 941 * string str = `76;^26^;22`; 942 * int[] ans = [76,26,22]; 943 * auto records = CsvReader!(int,Malformed.ignore,string,char,string[]) 944 * (str, ';', '^'); 945 * 946 * foreach (record; records) 947 * { 948 * assert(equal(record, ans)); 949 * } 950 * ------- 951 */ 952 this(Range input, Separator delimiter, Separator quote, 953 bool allowInconsistentDelimiterCount) 954 { 955 _input = new Input!(Range, ErrorLevel)(input); 956 _separator = delimiter; 957 _quote = quote; 958 _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount; 959 960 if (_input.range.empty) 961 { 962 _empty = true; 963 return; 964 } 965 966 prime(); 967 } 968 969 /** 970 * Constructor to initialize the input, delimiter and quote for input 971 * with a header. 972 * 973 * ------- 974 * string str = `high;mean;low\n76;^26^;22`; 975 * auto records = CsvReader!(int,Malformed.ignore,string,char,string[]) 976 * (str, ["high","low"], ';', '^'); 977 * 978 * int[] ans = [76,22]; 979 * foreach (record; records) 980 * { 981 * assert(equal(record, ans)); 982 * } 983 * ------- 984 * 985 * Throws: 986 * $(LREF HeaderMismatchException) when a header is provided but a 987 * matching column is not found or the order did not match that found 988 * in the input (non-struct). 
989 */ 990 this(Range input, Header colHeaders, Separator delimiter, Separator quote, 991 bool allowInconsistentDelimiterCount) 992 { 993 _input = new Input!(Range, ErrorLevel)(input); 994 _separator = delimiter; 995 _quote = quote; 996 _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount; 997 998 if (_input.range.empty) 999 { 1000 _empty = true; 1001 return; 1002 } 1003 1004 size_t[string] colToIndex; 1005 foreach (h; colHeaders) 1006 { 1007 colToIndex[h] = size_t.max; 1008 } 1009 1010 auto r = CsvRecord!(string, ErrorLevel, Range, Separator) 1011 (_input, _separator, _quote, indices, 1012 _allowInconsistentDelimiterCount); 1013 1014 size_t colIndex; 1015 foreach (col; r) 1016 { 1017 header ~= col; 1018 auto ptr = col in colToIndex; 1019 if (ptr) 1020 *ptr = colIndex; 1021 colIndex++; 1022 } 1023 // The above loop empties the header row. 1024 recordRange._empty = true; 1025 recordRange._allowInconsistentDelimiterCount = 1026 allowInconsistentDelimiterCount; 1027 1028 indices.length = colToIndex.length; 1029 int i; 1030 foreach (h; colHeaders) 1031 { 1032 immutable index = colToIndex[h]; 1033 static if (ErrorLevel != Malformed.ignore) 1034 if (index == size_t.max) 1035 throw new HeaderMismatchException 1036 ("Header not found: " ~ to!string(h)); 1037 indices[i++] = index; 1038 } 1039 1040 static if (!is(Contents == struct) && !is(Contents == class)) 1041 { 1042 static if (is(Contents T : T[U], U : string)) 1043 { 1044 import std.algorithm.sorting : sort; 1045 sort(indices); 1046 } 1047 else static if (ErrorLevel == Malformed.ignore) 1048 { 1049 import std.algorithm.sorting : sort; 1050 sort(indices); 1051 } 1052 else 1053 { 1054 import std.algorithm.searching : findAdjacent; 1055 import std.algorithm.sorting : isSorted; 1056 if (!isSorted(indices)) 1057 { 1058 auto ex = new HeaderMismatchException 1059 ("Header in input does not match specified header."); 1060 findAdjacent!"a > b"(indices); 1061 ex.row = 1; 1062 ex.col = indices.front; 1063 1064 throw 
ex; 1065 } 1066 } 1067 } 1068 1069 popFront(); 1070 } 1071 1072 /** 1073 * Part of an input range as defined by 1074 * $(REF isInputRange, std,range,primitives). 1075 * 1076 * Returns: 1077 * If `Contents` is a struct, will be filled with record data. 1078 * 1079 * If `Contents` is a class, will be filled with record data. 1080 * 1081 * If `Contents` is a associative array, will be filled 1082 * with record data. 1083 * 1084 * If `Contents` is non-struct, a $(LREF CsvRecord) will be 1085 * returned. 1086 */ 1087 @property auto front() 1088 { 1089 assert(!empty, "Attempting to fetch the front of an empty CsvReader"); 1090 static if (is(Contents == struct) || is(Contents == class)) 1091 { 1092 return recordContent; 1093 } 1094 else static if (is(Contents T : T[U], U : string)) 1095 { 1096 return recordContent; 1097 } 1098 else 1099 { 1100 return recordRange; 1101 } 1102 } 1103 1104 /** 1105 * Part of an input range as defined by 1106 * $(REF isInputRange, std,range,primitives). 1107 */ 1108 @property bool empty() @safe @nogc pure nothrow const 1109 { 1110 return _empty; 1111 } 1112 1113 /** 1114 * Part of an input range as defined by 1115 * $(REF isInputRange, std,range,primitives). 1116 * 1117 * Throws: 1118 * $(LREF CSVException) When a quote is found in an unquoted field, 1119 * data continues after a closing quote, the quoted field was not 1120 * closed before data was empty, a conversion failed, or when the 1121 * row's length does not match the previous length. 
*/
    void popFront()
    {
        // Exhaust whatever is left of the current record so the shared
        // input sits on the record break (or at the end of the data).
        for (; !recordRange.empty; recordRange.popFront())
        {
        }

        // The first fully consumed record fixes the row length that
        // later records are compared against.
        static if (ErrorLevel == Malformed.throwException)
        {
            if (_input.rowLength == 0)
                _input.rowLength = _input.col;
        }

        _input.col = 0;

        // Step over exactly one record break: "\n", "\r" or "\r\n".
        if (!_input.range.empty)
        {
            if (_input.range.front == '\n')
            {
                _input.range.popFront();
            }
            else if (_input.range.front == '\r')
            {
                _input.range.popFront();
                if (!_input.range.empty && _input.range.front == '\n')
                    _input.range.popFront();
            }
        }

        if (_input.range.empty)
        {
            _empty = true;
            return;
        }

        prime();
    }

    /*
     * Starts the next record: bumps the row counter, creates a fresh
     * record range over the shared input and, for struct, class and
     * associative array `Contents`, eagerly fills `recordContent`.
     */
    private void prime()
    {
        if (_empty)
            return;

        enum bool aggregateTarget =
            is(Contents == struct) || is(Contents == class);

        _input.row++;

        // Aggregates pick their columns below, so the record range is
        // created without a column filter in that case.
        recordRange = typeof(recordRange)
                             (_input, _separator, _quote,
                              aggregateTarget ? null : indices,
                              _allowInconsistentDelimiterCount);

        static if (is(Contents T : T[U], U : string))
        {
            T[U] aa;
            try
            {
                // _input.col is the 1-based column of the field that
                // was just parsed.
                while (!recordRange.empty)
                {
                    aa[header[_input.col-1]] = recordRange.front;
                    recordRange.popFront();
                }
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }

            recordContent = aa;
        }
        else static if (aggregateTarget)
        {
            static if (is(Contents == class))
                recordContent = new typeof(recordContent)();
            else
                recordContent = typeof(recordContent).init;

            size_t colIndex;
            try
            {
                while (!recordRange.empty)
                {
                    auto colData = recordRange.front;
                    // Runs on every exit from this iteration, including
                    // when a conversion below throws, so the column
                    // reported by the catch block is 1-based.
                    scope(exit) colIndex++;

                    foreach (ti, ToType; Fields!(Contents))
                    {
                        // With a header, member ti maps to column
                        // indices[ti]; otherwise members map to columns
                        // by position.
                        immutable wanted =
                            indices.length > 0 ? indices[ti] : ti;
                        if (wanted == colIndex)
                        {
                            static if (!isSomeString!ToType) skipWS(colData);
                            recordContent.tupleof[ti] = to!ToType(colData);
                        }
                    }

                    recordRange.popFront();
                }
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, colIndex, e);
            }
        }
    }
}

// Custom separator and quote characters with Malformed.ignore.
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    string text = `76;^26^;22`;
    int[] expected = [76,26,22];
    auto reader = CsvReader!(int,Malformed.ignore,string,char,string[])
          (text, ';', '^', false);

    foreach (row; reader)
        assert(equal(row, expected));
}

// https://issues.dlang.org/show_bug.cgi?id=15545
// @system due to the catch for Throwable
@system pure unittest
{
    import std.exception : assertNotThrown;
    enum failData =
    "name, surname, age
    Joe, Joker, 99\r";
    auto reader = csvReader(failData);
    assertNotThrown((){foreach (entry; reader){}}());
}

/*
 * This input range is accessible through $(LREF CsvReader) when the
 * requested `Contents` type is neither a structure nor an associative array.
*/
private struct CsvRecord(Contents, Malformed ErrorLevel, Range, Separator)
if (!is(Contents == class) && !is(Contents == struct))
{
    import std.array : appender;
private:
    // Parser state reached through a pointer: the underlying range plus
    // the row, col and rowLength counters used below.
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // Converted value of the current field; returned by front.
    Contents curContentsoken;
    // Scratch buffer receiving the raw text of the field being parsed.
    typeof(appender!(dchar[])()) _front;
    bool _empty;
    bool _allowInconsistentDelimiterCount;
    // For each requested column, how many fields must be skipped to
    // reach it from the previously returned one (see the constructor).
    size_t[] _popCount;
public:
    /*
     * Params:
     *      input = Pointer to a character $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
     *      delimiter = Separator for each column
     *      quote = Character used for quotation
     *      indices = An array containing which columns will be returned.
     *             If empty, all columns are returned. List must be in order.
     *      allowInconsistentDelimiterCount = If true, a record whose field
     *             count differs from the first record's does not throw.
     */
    this(Input!(Range, ErrorLevel)* input, Separator delimiter,
         Separator quote, size_t[] indices,
         bool allowInconsistentDelimiterCount)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;

        _front = appender!(dchar[])();
        _popCount = indices.dup;
        _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;

        // If a header was given, each call to popFront will need
        // to eliminate so many tokens. This calculates
        // how many will be skipped to get to the next header column
        size_t normalizer;
        foreach (ref c; _popCount)
        {
            static if (ErrorLevel == Malformed.ignore)
            {
                // If we are not throwing exceptions
                // a header may not exist, indices are sorted
                // and will be size_t.max if not found.
                if (c == size_t.max)
                    break;
            }
            c -= normalizer;
            normalizer += c + 1;
        }

        prime();
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property Contents front() @safe pure
    {
        assert(!empty, "Attempting to fetch the front of an empty CsvRecord");
        return curContentsoken;
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property bool empty() @safe pure nothrow @nogc const
    {
        return _empty;
    }

    /*
     * CsvRecord is complete when input
     * is empty or starts with record break
     */
    private bool recordEnd()
    {
        if (_input.range.empty
           || _input.range.front == '\n'
           || _input.range.front == '\r')
        {
            return true;
        }
        return false;
    }


    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, a conversion failed, or when the
     *       row's length does not match the previous length.
     */
    void popFront()
    {
        static if (ErrorLevel == Malformed.throwException)
            import std.format : format;
        // Skip last of record when header is depleted.
        // A non-null but empty _popCount means column indices were given
        // and have all been used up.
        if (_popCount.ptr && _popCount.empty)
            while (!recordEnd())
            {
                prime(1);
            }

        if (recordEnd())
        {
            _empty = true;
            static if (ErrorLevel == Malformed.throwException)
            {
                if (_input.rowLength != 0 && _input.col != _input.rowLength
                    && !_allowInconsistentDelimiterCount)
                {
                    throw new CSVException(
                       format("Row %s's length %s does not match "~
                              "previous length of %s.", _input.row,
                              _input.col, _input.rowLength));
                }
            }
            return;
        }
        else
        {
            static if (ErrorLevel == Malformed.throwException)
            {
                // More data follows although the record already has more
                // fields than the first record had.
                if (_input.rowLength != 0 && _input.col > _input.rowLength)
                {
                    if (!_allowInconsistentDelimiterCount)
                    {
                        throw new CSVException(
                           format("Row %s's length %s does not match "~
                                  "previous length of %s.", _input.row,
                                  _input.col, _input.rowLength));
                    }
                    else
                    {
                        _empty = true;
                        return;
                    }
                }
            }
        }

        // Separator is left on the end of input from the last call.
        // This cannot be moved to after the call to csvNextToken as
        // there may be an empty record after it.
        // recordEnd() returned false above, so the range is not empty.
        if (_input.range.front == _separator)
            _input.range.popFront();

        _front.shrinkTo(0);

        prime();
    }

    /*
     * Handles moving to the next skipNum token.
     *
     * Each step counts one column, drops a leading separator if there is
     * one, and parses one field into _front. Once the input is exhausted
     * the remaining steps produce empty fields, exactly as they already
     * do when the record stops early at a record break.
     */
    private void prime(size_t skipNum)
    {
        foreach (i; 0 .. skipNum)
        {
            _input.col++;
            _front.shrinkTo(0);
            // The record may be shorter than the requested column index
            // and be the last thing in the input; front must not be read
            // on an empty range.
            if (!_input.range.empty && _input.range.front == _separator)
                _input.range.popFront();

            try
                csvNextToken!(Range, ErrorLevel, Separator)
                                   (_input.range, _front, _separator, _quote,false);
            catch (IncompleteCellException ice)
            {
                ice.row = _input.row;
                ice.col = _input.col;
                ice.partialData = _front.data.idup;
                throw ice;
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }
        }
    }

    /*
     * Parses the next field, skips forward to the next requested column
     * when column indices are in use, and converts the resulting text to
     * Contents.
     */
    private void prime()
    {
        try
        {
            _input.col++;
            csvNextToken!(Range, ErrorLevel, Separator)
                               (_input.range, _front, _separator, _quote,false);
        }
        catch (IncompleteCellException ice)
        {
            ice.row = _input.row;
            ice.col = _input.col;
            ice.partialData = _front.data.idup;
            throw ice;
        }

        auto skipNum = _popCount.empty ? 0 : _popCount.front;
        if (!_popCount.empty)
            _popCount.popFront();

        // size_t.max marks a requested column that was not found in the
        // header: discard the rest of the record and finish it.
        if (skipNum == size_t.max)
        {
            while (!recordEnd())
                prime(1);
            _empty = true;
            return;
        }

        if (skipNum)
            prime(skipNum);

        auto data = _front.data;
        static if (!isSomeString!Contents) skipWS(data);
        try curContentsoken = to!Contents(data);
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, _input.col, e);
        }
    }
}

/**
 * Lower level control over parsing CSV
 *
 * This function consumes the input. After each call the input will
 * start with either a delimiter or record break (\n, \r\n, \r) which
 * must be removed for subsequent calls.
*
 * Params:
 *       input = Any CSV input
 *       ans   = Output range that receives the first field in the input
 *       sep   = The character to represent a comma in the specification
 *       quote = The character to represent a quote in the specification
 *       startQuoted = Whether the input should be considered to already be
 *                     in quotes
 *
 * Throws:
 *       $(LREF IncompleteCellException) When a quote is found in an unquoted
 *       field, data continues after a closing quote, or the quoted field was
 *       not closed before data was empty.
 */
void csvNextToken(Range, Malformed ErrorLevel = Malformed.throwException,
                           Separator, Output)
                          (ref Range input, ref Output ans,
                           Separator sep, Separator quote,
                           bool startQuoted = false)
if (isSomeChar!Separator && isInputRange!Range
    && is(immutable ElementType!Range == immutable dchar)
    && isOutputRange!(Output, dchar))
{
    // Nothing to read: the input is exhausted or sits on a record break.
    if (input.empty || input.front == '\n' || input.front == '\r')
        return;

    // inQuotes:     the characters being read are inside a quoted field.
    // pendingQuote: the previous character was a quote met inside a quoted
    //               field; the next character decides whether it was an
    //               escaped quote or the closing one.
    // The two flags are never set at the same time.
    bool inQuotes = startQuoted;
    bool pendingQuote;

    if (input.front == quote)
    {
        inQuotes = true;
        input.popFront();
    }

    // A break leaves the current character in the input.
    for (; !input.empty; input.popFront())
    {
        assert(!(inQuotes && pendingQuote),
            "Invalid quotation state in csvNextToken");

        // Outside of quotes the token ends at sep or at a record break.
        if (!inQuotes
            && (input.front == sep
                || input.front == '\r'
                || input.front == '\n'))
        {
            break;
        }

        if (inQuotes || pendingQuote)
        {
            if (input.front != quote)
            {
                // Ordinary character in (or right after) a quoted field.
                if (pendingQuote)
                {
                    // The field was closed but data keeps coming.
                    static if (ErrorLevel == Malformed.throwException)
                        throw new IncompleteCellException(
                          "Content continues after end quote, " ~
                          "or needs to be escaped.");
                    else static if (ErrorLevel == Malformed.ignore)
                        break;
                }
                ans.put(input.front);
            }
            else if (pendingQuote)
            {
                // Second quote of a doubled pair: emit one literal quote
                // and carry on inside the quoted field.
                pendingQuote = false;
                inQuotes = true;
                ans.put(quote);
            }
            else
            {
                // Either the closing quote or the first of a doubled
                // pair; the next character tells which.
                pendingQuote = true;
                inQuotes = false;
            }
        }
        else if (input.front == quote)
        {
            // Not quoted, but quote found
            static if (ErrorLevel == Malformed.throwException)
                throw new IncompleteCellException(
                      "Quote located in unquoted token");
            else static if (ErrorLevel == Malformed.ignore)
                ans.put(quote);
        }
        else
        {
            // Not quoted, non-quote character
            ans.put(input.front);
        }
    }

    // A quoted field that was never closed is an error when throwing.
    static if (ErrorLevel == Malformed.throwException)
        if (inQuotes && (input.empty || input.front == '\n' || input.front == '\r'))
            throw new IncompleteCellException(
                  "Data continues on future lines or trailing quote");

}

///
@safe unittest
{
    import std.array : appender;
    import std.range.primitives : popFront;

    string str = "65,63\n123,3673";

    auto a = appender!(char[])();

    csvNextToken(str,a,',','"');
    assert(a.data == "65");
    assert(str == ",63\n123,3673");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "63");
    assert(str == "\n123,3673");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "123");
    assert(str == ",3673");
}

// Test csvNextToken on simplest form and correct format.
@safe pure unittest
{
    import std.array;

    string str = "\U00010143Hello,65,63.63\nWorld,123,3673.562";
    auto a = appender!(dchar[])();

    // First field, using an explicitly specified Range type.
    csvNextToken!string(str,a,',','"');
    assert(a.data == "\U00010143Hello");
    assert(str == ",65,63.63\nWorld,123,3673.562");

    // Every later step drops the delimiter or record break left at the
    // front of the input, clears the buffer and reads one more field.
    immutable dstring[] fields = ["65"d, "63.63"d, "World"d, "123"d, "3673.562"d];
    immutable string[] remainders = [
        ",63.63\nWorld,123,3673.562",
        "\nWorld,123,3673.562",
        ",123,3673.562",
        ",3673.562",
        "",
    ];

    foreach (step, field; fields)
    {
        str.popFront();
        a.shrinkTo(0);
        csvNextToken(str,a,',','"');
        assert(a.data == field);
        assert(str == remainders[step]);
    }
}

// Test quoted tokens
@safe pure unittest
{
    import std.array;

    string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";
    auto a = appender!(dchar[])();

    csvNextToken!string(str,a,',','"');
    assert(a.data == "one");
    assert(str == `,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix");

    immutable dstring[] fields = [
        "two"d,
        "three \"quoted\""d,
        ""d,
        "five\nnew line"d,
        "six"d,
    ];
    immutable string[] remainders = [
        `,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix",
        `,"",` ~ "\"five\nnew line\"\nsix",
        ",\"five\nnew line\"\nsix",
        "\nsix",
        "",
    ];

    foreach (step, field; fields)
    {
        str.popFront();
        a.shrinkTo(0);
        csvNextToken(str,a,',','"');
        assert(a.data == field);
        assert(str == remainders[step]);
    }
}

// Test empty data is pulled at end of record.
@safe pure unittest
{
    import std.array;

    string text = "one,";
    auto buffer = appender!(dchar[])();

    csvNextToken(text,buffer,',','"');
    assert(buffer.data == "one");
    assert(text == ",");

    // The delimiter is deliberately left in place: the next token is empty.
    buffer.shrinkTo(0);
    csvNextToken(text,buffer,',','"');
    assert(buffer.data == "");
}

// Test exceptions
@safe pure unittest
{
    import std.array;

    typeof(appender!(dchar[])()) a;

    // Quoted field that is never closed: everything is consumed first.
    string str = "\"one\nnew line";
    try
    {
        a = appender!(dchar[])();
        csvNextToken(str,a,',','"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(a.data == "one\nnew line");
        assert(str == "");
    }

    // Quote inside an unquoted field: parsing stops at the quote.
    str = "Hello world\"";
    try
    {
        a = appender!(dchar[])();
        csvNextToken(str,a,',','"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(a.data == "Hello world");
        assert(str == "\"");
    }

    // With Malformed.ignore the stray quotes are kept as data.
    str = "one, two \"quoted\" end";

    a = appender!(dchar[])();
    csvNextToken!(string,Malformed.ignore)(str,a,',','"');
    assert(a.data == "one");
    str.popFront();
    a.shrinkTo(0);
    csvNextToken!(string,Malformed.ignore)(str,a,',','"');
    assert(a.data == " two \"quoted\" end");
}

// Test modifying token delimiter
@safe pure unittest
{
    import std.array;

    string str = `one|two|/three "quoted"/|//`;
    auto a = appender!(dchar[])();

    csvNextToken(str,a, '|','/');
    assert(a.data == "one"d);
    assert(str == `|two|/three "quoted"/|//`);

    immutable dstring[] fields = ["two"d, `three "quoted"`d];
    immutable string[] remainders = [`|/three "quoted"/|//`, `|//`];

    foreach (step, field; fields)
    {
        str.popFront();
        a.shrinkTo(0);
        csvNextToken(str,a, '|','/');
        assert(a.data == field);
        assert(str == remainders[step]);
    }

    // The last field is an empty quoted field.
    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a, '|','/');
    assert(a.data == ""d);
}

// https://issues.dlang.org/show_bug.cgi?id=8908
@safe pure unittest
{
    string csv = `  1.0, 2.0, 3.0
                 4.0, 5.0, 6.0`;

    static struct Data { real a, b, c; }

    // Records delivered as a struct.
    size_t rowNo = 0;
    foreach (rec; csvReader!Data(csv))
    {
        int[] row = [cast(int) rec.a, cast(int) rec.b, cast(int) rec.c];
        assert(row == (rowNo == 0 ? [1, 2, 3] : [4, 5, 6]));
        ++rowNo;
    }

    // Records delivered as a range of fields.
    rowNo = 0;
    foreach (rec; csvReader!real(csv))
    {
        auto first = rec.front;
        rec.popFront();
        auto second = rec.front;
        rec.popFront();
        auto third = rec.front;

        int[] row = [cast(int) first, cast(int) second, cast(int) third];
        assert(row == (rowNo == 0 ? [1, 2, 3] : [4, 5, 6]));
        ++rowNo;
    }
}

// https://issues.dlang.org/show_bug.cgi?id=21629
@safe pure unittest
{
    import std.typecons : Tuple;

    struct Record
    {
        string a;
        string b;
    }

    auto columns = ["a" ,"b"];
    string nothing = "";

    // Empty input yields an empty reader for every kind of Contents.
    assert(csvReader!Record(nothing).empty, "This should be empty");
    assert(csvReader!Record(nothing, columns).empty, "This should be empty");
    assert(csvReader!(Tuple!(string,string))(nothing).empty, "This should be empty");
    assert(csvReader!(string[string])(nothing, columns).empty, "This should be empty");
    assert(csvReader!(string[string])(nothing, null).empty, "This should be empty");
    assert(csvReader!(int)(nothing, null).empty, "This should be empty");
}