//Written in the D programming language

/**
 * Implements functionality to read Comma Separated Values and its variants
 * from an $(REF_ALTTEXT input range, isInputRange, std,range,primitives) of `dchar`.
 *
 * Comma Separated Values provide a simple means to transfer and store
 * tabular data. It has been common for programs to use their own
 * variant of the CSV format. This parser will loosely follow the
 * $(HTTP tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere
 * to the following criteria (differences from RFC-4180 in parentheses):
 *
 * $(UL
 *     $(LI A record is separated by a new line (CRLF,LF,CR))
 *     $(LI A final record may end with a new line)
 *     $(LI A header may be provided as the first record in input)
 *     $(LI A record has fields separated by a comma (customizable))
 *     $(LI A field containing new lines, commas, or double quotes
 *          should be enclosed in double quotes (customizable))
 *     $(LI Double quotes in a field are escaped with a double quote)
 *     $(LI Each record should contain the same number of fields)
 *   )
 *
 * Example:
 *
 * -------
 * import std.algorithm;
 * import std.array;
 * import std.csv;
 * import std.stdio;
 * import std.typecons;
 *
 * void main()
 * {
 *     auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
 *
 *     foreach (record; csvReader!(Tuple!(string, string, int))(text))
 *     {
 *         writefln("%s works as a %s and earns $%d per year",
 *                  record[0], record[1], record[2]);
 *     }
 *
 *     // To read the same string from the file "filename.csv":
 *
 *     auto file = File("filename.csv", "r");
 *     foreach (record;
 *         file.byLine.joiner("\n").csvReader!(Tuple!(string, string, int)))
 *     {
 *         writefln("%s works as a %s and earns $%d per year",
 *                  record[0], record[1], record[2]);
 *     }
 * }
 * -------
 *
 * When an input contains a header the `Contents` can be specified as an
 * associative array.
Pass `null` as the header argument to signify that a header is present.
 *
 * -------
 * auto text = "Name,Occupation,Salary\r" ~
 *     "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
 *
 * foreach (record; csvReader!(string[string])
 *         (text, null))
 * {
 *     writefln("%s works as a %s and earns $%s per year.",
 *              record["Name"], record["Occupation"],
 *              record["Salary"]);
 * }
 *
 * // To read the same string from the file "filename.csv":
 *
 * auto file = File("filename.csv", "r");
 *
 * foreach (record; csvReader!(string[string])
 *         (file.byLine.joiner("\n"), null))
 * {
 *     writefln("%s works as a %s and earns $%s per year.",
 *              record["Name"], record["Occupation"],
 *              record["Salary"]);
 * }
 * -------
 *
 * This module allows content to be iterated by record stored in a struct,
 * class, associative array, or as a range of fields. Upon detection of an
 * error a CSVException is thrown (can be disabled). csvNextToken has been
 * made public to allow for attempted recovery.
 *
 * Disabling exceptions will lift many restrictions specified above. A quote
 * can appear in a field if the field was not quoted. In a quoted field, any
 * quote by itself that is not at the end of the field will end processing for
 * that field. The field is ended when there is no input, even if the quote
 * was not closed.
 *
 * See_Also:
 *    $(HTTP en.wikipedia.org/wiki/Comma-separated_values, Wikipedia
 *    Comma-separated values)
 *
 * Copyright: Copyright 2011
 * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Jesse Phillips
 * Source:    $(PHOBOSSRC std/csv.d)
 */
module std.csv;

import std.conv;
import std.exception : basicExceptionCtors;
import std.range.primitives;
import std.traits;

/**
 * Exception containing the row and column for when an exception was thrown.
*
 * Both `row` and `col` are numbered from one and refer to the location in
 * the file rather than to any specified header. A failure to match the
 * header is a special case; see $(LREF HeaderMismatchException) for details.
 *
 * When performing type conversions, $(REF ConvException, std,conv) is stored in
 * the `next` field.
 */
class CSVException : Exception
{
    ///
    size_t row, col;

    // FIXME: Use std.exception.basicExceptionCtors here once
    // https://issues.dlang.org/show_bug.cgi?id=11500 is fixed

    // Message-only form; `next` is optional and comes last.
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null) @nogc @safe pure nothrow
    {
        super(msg, next, file, line);
    }

    // Chaining form; `next` directly follows the message.
    this(string msg, Throwable next, string file = __FILE__,
         size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, next, file, line);
    }

    // Positional form; records where in the input the problem was detected.
    this(string msg, size_t row, size_t col, Throwable next = null,
         string file = __FILE__, size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
        this.col = col;
        this.row = row;
    }

    // Renders as "(Row: R, Col: C) message".
    override string toString() @safe pure const
    {
        return text("(Row: ", row, ", Col: ", col, ") ", msg);
    }
}

///
@safe unittest
{
    import std.exception : collectException;
    import std.algorithm.searching : count;
    string text = "a,b,c\nHello,65";
    auto ex = collectException!CSVException(csvReader(text).count);
    assert(ex.toString == "(Row: 0, Col: 0) Row 2's length 2 does not match previous length of 3.");
}

///
@safe unittest
{
    import std.exception : collectException;
    import std.algorithm.searching : count;
    import std.typecons : Tuple;
    string text = "a,b\nHello,65";
    auto ex = collectException!CSVException(csvReader!(Tuple!(string,int))(text).count);
    assert(ex.toString == "(Row: 1, Col: 2) Unexpected 'b' when converting from type string to type int");
}

// https://issues.dlang.org/show_bug.cgi?id=24478
@safe unittest
{
    import std.exception : collectException;
    import std.algorithm.searching : count;
    string text = "A, B\n1, 2, 3";
    auto ex = collectException!CSVException(csvReader!(string[string])(text, null).count);
    assert(ex.toString == "(Row: 1, Col: 3) row contains more values than header");
}

@safe pure unittest
{
    import std.string;
    auto e1 = new Exception("Foobar");
    auto e2 = new CSVException("args", e1);
    assert(e2.next is e1);

    size_t r = 13;
    size_t c = 37;

    auto e3 = new CSVException("argv", r, c);
    assert(e3.row == r);
    assert(e3.col == c);

    auto em = e3.toString();
    assert(em.indexOf("13") != -1);
    assert(em.indexOf("37") != -1);
}

/**
 * Exception thrown when a Token is identified to not be completed: a quote is
 * found in an unquoted field, data continues after a closing quote, or the
 * quoted field was not closed before data was empty.
 */
class IncompleteCellException : CSVException
{
    /**
     * Data pulled from input before finding a problem
     *
     * This field is populated when using $(LREF csvReader)
     * but not by $(LREF csvNextToken) as this data will have
     * already been fed to the output range.
     */
    dstring partialData;

    mixin basicExceptionCtors;
}

///
@safe unittest
{
    import std.exception : assertThrown;
    string text = "a,\"b,c\nHello,65,2.5";
    assertThrown!IncompleteCellException(text.csvReader(["a","b","c"]));
}

@safe pure unittest
{
    auto e1 = new Exception("Foobar");
    auto e2 = new IncompleteCellException("args", e1);
    assert(e2.next is e1);
}

/**
 * Exception thrown under different conditions based on the type of $(D
 * Contents).
*
 * Structure, Class, and Associative Array
 * $(UL
 *     $(LI When a header is provided but a matching column is not found)
 * )
 *
 * Other
 * $(UL
 *     $(LI When a header is provided but a matching column is not found)
 *     $(LI Order did not match that found in the input)
 * )
 *
 * Since a row and column is not meaningful when a column specified by the
 * header is not found in the data, both row and col will be zero. Otherwise
 * row is always one and col is the first instance found in header that
 * occurred before the previous starting at one.
 */
class HeaderMismatchException : CSVException
{
    // Constructors only; `row` and `col` are inherited from CSVException.
    mixin basicExceptionCtors;
}

///
@safe unittest
{
    import std.exception : assertThrown;
    string text = "a,b,c\nHello,65,2.5";
    // "invalid" is not a column of the input header.
    assertThrown!HeaderMismatchException(text.csvReader(["b","c","invalid"]));
}

@safe pure unittest
{
    auto e1 = new Exception("Foobar");
    auto e2 = new HeaderMismatchException("args", e1);
    assert(e2.next is e1);
}

/**
 * Determines the behavior for when an error is detected.
 *
 * Disabling exceptions will follow these rules:
 * $(UL
 *     $(LI A quote can appear in a field if the field was not quoted.)
 *     $(LI In a quoted field, any quote by itself that is not at the end of
 *     the field will end processing for that field.)
 *     $(LI The field is ended when there is no input, even if the quote was
 *     not closed.)
 *     $(LI If the given header does not match the order in the input, the
 *     content will return as it is found in the input.)
 *     $(LI If the given header contains columns not found in the input they
 *     will be ignored.)
 * )
 */
enum Malformed
{
    ignore,        /// No exceptions are thrown due to incorrect CSV.
    throwException /// Use exceptions when input has incorrect CSV.
}

///
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.searching : count;
    import std.exception : assertThrown;

    // The last field opens a quote that is never closed.
    string text = "a,b,c\nHello,65,\"2.5";
    assertThrown!IncompleteCellException(text.csvReader.count);

    // ignore the exceptions and try to handle invalid CSV
    auto firstLine = text.csvReader!(string, Malformed.ignore)(null).front;
    assert(firstLine.equal(["Hello", "65", "2.5"]));
}

/**
Returns an $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
for iterating over records found in `input`.

An optional `header` can be provided. The first record will be read in
as the header. If `Contents` is a struct then the header provided is
expected to correspond to the fields in the struct. When `Contents` is
not a type which can contain the entire record, the `header` must be
provided in the same order as the input or an exception is thrown.

Params:
    input = The input range of `dchar` to parse.
    header = The column names to read from the input, or `null` to read
        every column of an input whose first record is a header.
    delimiter = The character separating the fields of a record; defaults
        to `','`.
    quote = The character used to quote fields; defaults to `'"'`.
    allowInconsistentDelimiterCount = When `true`, records may contain
        differing numbers of fields; defaults to `false`.

Returns:
     An input range R as defined by
     $(REF isInputRange, std,range,primitives). When `Contents` is a
     struct, class, or an associative array, the element type of R is
     `Contents`, otherwise the element type of R is itself a range with
     element type `Contents`.

     If a `header` argument is provided,
     the returned range provides a `header` field for accessing the header
     from the input in array form.

Throws:
     $(LREF CSVException) When a quote is found in an unquoted field,
     data continues after a closing quote, the quoted field was not
     closed before data was empty, a conversion failed, or when the row's
     length does not match the previous length.

     $(LREF HeaderMismatchException) when a header is provided but a
     matching column is not found or the order did not match that found in
     the input. Read the exception documentation for specific details of
     when the exception is thrown for different types of `Contents`.
*/
auto csvReader(Contents = string,Malformed ErrorLevel = Malformed.throwException, Range, Separator = char)(Range input,
                 Separator delimiter = ',', Separator quote = '"',
                 bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && !is(Contents T : T[U], U : string))
{
    // Header-less form: the reader's separator type is the input's
    // (unqualified) element type, and `string[]` stands in for the header type.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);
    return Reader(input, delimiter, quote, allowInconsistentDelimiterCount);
}

/// ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
                (Range input, Header header,
                 Separator delimiter = ',', Separator quote = '"',
                 bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && isForwardRange!Header
    && isSomeString!(ElementType!Header))
{
    // Explicit-header form: the caller's header range type is forwarded as is.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), Header);
    return Reader(input, header, delimiter, quote,
                  allowInconsistentDelimiterCount);
}

/// ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
                (Range input, Header header,
                 Separator delimiter = ',', Separator quote = '"',
                 bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && is(Header : typeof(null)))
{
    // `null` header form: an empty `string[]` header is handed to the reader,
    // so the first record is consumed as the header and no column is filtered.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);
    return Reader(input, cast(string[]) null, delimiter, quote,
                  allowInconsistentDelimiterCount);
}


/**
The `Contents` of the input can be provided if all the records are the
same type such as all integer data:
*/
@safe unittest
{
    import std.algorithm.comparison : equal;
    string text = "76,26,22";
    auto records = text.csvReader!int;
    assert(records.equal!equal([
        [76, 26, 22],
    ]));
}

/**
Using a struct with modified delimiter:
*/
@safe unittest
{
    import std.algorithm.comparison : equal;
    string text = "Hello;65;2.5\nWorld;123;7.5";
    struct Layout
    {
        string name;
        int value;
        double other;
    }

    auto records = text.csvReader!Layout(';');
    assert(records.equal([
        Layout("Hello", 65, 2.5),
        Layout("World", 123, 7.5),
    ]));
}

/**
Specifying `ErrorLevel` as $(LREF Malformed.ignore) will lift restrictions
on the format. This example shows that an exception is not thrown when
finding a quote in a field not quoted.
*/
@safe unittest
{
    string text = "A \" is now part of the data";
    auto records = text.csvReader!(string, Malformed.ignore);
    auto record = records.front;

    assert(record.front == text);
}

/// Read only column "b"
@safe unittest
{
    import std.algorithm.comparison : equal;
    string text = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    auto records = text.csvReader!int(["b"]);

    assert(records.equal!equal([
        [65],
        [123],
    ]));
}

/// Read while rearranging the columns by specifying a header with a different order
@safe unittest
{
    import std.algorithm.comparison : equal;
    string text = "a,b,c\nHello,65,2.5\nWorld,123,7.5";
    struct Layout
    {
        int value;
        double other;
        string name;
    }

    auto records = text.csvReader!Layout(["b","c","a"]);
    assert(records.equal([
        Layout(65, 2.5, "Hello"),
        Layout(123, 7.5, "World")
    ]));
}

/**
The header can also be left empty if the input contains a header row
and all columns should be iterated.
The header from the input can always be accessed from the `header` field.
*/
@safe unittest
{
    string text = "a,b,c\nHello,65,63.63";
    // Passing `null` consumes the first record as the header.
    auto records = text.csvReader(null);

    assert(records.header == ["a","b","c"]);
}

/**
Handcrafted csv files tend to have a variable number of columns.

By default `std.csv` will throw if the number of columns on a line
is unequal to the number of columns of the first line.
To allow, or disallow, a variable number of columns a `bool` can be passed to
all overloads of the `csvReader` function as shown below.
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    string text = "76,26,22\n1,2\n3,4,5,6";
    // Third argument: allowInconsistentDelimiterCount.
    auto records = text.csvReader!int(',', '"', true);

    assert(records.equal!equal([
        [76, 26, 22],
        [1, 2],
        [3, 4, 5, 6]
    ]));
}

/// ditto
@safe unittest
{
    import std.algorithm.comparison : equal;

    static struct Three
    {
        int a;
        int b;
        int c;
    }

    string text = "76,26,22\n1,2\n3,4,5,6";
    auto records = text.csvReader!Three(',', '"', true);

    // The short row leaves `c` at its default value; the fourth field of
    // the long row has no matching struct field and is dropped.
    assert(records.equal([
        Three(76, 26, 22),
        Three(1, 2, 0),
        Three(3, 4, 5)
    ]));
}

/// ditto
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto text = "Name,Occupation,Salary\r" ~
        "Joe,Carpenter,300000\nFred,Blacksmith\r\n";

    auto r = csvReader!(string[string])(text, null, ',', '"', true);

    // The record missing its last field simply has no "Salary" key.
    assert(r.equal([
        [ "Name" : "Joe", "Occupation" : "Carpenter", "Salary" : "300000" ],
        [ "Name" : "Fred", "Occupation" : "Blacksmith" ]
    ]));
}

// Test standard iteration over input.
@safe pure unittest
{
    // Three records of two fields each; the input includes an escaped quote,
    // a quoted field containing a newline, and an empty trailing field.
    string str = `one,"two ""quoted"""` ~ "\n\"three\nnew line\",\nfive,six";
    auto records = csvReader(str);

    int count;
    foreach (record; records)
    {
        foreach (cell; record)
        {
            count++;
        }
    }
    assert(count == 6);
}

// Test newline on last record
@safe pure unittest
{
    string str = "one,two\nthree,four\n";
    auto records = csvReader(str);
    records.popFront();
    records.popFront();
    // The trailing newline must not produce a third, empty record.
    assert(records.empty);
}

// Test shorter row length
@safe pure unittest
{
    wstring str = "one,1\ntwo\nthree"w;
    struct Layout
    {
        string name;
        int value;
    }

    // Rows without a second field are expected to keep `value` at 0.
    Layout[3] ans;
    ans[0].name = "one";
    ans[0].value = 1;
    ans[1].name = "two";
    ans[1].value = 0;
    ans[2].name = "three";
    ans[2].value = 0;

    auto records = csvReader!(Layout,Malformed.ignore)(str);

    // NOTE(review): `count` is never compared with `ans.length`, so this
    // test would still pass if fewer than three records were produced.
    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        count++;
    }
}

// Test shorter row length exception
@safe pure unittest
{
    import std.exception;

    struct A
    {
        string a,b,c;
    }

    // Every input mixes rows of different lengths.
    auto strs = ["one,1\ntwo",
                 "one\ntwo,2,二\nthree,3,三",
                 "one\ntwo,2\nthree,3",
                 "one,1\ntwo\nthree,3"];

    foreach (str; strs)
    {
        auto records = csvReader!A(str);
        assertThrown!CSVException((){foreach (record; records) { }}());
    }
}


// Test structure conversion interface with unicode.
@safe pure unittest
{
    import std.math.algebraic : abs;

    wstring str = "\U00010143Hello,65,63.63\nWorld,123,3673.562"w;
    struct Layout
    {
        string name;
        int value;
        double other;
    }

    Layout[2] ans;
    ans[0].name = "\U00010143Hello";
    ans[0].value = 65;
    ans[0].other = 63.63;
    ans[1].name = "World";
    ans[1].value = 123;
    ans[1].other = 3673.562;

    auto records = csvReader!Layout(str);

    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        // Floating point fields are compared with a tolerance.
        assert(abs(ans[count].other - record.other) < 0.00001);
        count++;
    }
    assert(count == ans.length);
}

// Test input conversion interface
@safe pure unittest
{
    import std.algorithm;
    string str = `76,26,22`;
    int[] ans = [76,26,22];
    auto records = csvReader!int(str);

    foreach (record; records)
    {
        assert(equal(record, ans));
    }
}

// Test struct & header interface and same unicode
@safe unittest
{
    import std.math.algebraic : abs;

    string str = "a,b,c\nHello,65,63.63\n➊➋➂❹,123,3673.562";
    struct Layout
    {
        int value;
        double other;
        string name;
    }

    // The header lists the input columns in struct field order.
    auto records = csvReader!Layout(str, ["b","c","a"]);

    Layout[2] ans;
    ans[0].name = "Hello";
    ans[0].value = 65;
    ans[0].other = 63.63;
    ans[1].name = "➊➋➂❹";
    ans[1].value = 123;
    ans[1].other = 3673.562;

    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        assert(abs(ans[count].other - record.other) < 0.00001);
        count++;
    }
    assert(count == ans.length);

}

// Test header interface
@safe unittest
{
    import std.algorithm;

    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    auto records = csvReader!int(str, ["b"]);

    auto ans = [[65],[123]];
    foreach (record; records)
    {
        assert(equal(record, ans.front));
        ans.popFront();
    }

    // A header given in a different order than the input must throw when
    // `Contents` cannot hold a whole record.
    try
    {
        csvReader(str, ["c","b"]);
        assert(0);
    }
    catch (HeaderMismatchException e)
    {
        assert(e.col == 2);
    }
    // With Malformed.ignore the columns come back in input order instead.
    auto records2 = csvReader!(string,Malformed.ignore)
       (str, ["b","a"], ',', '"');

    auto ans2 = [["Hello","65"],["World","123"]];
    foreach (record; records2)
    {
        assert(equal(record, ans2.front));
        ans2.popFront();
    }

    // Requested columns missing from the input ("b", "d") are ignored.
    str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
    records2 = csvReader!(string,Malformed.ignore)
       (str, ["a","b","c","d"], ',', '"');

    ans2 = [["Joe","Carpenter"],["Fred","Fly"]];
    foreach (record; records2)
    {
        assert(equal(record, ans2.front));
        ans2.popFront();
    }
}

// Test that the `header` field exposes the complete header of the input even
// when only a subset of the columns is requested
@safe unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    auto records = csvReader(str, ["a"]);

    assert(records.header == ["a","b","c"]);
}

// Test unchecked read
@safe pure unittest
{
    string str = "one \"quoted\"";
    foreach (record; csvReader!(string,Malformed.ignore)(str))
    {
        foreach (cell; record)
        {
            assert(cell == "one \"quoted\"");
        }
    }

    str = "one \"quoted\",two \"quoted\" end";
    struct Ans
    {
        string a,b;
    }
    foreach (record; csvReader!(Ans,Malformed.ignore)(str))
    {
        assert(record.a == "one \"quoted\"");
        assert(record.b == "two \"quoted\" end");
    }
}

// Test partial data returned
@safe pure unittest
{
    // The opening quote is never closed.
    string str = "\"one\nnew line";

    try
    {
        foreach (record; csvReader(str))
        {}
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(ice.partialData == "one\nnew line");
    }
}

// Test Windows line break
@safe pure unittest
{
    string str = "one,two\r\nthree";

    auto records = csvReader(str);
    auto record = records.front;
    assert(record.front == "one");
    record.popFront();
    assert(record.front == "two");
    records.popFront();
    record = records.front;
    assert(record.front == "three");
}


// Test associative array support with unicode separator
@safe unittest
{
    string str = "1❁2❁3\n34❁65❁63\n34❁65❁63";

    auto records = csvReader!(string[string])(str,["3","1"],'❁');
    int count;
    foreach (record; records)
    {
        count++;
        assert(record["1"] == "34");
        assert(record["3"] == "63");
    }
    assert(count == 2);
}

// Test restricted range
@safe unittest
{
    import std.typecons;
    // Minimal range offering only `empty`, `front` and `popFront`.
    struct InputRange
    {
        dstring text;

        this(dstring txt)
        {
            text = txt;
        }

        @property auto empty()
        {
            return text.empty;
        }

        void popFront()
        {
            text.popFront();
        }

        @property dchar front()
        {
            return text[0];
        }
    }
    auto ir = InputRange("Name,Occupation,Salary\r"d~
          "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"d);

    foreach (record; csvReader(ir, cast(string[]) null))
        foreach (cell; record) {}
    foreach (record; csvReader!(Tuple!(string, string, int))
            (ir,cast(string[]) null)) {}
    foreach (record; csvReader!(string[string])
            (ir,cast(string[]) null)) {}
}

@safe unittest // const/immutable dchars
{
    import std.algorithm.iteration : map;
    import std.array : array;
    const(dchar)[] c = "foo,bar\n";
    assert(csvReader(c).map!array.array == [["foo", "bar"]]);
    immutable(dchar)[] i = "foo,bar\n";
    assert(csvReader(i).map!array.array == [["foo", "bar"]]);
}

/*
 * Parsing state for one CSV input.
 *
 * This struct is stored on the heap for when the structures
 * are passed around: CsvReader allocates it with `new` and both CsvReader
 * and CsvRecord refer to it through a pointer.
 *
 * `rowLength` exists only when `ErrorLevel` is Malformed.throwException.
 */
private pure struct Input(Range, Malformed ErrorLevel)
{
    // The character range being consumed.
    Range range;
    // Position counters; CsvReader increments `row` for every record it
    // primes and resets `col` to zero between records.
    size_t row, col;
    static if (ErrorLevel == Malformed.throwException)
        // CsvReader.popFront stores `col` here while this is still zero.
        size_t rowLength;
}

/*
 * Range for iterating CSV records.
*
 * This range is returned by the $(LREF csvReader) functions. It can be
 * created in a similar manner to allow `ErrorLevel` be set to $(LREF
 * Malformed).ignore if best guess processing should take place.
 */
private struct CsvReader(Contents, Malformed ErrorLevel, Range, Separator, Header)
if (isSomeChar!Separator && isInputRange!Range
    && is(immutable ElementType!Range == immutable dchar)
    && isForwardRange!Header && isSomeString!(ElementType!Header))
{
private:
    // Parsing state shared with the record ranges handed out by `front`.
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // Zero-based input columns selected by the header; empty when no header
    // was given or when `null` was passed as the header.
    size_t[] indices;
    bool _empty;
    bool _allowInconsistentDelimiterCount;
    static if (is(Contents == struct) || is(Contents == class))
    {
        // Aggregates are filled field by field from string cells.
        Contents recordContent;
        CsvRecord!(string, ErrorLevel, Range, Separator) recordRange;
    }
    else static if (is(Contents T : T[U], U : string))
    {
        // Associative arrays map header names to converted cells.
        Contents recordContent;
        CsvRecord!(T, ErrorLevel, Range, Separator) recordRange;
    }
    else
        CsvRecord!(Contents, ErrorLevel, Range, Separator) recordRange;
public:
    /**
     * Header from the input in array form.
     *
     * -------
     * string str = "a,b,c\nHello,65,63.63";
     * auto records = csvReader(str, ["a"]);
     *
     * assert(records.header == ["a","b","c"]);
     * -------
     */
    string[] header;

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * without a header.
     *
     * -------
     * string str = `76;^26^;22`;
     * int[] ans = [76,26,22];
     * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
     *       (str, ';', '^', false);
     *
     * foreach (record; records)
     * {
     *    assert(equal(record, ans));
     * }
     * -------
     */
    this(Range input, Separator delimiter, Separator quote,
         bool allowInconsistentDelimiterCount)
    {
        _input = new Input!(Range, ErrorLevel)(input);
        _separator = delimiter;
        _quote = quote;
        _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;

        if (_input.range.empty)
        {
            _empty = true;
            return;
        }

        prime();
    }

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * with a header.
     *
     * -------
     * string str = "high;mean;low\n76;^26^;22";
     * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
     *       (str, ["high","low"], ';', '^', false);
     *
     * int[] ans = [76,22];
     * foreach (record; records)
     * {
     *    assert(equal(record, ans));
     * }
     * -------
     *
     * Throws:
     *       $(LREF HeaderMismatchException) when a header is provided but a
     *       matching column is not found or the order did not match that found
     *       in the input (non-struct).
     */
    this(Range input, Header colHeaders, Separator delimiter, Separator quote,
         bool allowInconsistentDelimiterCount)
    {
        _input = new Input!(Range, ErrorLevel)(input);
        _separator = delimiter;
        _quote = quote;
        _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;

        if (_input.range.empty)
        {
            _empty = true;
            return;
        }

        // Requested column name -> zero-based position in the input header;
        // size_t.max marks a name that has not been seen in the input.
        size_t[string] colToIndex;
        foreach (h; colHeaders)
        {
            colToIndex[h] = size_t.max;
        }

        // `indices` is still empty here, so every header column is read.
        auto r = CsvRecord!(string, ErrorLevel, Range, Separator)
            (_input, _separator, _quote, indices,
             _allowInconsistentDelimiterCount);

        size_t colIndex;
        foreach (col; r)
        {
            header ~= col;
            auto ptr = col in colToIndex;
            if (ptr)
                *ptr = colIndex;
            colIndex++;
        }
        // The above loop empties the header row.
        recordRange._empty = true;
        recordRange._allowInconsistentDelimiterCount =
            allowInconsistentDelimiterCount;

        indices.length = colToIndex.length;
        size_t i;
        foreach (h; colHeaders)
        {
            immutable index = colToIndex[h];
            static if (ErrorLevel != Malformed.ignore)
                if (index == size_t.max)
                    throw new HeaderMismatchException
                        ("Header not found: " ~ to!string(h));
            indices[i++] = index;
        }

        static if (!is(Contents == struct) && !is(Contents == class))
        {
            static if (is(Contents T : T[U], U : string))
            {
                import std.algorithm.sorting : sort;
                sort(indices);
            }
            else static if (ErrorLevel == Malformed.ignore)
            {
                import std.algorithm.sorting : sort;
                sort(indices);
            }
            else
            {
                import std.algorithm.searching : findAdjacent;
                import std.algorithm.sorting : isSorted;
                if (!isSorted(indices))
                {
                    auto ex = new HeaderMismatchException
                           ("Header in input does not match specified header.");
                    // `indices` is not sorted, so a descending adjacent pair
                    // exists. Its second element is the zero-based input
                    // position of the first requested column that appears
                    // before its predecessor; columns are reported one-based.
                    auto outOfOrder = findAdjacent!"a > b"(indices);
                    ex.row = 1;
                    ex.col = outOfOrder[1] + 1;

                    throw ex;
                }
            }
        }

        popFront();
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Returns:
     *      If `Contents` is a struct, will be filled with record data.
     *
     *      If `Contents` is a class, will be filled with record data.
     *
     *      If `Contents` is an associative array, will be filled
     *      with record data.
     *
     *      If `Contents` is non-struct, a $(LREF CsvRecord) will be
     *      returned.
     */
    @property auto front()
    {
        assert(!empty, "Attempting to fetch the front of an empty CsvReader");
        static if (is(Contents == struct) || is(Contents == class))
        {
            return recordContent;
        }
        else static if (is(Contents T : T[U], U : string))
        {
            return recordContent;
        }
        else
        {
            return recordRange;
        }
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property bool empty() @safe @nogc pure nothrow const
    {
        return _empty;
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, a conversion failed, or when the
     *       row's length does not match the previous length.
     */
    void popFront()
    {
        // Drain whatever is left of the current record.
        while (!recordRange.empty)
        {
            recordRange.popFront();
        }

        // Remember the column count of the first completed row.
        static if (ErrorLevel == Malformed.throwException)
            if (_input.rowLength == 0)
                _input.rowLength = _input.col;

        _input.col = 0;

        // Consume a single record separator: CRLF, CR or LF.
        if (!_input.range.empty)
        {
            if (_input.range.front == '\r')
            {
                _input.range.popFront();
                if (!_input.range.empty && _input.range.front == '\n')
                    _input.range.popFront();
            }
            else if (_input.range.front == '\n')
                _input.range.popFront();
        }

        if (_input.range.empty)
        {
            _empty = true;
            return;
        }

        prime();
    }

    // Sets up `recordRange` for the next record and, for aggregate and
    // associative array `Contents`, converts the record into `recordContent`.
    private void prime()
    {
        if (_empty)
            return;
        _input.row++;
        static if (is(Contents == struct) || is(Contents == class))
        {
            // Aggregates read every column; `indices` is applied below.
            recordRange = typeof(recordRange)
                                 (_input, _separator, _quote, null,
                                 _allowInconsistentDelimiterCount);
        }
        else
        {
            recordRange = typeof(recordRange)
                                 (_input, _separator, _quote, indices,
                                 _allowInconsistentDelimiterCount);
        }

        static if (is(Contents T : T[U], U : string))
        {
            T[U] aa;
            try
            {
                for (; !recordRange.empty; recordRange.popFront())
                {
                    // `_input.col` is used as the one-based position of the
                    // current field; subtract one to index `header`.
                    const i = _input.col - 1;
                    if (i >= header.length)
                        throw new CSVException("row contains more values than header", _input.row, _input.col);
                    aa[header[i]] = recordRange.front;
                }
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }

            recordContent = aa;
        }
        else static if (is(Contents == struct) || is(Contents == class))
        {
            static if (is(Contents == class))
                recordContent = new typeof(recordContent)();
            else
                recordContent = typeof(recordContent).init;
            size_t colIndex;
            try
            {
                for (; !recordRange.empty;)
                {
                    auto colData = recordRange.front;
                    // Runs before the catch below, so a conversion failure
                    // reports a one-based column.
                    scope(exit) colIndex++;
                    if (indices.length > 0)
                    {
                        foreach (ti, ToType; Fields!(Contents))
                        {
                            // `indices` is shorter than the field list when
                            // the header names fewer columns than `Contents`
                            // has fields; the remaining fields keep their
                            // default value instead of indexing out of bounds.
                            if (ti < indices.length && indices[ti] == colIndex)
                            {
                                static if (!isSomeString!ToType) skipWS(colData);
                                recordContent.tupleof[ti] = to!ToType(colData);
                            }
                        }
                    }
                    else
                    {
                        foreach (ti, ToType; Fields!(Contents))
                        {
                            if (ti == colIndex)
                            {
                                static if (!isSomeString!ToType) skipWS(colData);
                                recordContent.tupleof[ti] = to!ToType(colData);
                            }
                        }
                    }
                    recordRange.popFront();
                }
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, colIndex, e);
            }
        }
    }
}

// A header mismatch reports the one-based file column of the first requested
// column that appears before its predecessor.
@safe unittest
{
    string str = "a,b,c,d\n1,2,3,4";
    try
    {
        csvReader(str, ["a","d","b"]);
        assert(0);
    }
    catch (HeaderMismatchException e)
    {
        assert(e.row == 1);
        assert(e.col == 2);
    }
}

// A header naming fewer columns than the struct has fields fills the leading
// fields and leaves the remaining ones at their default value.
@safe unittest
{
    static struct Layout
    {
        int value;
        double other;
        string name;
    }

    string str = "a,b,c\nHello,65,2.5";
    auto records = csvReader!Layout(str, ["b"]);
    assert(records.front.value == 65);
    assert(records.front.name.length == 0);
}

@safe pure unittest
{
    import std.algorithm.comparison : equal;

    string str = `76;^26^;22`;
    int[] ans = [76,26,22];
    auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
          (str, ';', '^', false);

    foreach (record; records)
    {
        assert(equal(record, ans));
    }
}

// https://issues.dlang.org/show_bug.cgi?id=15545
// @system due to the catch for Throwable
@system pure unittest
{
    import std.exception : assertNotThrown;
    enum failData =
    "name, surname, age
    Joe, Joker, 99\r";
    auto r = csvReader(failData);
    assertNotThrown((){foreach (entry; r){}}());
}

/*
 * This input range is accessible through $(LREF CsvReader) when the
 * requested `Contents` type is neither a structure nor an associative array.
1281 */ 1282 private struct CsvRecord(Contents, Malformed ErrorLevel, Range, Separator) 1283 if (!is(Contents == class) && !is(Contents == struct)) 1284 { 1285 import std.array : appender; 1286 private: 1287 Input!(Range, ErrorLevel)* _input; 1288 Separator _separator; 1289 Separator _quote; 1290 Contents curContentsoken; 1291 typeof(appender!(dchar[])()) _front; 1292 bool _empty; 1293 bool _allowInconsistentDelimiterCount; 1294 size_t[] _popCount; 1295 public: 1296 /* 1297 * Params: 1298 * input = Pointer to a character $(REF_ALTTEXT input range, isInputRange, std,range,primitives) 1299 * delimiter = Separator for each column 1300 * quote = Character used for quotation 1301 * indices = An array containing which columns will be returned. 1302 * If empty, all columns are returned. List must be in order. 1303 */ 1304 this(Input!(Range, ErrorLevel)* input, Separator delimiter, 1305 Separator quote, size_t[] indices, 1306 bool allowInconsistentDelimiterCount) 1307 { 1308 _input = input; 1309 _separator = delimiter; 1310 _quote = quote; 1311 1312 _front = appender!(dchar[])(); 1313 _popCount = indices.dup; 1314 _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount; 1315 1316 // If a header was given, each call to popFront will need 1317 // to eliminate so many tokens. This calculates 1318 // how many will be skipped to get to the next header column 1319 size_t normalizer; 1320 foreach (ref c; _popCount) 1321 { 1322 static if (ErrorLevel == Malformed.ignore) 1323 { 1324 // If we are not throwing exceptions 1325 // a header may not exist, indices are sorted 1326 // and will be size_t.max if not found. 1327 if (c == size_t.max) 1328 break; 1329 } 1330 c -= normalizer; 1331 normalizer += c + 1; 1332 } 1333 1334 prime(); 1335 } 1336 1337 /** 1338 * Part of an input range as defined by 1339 * $(REF isInputRange, std,range,primitives). 
1340 */ 1341 @property Contents front() @safe pure 1342 { 1343 assert(!empty, "Attempting to fetch the front of an empty CsvRecord"); 1344 return curContentsoken; 1345 } 1346 1347 /** 1348 * Part of an input range as defined by 1349 * $(REF isInputRange, std,range,primitives). 1350 */ 1351 @property bool empty() @safe pure nothrow @nogc const 1352 { 1353 return _empty; 1354 } 1355 1356 /* 1357 * CsvRecord is complete when input 1358 * is empty or starts with record break 1359 */ 1360 private bool recordEnd() 1361 { 1362 if (_input.range.empty 1363 || _input.range.front == '\n' 1364 || _input.range.front == '\r') 1365 { 1366 return true; 1367 } 1368 return false; 1369 } 1370 1371 1372 /** 1373 * Part of an input range as defined by 1374 * $(REF isInputRange, std,range,primitives). 1375 * 1376 * Throws: 1377 * $(LREF CSVException) When a quote is found in an unquoted field, 1378 * data continues after a closing quote, the quoted field was not 1379 * closed before data was empty, a conversion failed, or when the 1380 * row's length does not match the previous length. 1381 */ 1382 void popFront() 1383 { 1384 static if (ErrorLevel == Malformed.throwException) 1385 import std.format : format; 1386 // Skip last of record when header is depleted. 
1387 if (_popCount.ptr && _popCount.empty) 1388 while (!recordEnd()) 1389 { 1390 prime(1); 1391 } 1392 1393 if (recordEnd()) 1394 { 1395 _empty = true; 1396 static if (ErrorLevel == Malformed.throwException) 1397 { 1398 if (_input.rowLength != 0 && _input.col != _input.rowLength 1399 && !_allowInconsistentDelimiterCount) 1400 { 1401 throw new CSVException( 1402 format("Row %s's length %s does not match "~ 1403 "previous length of %s.", _input.row, 1404 _input.col, _input.rowLength)); 1405 } 1406 } 1407 return; 1408 } 1409 else 1410 { 1411 static if (ErrorLevel == Malformed.throwException) 1412 { 1413 if (_input.rowLength != 0 && _input.col > _input.rowLength) 1414 { 1415 if (!_allowInconsistentDelimiterCount) 1416 { 1417 throw new CSVException( 1418 format("Row %s's length %s does not match "~ 1419 "previous length of %s.", _input.row, 1420 _input.col, _input.rowLength)); 1421 } 1422 else 1423 { 1424 _empty = true; 1425 return; 1426 } 1427 } 1428 } 1429 } 1430 1431 // Separator is left on the end of input from the last call. 1432 // This cannot be moved to after the call to csvNextToken as 1433 // there may be an empty record after it. 1434 if (_input.range.front == _separator) 1435 _input.range.popFront(); 1436 1437 _front.shrinkTo(0); 1438 1439 prime(); 1440 } 1441 1442 /* 1443 * Handles moving to the next skipNum token. 1444 */ 1445 private void prime(size_t skipNum) 1446 { 1447 foreach (i; 0 .. 
skipNum) 1448 { 1449 _input.col++; 1450 _front.shrinkTo(0); 1451 if (_input.range.front == _separator) 1452 _input.range.popFront(); 1453 1454 try 1455 csvNextToken!(Range, ErrorLevel, Separator) 1456 (_input.range, _front, _separator, _quote,false); 1457 catch (IncompleteCellException ice) 1458 { 1459 ice.row = _input.row; 1460 ice.col = _input.col; 1461 ice.partialData = _front.data.idup; 1462 throw ice; 1463 } 1464 catch (ConvException e) 1465 { 1466 throw new CSVException(e.msg, _input.row, _input.col, e); 1467 } 1468 } 1469 } 1470 1471 private void prime() 1472 { 1473 try 1474 { 1475 _input.col++; 1476 csvNextToken!(Range, ErrorLevel, Separator) 1477 (_input.range, _front, _separator, _quote,false); 1478 } 1479 catch (IncompleteCellException ice) 1480 { 1481 ice.row = _input.row; 1482 ice.col = _input.col; 1483 ice.partialData = _front.data.idup; 1484 throw ice; 1485 } 1486 1487 auto skipNum = _popCount.empty ? 0 : _popCount.front; 1488 if (!_popCount.empty) 1489 _popCount.popFront(); 1490 1491 if (skipNum == size_t.max) 1492 { 1493 while (!recordEnd()) 1494 prime(1); 1495 _empty = true; 1496 return; 1497 } 1498 1499 if (skipNum) 1500 prime(skipNum); 1501 1502 auto data = _front.data; 1503 static if (!isSomeString!Contents) skipWS(data); 1504 try curContentsoken = to!Contents(data); 1505 catch (ConvException e) 1506 { 1507 throw new CSVException(e.msg, _input.row, _input.col, e); 1508 } 1509 } 1510 } 1511 1512 /** 1513 * Lower level control over parsing CSV 1514 * 1515 * This function consumes the input. After each call the input will 1516 * start with either a delimiter or record break (\n, \r\n, \r) which 1517 * must be removed for subsequent calls. 
1518 * 1519 * Params: 1520 * input = Any CSV input 1521 * ans = The first field in the input 1522 * sep = The character to represent a comma in the specification 1523 * quote = The character to represent a quote in the specification 1524 * startQuoted = Whether the input should be considered to already be in 1525 * quotes 1526 * 1527 * Throws: 1528 * $(LREF IncompleteCellException) When a quote is found in an unquoted 1529 * field, data continues after a closing quote, or the quoted field was 1530 * not closed before data was empty. 1531 */ 1532 void csvNextToken(Range, Malformed ErrorLevel = Malformed.throwException, 1533 Separator, Output) 1534 (ref Range input, ref Output ans, 1535 Separator sep, Separator quote, 1536 bool startQuoted = false) 1537 if (isSomeChar!Separator && isInputRange!Range 1538 && is(immutable ElementType!Range == immutable dchar) 1539 && isOutputRange!(Output, dchar)) 1540 { 1541 bool quoted = startQuoted; 1542 bool escQuote; 1543 if (input.empty) 1544 return; 1545 1546 if (input.front == '\n') 1547 return; 1548 if (input.front == '\r') 1549 return; 1550 1551 if (input.front == quote) 1552 { 1553 quoted = true; 1554 input.popFront(); 1555 } 1556 1557 while (!input.empty) 1558 { 1559 assert(!(quoted && escQuote), 1560 "Invalid quotation state in csvNextToken"); 1561 if (!quoted) 1562 { 1563 // When not quoted the token ends at sep 1564 if (input.front == sep) 1565 break; 1566 if (input.front == '\r') 1567 break; 1568 if (input.front == '\n') 1569 break; 1570 } 1571 if (!quoted && !escQuote) 1572 { 1573 if (input.front == quote) 1574 { 1575 // Not quoted, but quote found 1576 static if (ErrorLevel == Malformed.throwException) 1577 throw new IncompleteCellException( 1578 "Quote located in unquoted token"); 1579 else static if (ErrorLevel == Malformed.ignore) 1580 ans.put(quote); 1581 } 1582 else 1583 { 1584 // Not quoted, non-quote character 1585 ans.put(input.front); 1586 } 1587 } 1588 else 1589 { 1590 if (input.front == quote) 1591 { 1592 
// Quoted, quote found 1593 // By turning off quoted and turning on escQuote 1594 // I can tell when to add a quote to the string 1595 // escQuote is turned to false when it escapes a 1596 // quote or is followed by a non-quote (see outside else). 1597 // They are mutually exclusive, but provide different 1598 // information. 1599 if (escQuote) 1600 { 1601 escQuote = false; 1602 quoted = true; 1603 ans.put(quote); 1604 } else 1605 { 1606 escQuote = true; 1607 quoted = false; 1608 } 1609 } 1610 else 1611 { 1612 // Quoted, non-quote character 1613 if (escQuote) 1614 { 1615 static if (ErrorLevel == Malformed.throwException) 1616 throw new IncompleteCellException( 1617 "Content continues after end quote, " ~ 1618 "or needs to be escaped."); 1619 else static if (ErrorLevel == Malformed.ignore) 1620 break; 1621 } 1622 ans.put(input.front); 1623 } 1624 } 1625 input.popFront(); 1626 } 1627 1628 static if (ErrorLevel == Malformed.throwException) 1629 if (quoted && (input.empty || input.front == '\n' || input.front == '\r')) 1630 throw new IncompleteCellException( 1631 "Data continues on future lines or trailing quote"); 1632 1633 } 1634 1635 /// 1636 @safe unittest 1637 { 1638 import std.array : appender; 1639 import std.range.primitives : popFront; 1640 1641 string str = "65,63\n123,3673"; 1642 1643 auto a = appender!(char[])(); 1644 1645 csvNextToken(str,a,',','"'); 1646 assert(a.data == "65"); 1647 assert(str == ",63\n123,3673"); 1648 1649 str.popFront(); 1650 a.shrinkTo(0); 1651 csvNextToken(str,a,',','"'); 1652 assert(a.data == "63"); 1653 assert(str == "\n123,3673"); 1654 1655 str.popFront(); 1656 a.shrinkTo(0); 1657 csvNextToken(str,a,',','"'); 1658 assert(a.data == "123"); 1659 assert(str == ",3673"); 1660 } 1661 1662 // Test csvNextToken on simplest form and correct format. 
@safe pure unittest
{
    import std.array;

    // First field starts with a code point outside the BMP.
    string str = "\U00010143Hello,65,63.63\nWorld,123,3673.562";

    auto a = appender!(dchar[])();
    csvNextToken!string(str,a,',','"');
    assert(a.data == "\U00010143Hello");
    assert(str == ",65,63.63\nWorld,123,3673.562");

    // Each following step drops the leftover separator or record break,
    // clears the buffer and reads the next field.
    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "65");
    assert(str == ",63.63\nWorld,123,3673.562");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "63.63");
    assert(str == "\nWorld,123,3673.562");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "World");
    assert(str == ",123,3673.562");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "123");
    assert(str == ",3673.562");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "3673.562");
    assert(str == "");
}

// Test quoted tokens
@safe pure unittest
{
    import std.array;

    string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";

    auto a = appender!(dchar[])();
    csvNextToken!string(str,a,',','"');
    assert(a.data == "one");
    assert(str == `,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "two");
    assert(str == `,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix");

    // Doubled quotes inside a quoted field collapse to a single quote.
    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "three \"quoted\"");
    assert(str == `,"",` ~ "\"five\nnew line\"\nsix");

    // An empty quoted field yields empty data.
    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "");
    assert(str == ",\"five\nnew line\"\nsix");

    // A new line inside quotes is part of the field.
    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "five\nnew line");
    assert(str == "\nsix");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "six");
    assert(str == "");
}

// Test empty data is pulled at end of record.
@safe pure unittest
{
    import std.array;

    string str = "one,";
    auto a = appender!(dchar[])();
    csvNextToken(str,a,',','"');
    assert(a.data == "one");
    assert(str == ",");

    // The separator is deliberately left in place here; the call still
    // produces empty data.
    a.shrinkTo(0);
    csvNextToken(str,a,',','"');
    assert(a.data == "");
}

// Test exceptions
@safe pure unittest
{
    import std.array;

    // Quoted field that is never closed.
    string str = "\"one\nnew line";

    typeof(appender!(dchar[])()) a;
    try
    {
        a = appender!(dchar[])();
        csvNextToken(str,a,',','"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        // Everything read so far is kept and the input is fully consumed.
        assert(a.data == "one\nnew line");
        assert(str == "");
    }

    // Quote inside an unquoted field.
    str = "Hello world\"";

    try
    {
        a = appender!(dchar[])();
        csvNextToken(str,a,',','"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        // The offending quote is left at the front of the input.
        assert(a.data == "Hello world");
        assert(str == "\"");
    }

    // With Malformed.ignore, quotes in an unquoted field are kept as data.
    str = "one, two \"quoted\" end";

    a = appender!(dchar[])();
    csvNextToken!(string,Malformed.ignore)(str,a,',','"');
    assert(a.data == "one");
    str.popFront();
    a.shrinkTo(0);
    csvNextToken!(string,Malformed.ignore)(str,a,',','"');
    assert(a.data == " two \"quoted\" end");
}

// Test modifying token delimiter
@safe pure unittest
{
    import std.array;

    // '|' separates fields and '/' quotes them; '"' is ordinary data here.
    string str = `one|two|/three "quoted"/|//`;

    auto a = appender!(dchar[])();
    csvNextToken(str,a, '|','/');
    assert(a.data == "one"d);
    assert(str == `|two|/three "quoted"/|//`);

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a, '|','/');
    assert(a.data == "two"d);
    assert(str == `|/three "quoted"/|//`);

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a, '|','/');
    assert(a.data == `three "quoted"`);
    assert(str == `|//`);

    str.popFront();
    a.shrinkTo(0);
    csvNextToken(str,a, '|','/');
    assert(a.data == ""d);
}

// https://issues.dlang.org/show_bug.cgi?id=8908
@safe pure unittest
{
    // Numeric fields carry leading whitespace.
    string csv = `  1.0, 2.0, 3.0
                    4.0, 5.0, 6.0`;

    static struct Data { real a, b, c; }
    size_t i = 0;
    // Read each row into a struct.
    foreach (data; csvReader!Data(csv)) with (data)
    {
        int[] row = [cast(int) a, cast(int) b, cast(int) c];
        if (i == 0)
            assert(row == [1, 2, 3]);
        else
            assert(row == [4, 5, 6]);
        ++i;
    }

    i = 0;
    // Read each row as a range of real values.
    foreach (data; csvReader!real(csv))
    {
        auto a = data.front;    data.popFront();
        auto b = data.front;    data.popFront();
        auto c = data.front;
        int[] row = [cast(int) a, cast(int) b, cast(int) c];
        if (i == 0)
            assert(row == [1, 2, 3]);
        else
            assert(row == [4, 5, 6]);
        ++i;
    }
}

// https://issues.dlang.org/show_bug.cgi?id=21629
@safe pure unittest
{
    import std.typecons : Tuple;
    struct Reccord
    {
        string a;
        string b;
    }

    // Empty input must give an empty reader for every Contents kind,
    // with or without a header argument.
    auto header = ["a" ,"b"];
    string input = "";
    assert(csvReader!Reccord(input).empty, "This should be empty");
    assert(csvReader!Reccord(input, header).empty, "This should be empty");
    assert(csvReader!(Tuple!(string,string))(input).empty, "This should be empty");
    assert(csvReader!(string[string])(input, header).empty, "This should be empty");
    assert(csvReader!(string[string])(input, null).empty, "This should be empty");
    assert(csvReader!(int)(input, null).empty, "This should be empty");
}