1 //Written in the D programming language
2
3 /**
4 * Implements functionality to read Comma Separated Values and its variants
5 * from an $(REF_ALTTEXT input range, isInputRange, std,range,primitives) of `dchar`.
6 *
7 * Comma Separated Values provide a simple means to transfer and store
8 * tabular data. It has been common for programs to use their own
9 * variant of the CSV format. This parser will loosely follow the
10 * $(HTTP tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere
11 * to the following criteria (differences from RFC-4180 in parentheses):
12 *
13 * $(UL
14 * $(LI A record is separated by a new line (CRLF,LF,CR))
15 * $(LI A final record may end with a new line)
16 * $(LI A header may be provided as the first record in input)
17 * $(LI A record has fields separated by a comma (customizable))
18 * $(LI A field containing new lines, commas, or double quotes
19 * should be enclosed in double quotes (customizable))
20 * $(LI Double quotes in a field are escaped with a double quote)
21 * $(LI Each record should contain the same number of fields)
22 * )
23 *
24 * Example:
25 *
26 * -------
27 * import std.algorithm;
28 * import std.array;
29 * import std.csv;
30 * import std.stdio;
31 * import std.typecons;
32 *
33 * void main()
34 * {
35 * auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
36 *
37 * foreach (record; csvReader!(Tuple!(string, string, int))(text))
38 * {
39 * writefln("%s works as a %s and earns $%d per year",
40 * record[0], record[1], record[2]);
41 * }
42 *
43 * // To read the same string from the file "filename.csv":
44 *
45 * auto file = File("filename.csv", "r");
46 * foreach (record;
47 * file.byLine.joiner("\n").csvReader!(Tuple!(string, string, int)))
48 * {
49 * writefln("%s works as a %s and earns $%d per year",
50 * record[0], record[1], record[2]);
51 * }
 * }
54 * -------
55 *
 * When an input contains a header the `Contents` can be specified as an
 * associative array. Pass `null` as the header argument to signify that a
 * header is present in the input.
58 *
59 * -------
60 * auto text = "Name,Occupation,Salary\r" ~
61 * "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
62 *
63 * foreach (record; csvReader!(string[string])
64 * (text, null))
65 * {
66 * writefln("%s works as a %s and earns $%s per year.",
67 * record["Name"], record["Occupation"],
68 * record["Salary"]);
69 * }
70 *
71 * // To read the same string from the file "filename.csv":
72 *
73 * auto file = File("filename.csv", "r");
74 *
75 * foreach (record; csvReader!(string[string])
76 * (file.byLine.joiner("\n"), null))
77 * {
78 * writefln("%s works as a %s and earns $%s per year.",
79 * record["Name"], record["Occupation"],
80 * record["Salary"]);
81 * }
82 * -------
83 *
84 * This module allows content to be iterated by record stored in a struct,
 * class, associative array, or as a range of fields. Upon detection of an
 * error a `CSVException` is thrown (can be disabled). `csvNextToken` has been
 * made public to allow for attempted recovery.
88 *
89 * Disabling exceptions will lift many restrictions specified above. A quote
90 * can appear in a field if the field was not quoted. If in a quoted field any
91 * quote by itself, not at the end of a field, will end processing for that
92 * field. The field is ended when there is no input, even if the quote was not
93 * closed.
94 *
95 * See_Also:
96 * $(HTTP en.wikipedia.org/wiki/Comma-separated_values, Wikipedia
97 * Comma-separated values)
98 *
99 * Copyright: Copyright 2011
100 * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
101 * Authors: Jesse Phillips
102 * Source: $(PHOBOSSRC std/csv.d)
103 */
104 module std.csv;
105
106 import std.conv;
107 import std.exception : basicExceptionCtors;
108 import std.range.primitives;
109 import std.traits;
110
/**
 * Exception carrying the row and column at which a CSV problem was detected.
 *
 * Both `row` and `col` are numbered starting at one and refer to the location
 * in the file rather than to any specified header. Special consideration
 * should be made when there is a failure to match the header; see $(LREF
 * HeaderMismatchException) for details.
 *
 * When performing type conversions, $(REF ConvException, std,conv) is stored in
 * the `next` field.
 */
class CSVException : Exception
{
    ///
    size_t row, col;

    // FIXME: Use std.exception.basicExceptionCtors here once
    // https://issues.dlang.org/show_bug.cgi?id=11500 is fixed

    // Message-first constructor; `row` and `col` keep their default of zero.
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
    }

    // Chaining constructor; `row` and `col` keep their default of zero.
    this(string msg, Throwable next, string file = __FILE__,
         size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, next, file, line);
    }

    // Constructor recording the location of the failure.
    this(string msg, size_t row, size_t col, Throwable next = null,
         string file = __FILE__, size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, next, file, line);
        this.row = row;
        this.col = col;
    }

    // Renders the location followed by the message,
    // e.g. "(Row: 1, Col: 2) message".
    override string toString() @safe pure const
    {
        immutable location =
            "(Row: " ~ to!string(row) ~ ", Col: " ~ to!string(col) ~ ") ";
        return location ~ msg;
    }
}
156
///
@safe unittest
{
    import std.algorithm.searching : count;
    import std.exception : collectException;

    // The second record has two fields while the first one has three.
    string input = "a,b,c\nHello,65";
    auto caught = collectException!CSVException(input.csvReader.count);
    assert(caught.toString ==
        "(Row: 0, Col: 0) Row 2's length 2 does not match previous length of 3.");
}
166
///
@safe unittest
{
    import std.algorithm.searching : count;
    import std.exception : collectException;
    import std.typecons : Tuple;

    alias Row = Tuple!(string, int);

    // 'b' in the first record cannot be converted to int.
    string input = "a,b\nHello,65";
    auto caught = collectException!CSVException(csvReader!Row(input).count);
    assert(caught.toString ==
        "(Row: 1, Col: 2) Unexpected 'b' when converting from type string to type int");
}
177
// https://issues.dlang.org/show_bug.cgi?id=24478
@safe unittest
{
    import std.algorithm.searching : count;
    import std.exception : collectException;

    alias Row = string[string];

    // The data record carries one more value than the header has columns.
    string input = "A, B\n1, 2, 3";
    auto caught = collectException!CSVException(
        csvReader!Row(input, null).count);
    assert(caught.toString ==
        "(Row: 1, Col: 3) row contains more values than header");
}
187
@safe pure unittest
{
    import std.string : indexOf;

    // Chaining constructor keeps the original exception in `next`.
    auto cause = new Exception("Foobar");
    auto chained = new CSVException("args", cause);
    assert(chained.next is cause);

    // Location constructor stores row and column ...
    immutable size_t expectedRow = 13;
    immutable size_t expectedCol = 37;
    auto located = new CSVException("argv", expectedRow, expectedCol);
    assert(located.row == expectedRow);
    assert(located.col == expectedCol);

    // ... and both show up in the rendered text.
    auto rendered = located.toString();
    assert(rendered.indexOf("13") >= 0);
    assert(rendered.indexOf("37") >= 0);
}
206
/**
 * Exception thrown when a Token is identified to not be completed: a quote is
 * found in an unquoted field, data continues after a closing quote, or the
 * quoted field was not closed before data was empty.
 */
class IncompleteCellException : CSVException
{
    /**
     * Data pulled from input before finding a problem
     *
     * This field is populated when using $(LREF csvReader)
     * but not by $(LREF csvNextToken) as this data will have
     * already been fed to the output range.
     */
    dstring partialData;

    // Supplies the standard exception constructors from std.exception;
    // row, col and partialData are left at their defaults by them.
    mixin basicExceptionCtors;
}
225
///
@safe unittest
{
    import std.exception : assertThrown;

    // The quote opened before `b` is never closed.
    string input = "a,\"b,c\nHello,65,2.5";
    assertThrown!IncompleteCellException(csvReader(input, ["a", "b", "c"]));
}
233
@safe pure unittest
{
    // The mixed-in chaining constructor keeps the cause in `next`.
    auto cause = new Exception("Foobar");
    auto wrapper = new IncompleteCellException("args", cause);
    assert(wrapper.next is cause);
}
240
/**
 * Exception thrown under different conditions based on the type of $(D
 * Contents).
 *
 * Structure, Class, and Associative Array
 * $(UL
 *     $(LI When a header is provided but a matching column is not found)
 * )
 *
 * Other
 * $(UL
 *     $(LI When a header is provided but a matching column is not found)
 *     $(LI Order did not match that found in the input)
 * )
 *
 * Since a row and column is not meaningful when a column specified by the
 * header is not found in the data, both row and col will be zero. Otherwise
 * row is always one and col is the first instance found in header that
 * occurred before the previous starting at one.
 */
class HeaderMismatchException : CSVException
{
    // Supplies the standard exception constructors from std.exception;
    // row and col are left at zero unless the thrower sets them afterwards.
    mixin basicExceptionCtors;
}
265
///
@safe unittest
{
    import std.exception : assertThrown;

    // "invalid" is not a column of the input header.
    string input = "a,b,c\nHello,65,2.5";
    assertThrown!HeaderMismatchException(
        csvReader(input, ["b", "c", "invalid"]));
}
273
@safe pure unittest
{
    // The mixed-in chaining constructor keeps the cause in `next`.
    auto cause = new Exception("Foobar");
    auto wrapper = new HeaderMismatchException("args", cause);
    assert(wrapper.next is cause);
}
280
/**
 * Determines the behavior for when an error is detected.
 *
 * Disabling exceptions will follow these rules:
 * $(UL
 *     $(LI A quote can appear in a field if the field was not quoted.)
 *     $(LI If in a quoted field any quote by itself, not at the end of a
 *     field, will end processing for that field.)
 *     $(LI The field is ended when there is no input, even if the quote was
 *     not closed.)
 *     $(LI If the given header does not match the order in the input, the
 *     content will return as it is found in the input.)
 *     $(LI If the given header contains columns not found in the input they
 *     will be ignored.)
 * )
 */
enum Malformed
{
    ignore,        /// No exceptions are thrown due to incorrect CSV.
    throwException /// Use exceptions when input has incorrect CSV.
}
302
///
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.searching : count;
    import std.exception : assertThrown;

    // The last field opens a quote that is never closed.
    string input = "a,b,c\nHello,65,\"2.5";
    assertThrown!IncompleteCellException(input.csvReader.count);

    // ignore the exceptions and try to handle invalid CSV
    auto lenient = csvReader!(string, Malformed.ignore)(input, null);
    auto firstLine = lenient.front;
    assert(equal(firstLine, ["Hello", "65", "2.5"]));
}
317
/**
Returns an $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
for iterating over records found in `input`.

An optional `header` can be provided. The first record will be read in
as the header. If `Contents` is a struct then the header provided is
expected to correspond to the fields in the struct. When `Contents` is
not a type which can contain the entire record, the `header` must be
provided in the same order as the input or an exception is thrown.

Params:
    input = range of `dchar` holding the CSV text
    header = columns to read; `null` reads every column of the header record
    delimiter = character separating the fields of a record
    quote = character used to enclose a field
    allowInconsistentDelimiterCount = when `true`, records may have a
        different number of fields than the first record

Returns:
    An input range R as defined by
    $(REF isInputRange, std,range,primitives). When `Contents` is a
    struct, class, or an associative array, the element type of R is
    `Contents`, otherwise the element type of R is itself a range with
    element type `Contents`.

    If a `header` argument is provided,
    the returned range provides a `header` field for accessing the header
    from the input in array form.

Throws:
    $(LREF CSVException) When a quote is found in an unquoted field,
    data continues after a closing quote, the quoted field was not
    closed before data was empty, a conversion failed, or when the row's
    length does not match the previous length.

    $(LREF HeaderMismatchException) when a header is provided but a
    matching column is not found or the order did not match that found in
    the input. Read the exception documentation for specific details of
    when the exception is thrown for different types of `Contents`.
*/
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Separator = char)
              (Range input,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && !is(Contents T : T[U], U : string))
{
    // Header-less reader: the header type parameter is fixed to string[].
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);
    return Reader(input, delimiter, quote, allowInconsistentDelimiterCount);
}
361
/// ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && isForwardRange!Header
    && isSomeString!(ElementType!Header))
{
    // Reader parameterised on the caller's own header range type.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), Header);
    return Reader(input, header, delimiter, quote,
                  allowInconsistentDelimiterCount);
}
378
/// ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && is(Header : typeof(null)))
{
    // A `null` header is forwarded as an empty string[] header.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);
    return Reader(input, cast(string[]) null, delimiter, quote,
                  allowInconsistentDelimiterCount);
}
395
396
/**
The `Contents` of the input can be provided if all the records are the
same type such as all integer data:
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    string data = "76,26,22";
    auto rows = csvReader!int(data);
    assert(equal!equal(rows, [[76, 26, 22]]));
}
410
/**
Using a struct with modified delimiter:
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    struct Layout
    {
        string name;
        int value;
        double other;
    }

    string input = "Hello;65;2.5\nWorld;123;7.5";
    auto expected = [Layout("Hello", 65, 2.5), Layout("World", 123, 7.5)];

    auto rows = csvReader!Layout(input, ';');
    assert(equal(rows, expected));
}
431
/**
Specifying `ErrorLevel` as $(LREF Malformed.ignore) will lift restrictions
on the format. This example shows that an exception is not thrown when
finding a quote in a field not quoted.
*/
@safe unittest
{
    string input = "A \" is now part of the data";
    auto rows = csvReader!(string, Malformed.ignore)(input);
    auto firstRow = rows.front;

    assert(firstRow.front == input);
}
445
/// Read only column "b"
@safe unittest
{
    import std.algorithm.comparison : equal;

    string input = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    auto columnB = csvReader!int(input, ["b"]);

    assert(equal!equal(columnB, [[65], [123]]));
}
458
/// Read while rearranging the columns by specifying a header with a different order
@safe unittest
{
    import std.algorithm.comparison : equal;

    struct Layout
    {
        int value;
        double other;
        string name;
    }

    string input = "a,b,c\nHello,65,2.5\nWorld,123,7.5";
    auto expected = [Layout(65, 2.5, "Hello"), Layout(123, 7.5, "World")];

    // Header entries are matched to the struct fields in declaration order.
    auto rows = csvReader!Layout(input, ["b", "c", "a"]);
    assert(equal(rows, expected));
}
477
/**
The header can also be left empty if the input contains a header row
and all columns should be iterated.
The header from the input can always be accessed from the `header` field.
*/
@safe unittest
{
    string input = "a,b,c\nHello,65,63.63";
    auto rows = csvReader(input, null);

    assert(rows.header == ["a", "b", "c"]);
}
490
/**
Handcrafted csv files tend to have a variable amount of columns.

By default `std.csv` will throw if the number of columns on a line
is unequal to the number of columns of the first line.
To allow, or disallow, a variable amount of columns a `bool` can be passed to
all overloads of the `csvReader` function as shown below.
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    string input = "76,26,22\n1,2\n3,4,5,6";
    auto rows = csvReader!int(input, ',', '"', true);

    assert(equal!equal(rows, [
        [76, 26, 22],
        [1, 2],
        [3, 4, 5, 6]
    ]));
}
512
/// ditto
@safe unittest
{
    import std.algorithm.comparison : equal;

    static struct Three
    {
        int a;
        int b;
        int c;
    }

    string input = "76,26,22\n1,2\n3,4,5,6";
    // Missing fields keep their default; surplus fields are dropped.
    auto expected = [Three(76, 26, 22), Three(1, 2, 0), Three(3, 4, 5)];

    auto rows = csvReader!Three(input, ',', '"', true);
    assert(equal(rows, expected));
}
534
/// ditto
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto input = "Name,Occupation,Salary\r" ~
        "Joe,Carpenter,300000\nFred,Blacksmith\r\n";

    string[string][] expected = [
        ["Name" : "Joe", "Occupation" : "Carpenter", "Salary" : "300000"],
        ["Name" : "Fred", "Occupation" : "Blacksmith"]
    ];

    auto rows = csvReader!(string[string])(input, null, ',', '"', true);
    assert(equal(rows, expected));
}
550
// Test standard iteration over input.
@safe pure unittest
{
    string input = `one,"two ""quoted"""` ~ "\n\"three\nnew line\",\nfive,six";

    // Three records with two cells each.
    int cells;
    foreach (row; csvReader(input))
    {
        foreach (cell; row)
        {
            ++cells;
        }
    }
    assert(cells == 6);
}
567
// Test newline on last record
@safe pure unittest
{
    string input = "one,two\nthree,four\n";
    auto rows = csvReader(input);

    // Two records; the trailing newline must not produce a third.
    foreach (_; 0 .. 2)
        rows.popFront();
    assert(rows.empty);
}
577
// Test shorter row length
@safe pure unittest
{
    struct Layout
    {
        string name;
        int value;
    }

    wstring input = "one,1\ntwo\nthree"w;

    // Rows lacking the second field leave `value` at its default.
    Layout[3] expected = [
        Layout("one", 1),
        Layout("two", 0),
        Layout("three", 0)
    ];

    int seen;
    foreach (row; csvReader!(Layout, Malformed.ignore)(input))
    {
        assert(expected[seen].name == row.name);
        assert(expected[seen].value == row.value);
        ++seen;
    }
}
606
// Test shorter row length exception
@safe pure unittest
{
    import std.exception : assertThrown;

    struct A
    {
        string a,b,c;
    }

    // Every input contains records whose lengths disagree.
    foreach (input; ["one,1\ntwo",
                     "one\ntwo,2,二\nthree,3,三",
                     "one\ntwo,2\nthree,3",
                     "one,1\ntwo\nthree,3"])
    {
        auto rows = csvReader!A(input);
        assertThrown!CSVException(() { foreach (row; rows) { } }());
    }
}
628
629
// Test structure conversion interface with unicode.
@safe pure unittest
{
    import std.math.algebraic : abs;

    struct Layout
    {
        string name;
        int value;
        double other;
    }

    wstring input = "\U00010143Hello,65,63.63\nWorld,123,3673.562"w;

    Layout[2] expected = [
        Layout("\U00010143Hello", 65, 63.63),
        Layout("World", 123, 3673.562)
    ];

    int seen;
    foreach (row; csvReader!Layout(input))
    {
        assert(expected[seen].name == row.name);
        assert(expected[seen].value == row.value);
        assert(abs(expected[seen].other - row.other) < 0.00001);
        ++seen;
    }
    assert(seen == expected.length);
}
663
// Test input conversion interface
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    string input = `76,26,22`;
    int[] expected = [76, 26, 22];

    foreach (row; csvReader!int(input))
    {
        assert(equal(row, expected));
    }
}
677
// Test struct & header interface and same unicode
@safe unittest
{
    import std.math.algebraic : abs;

    struct Layout
    {
        int value;
        double other;
        string name;
    }

    string input = "a,b,c\nHello,65,63.63\n➊➋➂❹,123,3673.562";
    auto rows = csvReader!Layout(input, ["b", "c", "a"]);

    Layout[2] expected = [
        Layout(65, 63.63, "Hello"),
        Layout(123, 3673.562, "➊➋➂❹")
    ];

    int seen;
    foreach (row; rows)
    {
        assert(expected[seen].name == row.name);
        assert(expected[seen].value == row.value);
        assert(abs(expected[seen].other - row.other) < 0.00001);
        ++seen;
    }
    assert(seen == expected.length);
}
712
// Test header interface
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.exception : collectException;

    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // Selecting a single column yields one converted value per record.
    auto expectedInts = [[65], [123]];
    foreach (row; csvReader!int(str, ["b"]))
    {
        assert(equal(row, expectedInts.front));
        expectedInts.popFront();
    }

    // A header given out of input order is rejected for range contents.
    auto mismatch = collectException!HeaderMismatchException(
        csvReader(str, ["c", "b"]));
    assert(mismatch !is null);
    assert(mismatch.col == 2);

    // With errors ignored the columns come back in input order.
    auto lenient = csvReader!(string, Malformed.ignore)
        (str, ["b", "a"], ',', '"');

    auto expectedStrings = [["Hello", "65"], ["World", "123"]];
    foreach (row; lenient)
    {
        assert(equal(row, expectedStrings.front));
        expectedStrings.popFront();
    }

    // Requested columns missing from the input are skipped.
    str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
    lenient = csvReader!(string, Malformed.ignore)
        (str, ["a", "b", "c", "d"], ',', '"');

    expectedStrings = [["Joe", "Carpenter"], ["Fred", "Fly"]];
    foreach (row; lenient)
    {
        assert(equal(row, expectedStrings.front));
        expectedStrings.popFront();
    }
}
758
// Test null header interface
@safe unittest
{
    string input = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    auto rows = csvReader(input, ["a"]);

    // `header` reports every column of the input, not only the requested one.
    assert(rows.header == ["a", "b", "c"]);
}
767
// Test unchecked read
@safe pure unittest
{
    struct Ans
    {
        string a,b;
    }

    // Quotes inside an unquoted field are kept verbatim.
    string input = "one \"quoted\"";
    foreach (row; csvReader!(string, Malformed.ignore)(input))
    {
        foreach (cell; row)
        {
            assert(cell == "one \"quoted\"");
        }
    }

    input = "one \"quoted\",two \"quoted\" end";
    foreach (row; csvReader!(Ans, Malformed.ignore)(input))
    {
        assert(row.a == "one \"quoted\"");
        assert(row.b == "two \"quoted\" end");
    }
}
791
// Test partial data returned
@safe pure unittest
{
    // The opening quote is never closed.
    string input = "\"one\nnew line";

    try
    {
        foreach (row; csvReader(input))
        {
        }
        assert(0);
    }
    catch (IncompleteCellException incomplete)
    {
        // Everything read before the input ran out is reported back.
        assert(incomplete.partialData == "one\nnew line");
    }
}
808
// Test Windows line break
@safe pure unittest
{
    string input = "one,two\r\nthree";
    auto rows = csvReader(input);

    // First record: "one", "two".
    auto row = rows.front;
    assert(row.front == "one");
    row.popFront();
    assert(row.front == "two");

    // "\r\n" is consumed as a single record separator.
    rows.popFront();
    row = rows.front;
    assert(row.front == "three");
}
823
824
// Test associative array support with unicode separator
@safe unittest
{
    string input = "1❁2❁3\n34❁65❁63\n34❁65❁63";
    auto rows = csvReader!(string[string])(input, ["3", "1"], '❁');

    int seen;
    foreach (row; rows)
    {
        ++seen;
        assert(row["1"] == "34");
        assert(row["3"] == "63");
    }
    assert(seen == 2);
}
840
// Test restricted range
@safe unittest
{
    import std.typecons : Tuple;

    // Minimal input range of dchar: only empty, front and popFront.
    struct InputRange
    {
        dstring text;

        this(dstring txt)
        {
            text = txt;
        }

        @property auto empty()
        {
            return text.empty;
        }

        void popFront()
        {
            text.popFront();
        }

        @property dchar front()
        {
            return text[0];
        }
    }

    auto ir = InputRange("Name,Occupation,Salary\r"d ~
        "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"d);

    // Each Contents kind must work with the bare input range.
    foreach (row; csvReader(ir, cast(string[]) null))
    {
        foreach (cell; row)
        {
        }
    }
    foreach (row; csvReader!(Tuple!(string, string, int))
        (ir, cast(string[]) null))
    {
    }
    foreach (row; csvReader!(string[string])
        (ir, cast(string[]) null))
    {
    }
}
879
@safe unittest // const/immutable dchars
{
    import std.algorithm.iteration : map;
    import std.array : array;

    const(dchar)[] c = "foo,bar\n";
    auto constRows = csvReader(c).map!array.array;
    assert(constRows == [["foo", "bar"]]);

    immutable(dchar)[] i = "foo,bar\n";
    auto immutableRows = csvReader(i).map!array.array;
    assert(immutableRows == [["foo", "bar"]]);
}
889
/*
 * This struct is stored on the heap for when the structures
 * are passed around.
 *
 * CsvReader allocates it with `new` and hands the pointer to the record
 * ranges it creates.
 */
private pure struct Input(Range, Malformed ErrorLevel)
{
    // The underlying character input being consumed.
    Range range;
    // Current record number (incremented by CsvReader before each record is
    // primed) and column position within that record (reset to zero by
    // CsvReader between records).
    size_t row, col;
    // Only tracked when errors are reported: CsvReader stores `col` here
    // after a record has been consumed while this is still zero.
    static if (ErrorLevel == Malformed.throwException)
        size_t rowLength;
}
901
/*
 * Range for iterating CSV records.
 *
 * This range is returned by the $(LREF csvReader) functions. It can be
 * created in a similar manner to allow `ErrorLevel` be set to $(LREF
 * Malformed).ignore if best guess processing should take place.
 */
private struct CsvReader(Contents, Malformed ErrorLevel, Range, Separator, Header)
if (isSomeChar!Separator && isInputRange!Range
    && is(immutable ElementType!Range == immutable dchar)
    && isForwardRange!Header && isSomeString!(ElementType!Header))
{
private:
    // Heap-allocated parsing state; the pointer is handed to every record
    // range created by this reader.
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // Input column index for each requested header entry (empty when no
    // header was requested).
    size_t[] indices;
    bool _empty;
    bool _allowInconsistentDelimiterCount;
    static if (is(Contents == struct) || is(Contents == class))
    {
        // Aggregates are filled from string cells converted per field.
        Contents recordContent;
        CsvRecord!(string, ErrorLevel, Range, Separator) recordRange;
    }
    else static if (is(Contents T : T[U], U : string))
    {
        // Associative arrays are filled with cells converted to the value type.
        Contents recordContent;
        CsvRecord!(T, ErrorLevel, Range, Separator) recordRange;
    }
    else
        CsvRecord!(Contents, ErrorLevel, Range, Separator) recordRange;
public:
    /**
     * Header from the input in array form.
     *
     * -------
     * string str = "a,b,c\nHello,65,63.63";
     * auto records = csvReader(str, ["a"]);
     *
     * assert(records.header == ["a","b","c"]);
     * -------
     */
    string[] header;

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * without a header.
     *
     * -------
     * string str = `76;^26^;22`;
     * int[] ans = [76,26,22];
     * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
     *       (str, ';', '^', false);
     *
     * foreach (record; records)
     * {
     *    assert(equal(record, ans));
     * }
     * -------
     */
    this(Range input, Separator delimiter, Separator quote,
         bool allowInconsistentDelimiterCount)
    {
        _input = new Input!(Range, ErrorLevel)(input);
        _separator = delimiter;
        _quote = quote;
        _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;

        if (_input.range.empty)
        {
            _empty = true;
            return;
        }

        prime();
    }

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * with a header.
     *
     * -------
     * string str = "high;mean;low\n76;^26^;22";
     * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
     *       (str, ["high","low"], ';', '^', false);
     *
     * int[] ans = [76,22];
     * foreach (record; records)
     * {
     *    assert(equal(record, ans));
     * }
     * -------
     *
     * Throws:
     *       $(LREF HeaderMismatchException) when a header is provided but a
     *       matching column is not found or the order did not match that found
     *       in the input (non-struct).
     */
    this(Range input, Header colHeaders, Separator delimiter, Separator quote,
         bool allowInconsistentDelimiterCount)
    {
        _input = new Input!(Range, ErrorLevel)(input);
        _separator = delimiter;
        _quote = quote;
        _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;

        if (_input.range.empty)
        {
            _empty = true;
            return;
        }

        // Every requested column starts out as "not found".
        size_t[string] colToIndex;
        foreach (h; colHeaders)
        {
            colToIndex[h] = size_t.max;
        }

        auto r = CsvRecord!(string, ErrorLevel, Range, Separator)
            (_input, _separator, _quote, indices,
             _allowInconsistentDelimiterCount);

        // Read the header record, remembering where each requested column
        // sits in the input.
        size_t colIndex;
        foreach (col; r)
        {
            header ~= col;
            auto ptr = col in colToIndex;
            if (ptr)
                *ptr = colIndex;
            colIndex++;
        }
        // The above loop empties the header row.
        recordRange._empty = true;
        recordRange._allowInconsistentDelimiterCount =
            allowInconsistentDelimiterCount;

        // Translate the requested header, in the caller's order, to indices.
        indices.length = colToIndex.length;
        size_t i;
        foreach (h; colHeaders)
        {
            immutable index = colToIndex[h];
            static if (ErrorLevel != Malformed.ignore)
                if (index == size_t.max)
                    throw new HeaderMismatchException
                        ("Header not found: " ~ to!string(h));
            indices[i++] = index;
        }

        static if (!is(Contents == struct) && !is(Contents == class))
        {
            static if (is(Contents T : T[U], U : string))
            {
                import std.algorithm.sorting : sort;
                sort(indices);
            }
            else static if (ErrorLevel == Malformed.ignore)
            {
                import std.algorithm.sorting : sort;
                sort(indices);
            }
            else
            {
                import std.algorithm.sorting : isSorted;
                if (!isSorted(indices))
                {
                    auto ex = new HeaderMismatchException
                        ("Header in input does not match specified header.");
                    // NOTE(review): col reports the input index of the first
                    // requested column; a previous `findAdjacent` call here
                    // discarded its result and had no effect.
                    ex.row = 1;
                    ex.col = indices.front;

                    throw ex;
                }
            }
        }

        popFront();
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Returns:
     *      If `Contents` is a struct, will be filled with record data.
     *
     *      If `Contents` is a class, will be filled with record data.
     *
     *      If `Contents` is an associative array, will be filled
     *      with record data.
     *
     *      If `Contents` is non-struct, a $(LREF CsvRecord) will be
     *      returned.
     */
    @property auto front()
    {
        assert(!empty, "Attempting to fetch the front of an empty CsvReader");
        static if (is(Contents == struct) || is(Contents == class))
        {
            return recordContent;
        }
        else static if (is(Contents T : T[U], U : string))
        {
            return recordContent;
        }
        else
        {
            return recordRange;
        }
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property bool empty() @safe @nogc pure nothrow const
    {
        return _empty;
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, a conversion failed, or when the
     *       row's length does not match the previous length.
     */
    void popFront()
    {
        // Skip whatever is left of the current record.
        while (!recordRange.empty)
        {
            recordRange.popFront();
        }

        // Remember the length of the first completed record.
        static if (ErrorLevel == Malformed.throwException)
            if (_input.rowLength == 0)
                _input.rowLength = _input.col;

        _input.col = 0;

        // Consume one record separator: CRLF, CR or LF.
        if (!_input.range.empty)
        {
            if (_input.range.front == '\r')
            {
                _input.range.popFront();
                if (!_input.range.empty && _input.range.front == '\n')
                    _input.range.popFront();
            }
            else if (_input.range.front == '\n')
                _input.range.popFront();
        }

        if (_input.range.empty)
        {
            _empty = true;
            return;
        }

        prime();
    }

    // Sets up `recordRange` for the next record and, for aggregate and
    // associative array contents, reads the whole record into
    // `recordContent`.
    private void prime()
    {
        if (_empty)
            return;
        _input.row++;
        static if (is(Contents == struct) || is(Contents == class))
        {
            // Aggregates need every column; the mapping to fields is done
            // below using `indices`.
            recordRange = typeof(recordRange)
                (_input, _separator, _quote, null,
                 _allowInconsistentDelimiterCount);
        }
        else
        {
            recordRange = typeof(recordRange)
                (_input, _separator, _quote, indices,
                 _allowInconsistentDelimiterCount);
        }

        static if (is(Contents T : T[U], U : string))
        {
            T[U] aa;
            try
            {
                for (; !recordRange.empty; recordRange.popFront())
                {
                    // Presumably `_input.col` is the one-based column of the
                    // current field, maintained by CsvRecord - TODO confirm.
                    const i = _input.col - 1;
                    if (i >= header.length)
                        throw new CSVException("row contains more values than header", _input.row, _input.col);
                    aa[header[i]] = recordRange.front;
                }
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }

            recordContent = aa;
        }
        else static if (is(Contents == struct) || is(Contents == class))
        {
            static if (is(Contents == class))
                recordContent = new typeof(recordContent)();
            else
                recordContent = typeof(recordContent).init;
            size_t colIndex;
            try
            {
                for (; !recordRange.empty;)
                {
                    auto colData = recordRange.front;
                    // Also runs when a conversion throws, so the reported
                    // column is one-based.
                    scope(exit) colIndex++;
                    if (indices.length > 0)
                    {
                        foreach (ti, ToType; Fields!(Contents))
                        {
                            // A header with fewer entries than the aggregate
                            // has fields leaves the remaining fields at
                            // their defaults instead of indexing past the
                            // end of `indices`.
                            if (ti < indices.length && indices[ti] == colIndex)
                            {
                                static if (!isSomeString!ToType) skipWS(colData);
                                recordContent.tupleof[ti] = to!ToType(colData);
                            }
                        }
                    }
                    else
                    {
                        foreach (ti, ToType; Fields!(Contents))
                        {
                            if (ti == colIndex)
                            {
                                static if (!isSomeString!ToType) skipWS(colData);
                                recordContent.tupleof[ti] = to!ToType(colData);
                            }
                        }
                    }
                    recordRange.popFront();
                }
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, colIndex, e);
            }
        }
    }
}
1250
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    // Reader over `int` fields with the `Malformed.ignore` error level,
    // using ';' as separator and '^' as quote.
    alias Reader = CsvReader!(int, Malformed.ignore, string, char, string[]);

    string text = `76;^26^;22`;
    int[] expected = [76, 26, 22];

    foreach (row; Reader(text, ';', '^', false))
        assert(equal(row, expected));
}
1265
1266 // https://issues.dlang.org/show_bug.cgi?id=15545
1267 // @system due to the catch for Throwable
@system pure unittest
{
    import std.exception : assertNotThrown;

    // Two lines of text whose last character is a lone carriage return.
    // The continuation line stays flush-left: any whitespace before it
    // would become part of the literal.
    enum failData =
    "name, surname, age
Joe, Joker, 99\r";

    auto reader = csvReader(failData);

    // Iterating over every record must finish without throwing.
    assertNotThrown((){
        foreach (entry; reader)
        {
        }
    }());
}
1277
/*
 * This input range is accessible through $(LREF CsvReader) when the
 * requested `Contents` type is neither a structure nor an associative array.
 */
private struct CsvRecord(Contents, Malformed ErrorLevel, Range, Separator)
if (!is(Contents == class) && !is(Contents == struct))
{
    import std.array : appender;
private:
    // Pointer to the input state this record reads from
    // (`range`, `row`, `col`, and `rowLength` when throwing on errors).
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // Converted value of the current field; handed out by `front`.
    Contents curContentsoken;
    // Raw text of the field most recently read by `csvNextToken`.
    typeof(appender!(dchar[])()) _front;
    bool _empty;
    bool _allowInconsistentDelimiterCount;
    // Per-selected-column skip counts computed in the constructor;
    // `prime` consumes one entry per field.
    size_t[] _popCount;
public:
    /*
     * Params:
     *      input = Pointer to a character $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
     *      delimiter = Separator for each column
     *      quote = Character used for quotation
     *      indices = An array containing which columns will be returned.
     *             If empty, all columns are returned. List must be in order.
     *      allowInconsistentDelimiterCount = When set, a row whose length
     *             differs from the recorded row length does not raise a
     *             $(LREF CSVException).
     */
    this(Input!(Range, ErrorLevel)* input, Separator delimiter,
         Separator quote, size_t[] indices,
         bool allowInconsistentDelimiterCount)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;

        _front = appender!(dchar[])();
        _popCount = indices.dup;
        _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;

        // If a header was given, each call to popFront will need
        // to eliminate so many tokens. This calculates
        // how many will be skipped to get to the next header column
        size_t normalizer;
        foreach (ref c; _popCount)
        {
            static if (ErrorLevel == Malformed.ignore)
            {
                // If we are not throwing exceptions
                // a header may not exist, indices are sorted
                // and will be size_t.max if not found.
                if (c == size_t.max)
                    break;
            }
            c -= normalizer;
            normalizer += c + 1;
        }

        prime();
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property Contents front() @safe pure
    {
        assert(!empty, "Attempting to fetch the front of an empty CsvRecord");
        return curContentsoken;
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property bool empty() @safe pure nothrow @nogc const
    {
        return _empty;
    }

    /*
     * CsvRecord is complete when input
     * is empty or starts with record break
     */
    private bool recordEnd()
    {
        return _input.range.empty
            || _input.range.front == '\n'
            || _input.range.front == '\r';
    }


    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, a conversion failed, or when the
     *       row's length does not match the previous length.
     */
    void popFront()
    {
        static if (ErrorLevel == Malformed.throwException)
            import std.format : format;
        // Skip last of record when header is depleted.
        // A non-null but empty `_popCount` means column indices were
        // supplied and every one of them has been consumed.
        if (_popCount.ptr && _popCount.empty)
            while (!recordEnd())
            {
                prime(1);
            }

        if (recordEnd())
        {
            _empty = true;
            static if (ErrorLevel == Malformed.throwException)
            {
                if (_input.rowLength != 0 && _input.col != _input.rowLength
                    && !_allowInconsistentDelimiterCount)
                {
                    throw new CSVException(
                       format("Row %s's length %s does not match "~
                              "previous length of %s.", _input.row,
                              _input.col, _input.rowLength));
                }
            }
            return;
        }
        else
        {
            static if (ErrorLevel == Malformed.throwException)
            {
                if (_input.rowLength != 0 && _input.col > _input.rowLength)
                {
                    if (!_allowInconsistentDelimiterCount)
                    {
                        throw new CSVException(
                           format("Row %s's length %s does not match "~
                                  "previous length of %s.", _input.row,
                                  _input.col, _input.rowLength));
                    }
                    else
                    {
                        _empty = true;
                        return;
                    }
                }
            }
        }

        // Separator is left on the end of input from the last call.
        // This cannot be moved to after the call to csvNextToken as
        // there may be an empty record after it.
        // (`recordEnd()` was false above, so the range is not empty here.)
        if (_input.range.front == _separator)
            _input.range.popFront();

        _front.shrinkTo(0);

        prime();
    }

    /*
     * Handles moving to the next skipNum token.
     *
     * Each step counts one column, clears the token buffer, drops a
     * pending separator and reads the next token into `_front`.
     */
    private void prime(size_t skipNum)
    {
        foreach (i; 0 .. skipNum)
        {
            _input.col++;
            _front.shrinkTo(0);
            // The preceding token may have consumed the last of the input
            // (a short final row without a trailing record break), so
            // `front` must not be evaluated on an exhausted range. With
            // the guard, such a row is handled like a short row that ends
            // in a record break: csvNextToken returns at once and the
            // skipped field reads as empty.
            if (!_input.range.empty && _input.range.front == _separator)
                _input.range.popFront();

            try
                csvNextToken!(Range, ErrorLevel, Separator)
                                   (_input.range, _front, _separator, _quote,false);
            catch (IncompleteCellException ice)
            {
                // Attach position and the partial token before rethrowing.
                ice.row = _input.row;
                ice.col = _input.col;
                ice.partialData = _front.data.idup;
                throw ice;
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }
        }
    }

    /*
     * Reads the next token, skips ahead to the next requested column when
     * column indices are in use, and converts the token to `Contents`.
     */
    private void prime()
    {
        try
        {
            _input.col++;
            csvNextToken!(Range, ErrorLevel, Separator)
                (_input.range, _front, _separator, _quote,false);
        }
        catch (IncompleteCellException ice)
        {
            // Attach position and the partial token before rethrowing.
            ice.row = _input.row;
            ice.col = _input.col;
            ice.partialData = _front.data.idup;
            throw ice;
        }

        auto skipNum = _popCount.empty ? 0 : _popCount.front;
        if (!_popCount.empty)
            _popCount.popFront();

        // size_t.max marks a requested column that was not found (see the
        // constructor): consume the rest of the record and finish it.
        if (skipNum == size_t.max)
        {
            while (!recordEnd())
                prime(1);
            _empty = true;
            return;
        }

        if (skipNum)
            prime(skipNum);

        auto data = _front.data;
        static if (!isSomeString!Contents) skipWS(data);
        try curContentsoken = to!Contents(data);
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, _input.col, e);
        }
    }
}
1511
/**
 * Lower level control over parsing CSV
 *
 * This function consumes the input. After each call the input will
 * start with either a delimiter or record break (\n, \r\n, \r) which
 * must be removed for subsequent calls.
 *
 * When `startQuoted` is `true` the input is read as the continuation of
 * a quoted field: a leading record break is field data, and a leading
 * quote is either the closing quote or the first half of an escaped
 * quote, never an opening quote.
 *
 * Params:
 *       input = Any CSV input
 *       ans   = The first field in the input
 *       sep   = The character to represent a comma in the specification
 *       quote = The character to represent a quote in the specification
 *       startQuoted = Whether the input should be considered to already be in
 * quotes
 *
 * Throws:
 *       $(LREF IncompleteCellException) When a quote is found in an unquoted
 *       field, data continues after a closing quote, or the quoted field was
 *       not closed before data was empty.
 */
void csvNextToken(Range, Malformed ErrorLevel = Malformed.throwException,
                           Separator, Output)
                          (ref Range input, ref Output ans,
                           Separator sep, Separator quote,
                           bool startQuoted = false)
if (isSomeChar!Separator && isInputRange!Range
    && is(immutable ElementType!Range == immutable dchar)
    && isOutputRange!(Output, dchar))
{
    bool quoted = startQuoted;
    bool escQuote;
    if (input.empty)
        return;

    // The record-break shortcut and the opening-quote detection apply only
    // at the start of a fresh field. When the caller says we are already
    // inside quotes, the first character goes through the main loop like
    // any other quoted content.
    if (!quoted)
    {
        if (input.front == '\n')
            return;
        if (input.front == '\r')
            return;

        if (input.front == quote)
        {
            quoted = true;
            input.popFront();
        }
    }

    while (!input.empty)
    {
        assert(!(quoted && escQuote),
            "Invalid quotation state in csvNextToken");
        if (!quoted)
        {
            // When not quoted the token ends at sep
            if (input.front == sep)
                break;
            if (input.front == '\r')
                break;
            if (input.front == '\n')
                break;
        }
        if (!quoted && !escQuote)
        {
            if (input.front == quote)
            {
                // Not quoted, but quote found
                static if (ErrorLevel == Malformed.throwException)
                    throw new IncompleteCellException(
                          "Quote located in unquoted token");
                else static if (ErrorLevel == Malformed.ignore)
                    ans.put(quote);
            }
            else
            {
                // Not quoted, non-quote character
                ans.put(input.front);
            }
        }
        else
        {
            if (input.front == quote)
            {
                // Quoted, quote found
                // By turning off quoted and turning on escQuote
                // I can tell when to add a quote to the string
                // escQuote is turned to false when it escapes a
                // quote or is followed by a non-quote (see outside else).
                // They are mutually exclusive, but provide different
                // information.
                if (escQuote)
                {
                    escQuote = false;
                    quoted = true;
                    ans.put(quote);
                } else
                {
                    escQuote = true;
                    quoted = false;
                }
            }
            else
            {
                // Quoted, non-quote character
                if (escQuote)
                {
                    static if (ErrorLevel == Malformed.throwException)
                        throw new IncompleteCellException(
                          "Content continues after end quote, " ~
                          "or needs to be escaped.");
                    else static if (ErrorLevel == Malformed.ignore)
                        break;
                }
                ans.put(input.front);
            }
        }
        input.popFront();
    }

    // Still inside quotes once the loop has ended: the field was never closed.
    static if (ErrorLevel == Malformed.throwException)
        if (quoted && (input.empty || input.front == '\n' || input.front == '\r'))
            throw new IncompleteCellException(
                  "Data continues on future lines or trailing quote");

}
1634
1635 ///
@safe unittest
{
    import std.array : appender;
    import std.range.primitives : popFront;

    string input = "65,63\n123,3673";

    auto field = appender!(char[])();

    // First field: parsing stops in front of the separator.
    csvNextToken(input, field, ',', '"');
    assert(field.data == "65");
    assert(input == ",63\n123,3673");

    // Remove the separator and clear the buffer before the next call.
    input.popFront();
    field.shrinkTo(0);
    csvNextToken(input, field, ',', '"');
    assert(field.data == "63");
    assert(input == "\n123,3673");

    // Likewise remove the record break to reach the next record.
    input.popFront();
    field.shrinkTo(0);
    csvNextToken(input, field, ',', '"');
    assert(field.data == "123");
    assert(input == ",3673");
}
1661
1662 // Test csvNextToken on simplest form and correct format.
@safe pure unittest
{
    import std.array;

    // Expected token and expected unread remainder after one call.
    static struct Step
    {
        dstring token;
        string rest;
    }

    string text = "\U00010143Hello,65,63.63\nWorld,123,3673.562";

    auto buf = appender!(dchar[])();

    // The first call names the Range type explicitly.
    csvNextToken!string(text, buf, ',', '"');
    assert(buf.data == "\U00010143Hello");
    assert(text == ",65,63.63\nWorld,123,3673.562");

    immutable Step[] remaining = [
        Step("65", ",63.63\nWorld,123,3673.562"),
        Step("63.63", "\nWorld,123,3673.562"),
        Step("World", ",123,3673.562"),
        Step("123", ",3673.562"),
        Step("3673.562", ""),
    ];

    foreach (step; remaining)
    {
        // Drop the delimiter or record break left by the previous call.
        text.popFront();
        buf.shrinkTo(0);
        csvNextToken(text, buf, ',', '"');
        assert(buf.data == step.token);
        assert(text == step.rest);
    }
}
1704
1705 // Test quoted tokens
@safe pure unittest
{
    import std.array;

    // Expected token and expected unread remainder after one call.
    static struct Step
    {
        dstring token;
        string rest;
    }

    // Shared ending: a quoted field with an embedded newline, then "six".
    enum tail = "\"five\nnew line\"\nsix";

    string text = `one,two,"three ""quoted""","",` ~ tail;

    auto buf = appender!(dchar[])();

    // The first call names the Range type explicitly.
    csvNextToken!string(text, buf, ',', '"');
    assert(buf.data == "one");
    assert(text == `,two,"three ""quoted""","",` ~ tail);

    immutable Step[] remaining = [
        Step("two", `,"three ""quoted""","",` ~ tail),
        Step("three \"quoted\"", `,"",` ~ tail),
        Step("", "," ~ tail),
        Step("five\nnew line", "\nsix"),
        Step("six", ""),
    ];

    foreach (step; remaining)
    {
        // Drop the delimiter or record break left by the previous call.
        text.popFront();
        buf.shrinkTo(0);
        csvNextToken(text, buf, ',', '"');
        assert(buf.data == step.token);
        assert(text == step.rest);
    }
}
1747
1748 // Test empty data is pulled at end of record.
@safe pure unittest
{
    import std.array;

    string text = "one,";
    auto buf = appender!(dchar[])();

    csvNextToken(text, buf, ',', '"');
    assert(buf.data == "one");
    assert(text == ",");

    // Called again with the separator still at the front: no data is
    // produced for the trailing field.
    buf.shrinkTo(0);
    csvNextToken(text, buf, ',', '"');
    assert(buf.data == "");
}
1763
1764 // Test exceptions
@safe pure unittest
{
    import std.array;

    typeof(appender!(dchar[])()) buf;

    // A quoted field that is never closed: everything is consumed and the
    // partial data stays available in the output buffer.
    string text = "\"one\nnew line";
    try
    {
        buf = appender!(dchar[])();
        csvNextToken(text, buf, ',', '"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(buf.data == "one\nnew line");
        assert(text == "");
    }

    // A quote inside an unquoted field: parsing stops at the quote.
    text = "Hello world\"";
    try
    {
        buf = appender!(dchar[])();
        csvNextToken(text, buf, ',', '"');
        assert(0);
    }
    catch (IncompleteCellException ice)
    {
        assert(buf.data == "Hello world");
        assert(text == "\"");
    }

    // With Malformed.ignore the stray quotes are kept as field data.
    text = "one, two \"quoted\" end";

    buf = appender!(dchar[])();
    csvNextToken!(string, Malformed.ignore)(text, buf, ',', '"');
    assert(buf.data == "one");

    text.popFront();
    buf.shrinkTo(0);
    csvNextToken!(string, Malformed.ignore)(text, buf, ',', '"');
    assert(buf.data == " two \"quoted\" end");
}
1808
1809 // Test modifying token delimiter
@safe pure unittest
{
    import std.array;

    // '|' separates fields and '/' quotes them; '"' is ordinary data here.
    string text = `one|two|/three "quoted"/|//`;

    auto buf = appender!(dchar[])();

    csvNextToken(text, buf, '|', '/');
    assert(buf.data == "one"d);
    assert(text == `|two|/three "quoted"/|//`);

    text.popFront();
    buf.shrinkTo(0);
    csvNextToken(text, buf, '|', '/');
    assert(buf.data == "two"d);
    assert(text == `|/three "quoted"/|//`);

    text.popFront();
    buf.shrinkTo(0);
    csvNextToken(text, buf, '|', '/');
    assert(buf.data == `three "quoted"`);
    assert(text == `|//`);

    // An empty quoted field yields an empty token.
    text.popFront();
    buf.shrinkTo(0);
    csvNextToken(text, buf, '|', '/');
    assert(buf.data == ""d);
}
1838
1839 // https://issues.dlang.org/show_bug.cgi?id=8908
@safe pure unittest
{
    // Numeric fields preceded by whitespace. The second line stays
    // flush-left: in a WYSIWYG literal any indentation would become data.
    string csv = ` 1.0, 2.0, 3.0
4.0, 5.0, 6.0`;

    static struct Data { real a, b, c; }

    // Struct contents: each record arrives as a populated `Data`.
    size_t rowNo = 0;
    foreach (rec; csvReader!Data(csv))
    {
        int[] ints = [cast(int) rec.a, cast(int) rec.b, cast(int) rec.c];
        assert(ints == (rowNo == 0 ? [1, 2, 3] : [4, 5, 6]));
        ++rowNo;
    }

    // Scalar contents: each record is a range of `real` values.
    rowNo = 0;
    foreach (fields; csvReader!real(csv))
    {
        auto first = fields.front;
        fields.popFront();
        auto second = fields.front;
        fields.popFront();
        auto third = fields.front;

        int[] ints = [cast(int) first, cast(int) second, cast(int) third];
        assert(ints == (rowNo == 0 ? [1, 2, 3] : [4, 5, 6]));
        ++rowNo;
    }
}
1871
1872 // https://issues.dlang.org/show_bug.cgi?id=21629
@safe pure unittest
{
    import std.typecons : Tuple;

    struct Row
    {
        string a;
        string b;
    }

    enum message = "This should be empty";

    auto columns = ["a" ,"b"];
    string nothing = "";

    // Empty input must give an empty reader for every kind of Contents,
    // with or without a header argument.
    assert(csvReader!Row(nothing).empty, message);
    assert(csvReader!Row(nothing, columns).empty, message);
    assert(csvReader!(Tuple!(string,string))(nothing).empty, message);
    assert(csvReader!(string[string])(nothing, columns).empty, message);
    assert(csvReader!(string[string])(nothing, null).empty, message);
    assert(csvReader!(int)(nothing, null).empty, message);
}