1 // Written in the D programming language. 2 3 /** 4 String handling functions. 5 6 $(SCRIPT inhibitQuickIndex = 1;) 7 8 $(DIVC quickindex, 9 $(BOOKTABLE , 10 $(TR $(TH Category) $(TH Functions) ) 11 $(TR $(TDNW Searching) 12 $(TD 13 $(MYREF column) 14 $(MYREF indexOf) 15 $(MYREF indexOfAny) 16 $(MYREF indexOfNeither) 17 $(MYREF lastIndexOf) 18 $(MYREF lastIndexOfAny) 19 $(MYREF lastIndexOfNeither) 20 ) 21 ) 22 $(TR $(TDNW Comparison) 23 $(TD 24 $(MYREF isNumeric) 25 ) 26 ) 27 $(TR $(TDNW Mutation) 28 $(TD 29 $(MYREF capitalize) 30 ) 31 ) 32 $(TR $(TDNW Pruning and Filling) 33 $(TD 34 $(MYREF center) 35 $(MYREF chomp) 36 $(MYREF chompPrefix) 37 $(MYREF chop) 38 $(MYREF detabber) 39 $(MYREF detab) 40 $(MYREF entab) 41 $(MYREF entabber) 42 $(MYREF leftJustify) 43 $(MYREF outdent) 44 $(MYREF rightJustify) 45 $(MYREF strip) 46 $(MYREF stripLeft) 47 $(MYREF stripRight) 48 $(MYREF wrap) 49 ) 50 ) 51 $(TR $(TDNW Substitution) 52 $(TD 53 $(MYREF abbrev) 54 $(MYREF soundex) 55 $(MYREF soundexer) 56 $(MYREF succ) 57 $(MYREF tr) 58 $(MYREF translate) 59 ) 60 ) 61 $(TR $(TDNW Miscellaneous) 62 $(TD 63 $(MYREF assumeUTF) 64 $(MYREF fromStringz) 65 $(MYREF lineSplitter) 66 $(MYREF representation) 67 $(MYREF splitLines) 68 $(MYREF toStringz) 69 ) 70 ))) 71 72 Objects of types `string`, `wstring`, and `dstring` are value types 73 and cannot be mutated element-by-element. For using mutation during building 74 strings, use `char[]`, `wchar[]`, or `dchar[]`. The `xxxstring` 75 types are preferable because they don't exhibit undesired aliasing, thus 76 making code more robust. 
77 78 The following functions are publicly imported: 79 80 $(BOOKTABLE , 81 $(TR $(TH Module) $(TH Functions) ) 82 $(LEADINGROW Publicly imported functions) 83 $(TR $(TD std.algorithm) 84 $(TD 85 $(REF_SHORT cmp, std,algorithm,comparison) 86 $(REF_SHORT count, std,algorithm,searching) 87 $(REF_SHORT endsWith, std,algorithm,searching) 88 $(REF_SHORT startsWith, std,algorithm,searching) 89 )) 90 $(TR $(TD std.array) 91 $(TD 92 $(REF_SHORT join, std,array) 93 $(REF_SHORT replace, std,array) 94 $(REF_SHORT replaceInPlace, std,array) 95 $(REF_SHORT split, std,array) 96 $(REF_SHORT empty, std,array) 97 )) 98 $(TR $(TD std.format) 99 $(TD 100 $(REF_SHORT format, std,format) 101 $(REF_SHORT sformat, std,format) 102 )) 103 $(TR $(TD std.uni) 104 $(TD 105 $(REF_SHORT icmp, std,uni) 106 $(REF_SHORT toLower, std,uni) 107 $(REF_SHORT toLowerInPlace, std,uni) 108 $(REF_SHORT toUpper, std,uni) 109 $(REF_SHORT toUpperInPlace, std,uni) 110 )) 111 ) 112 113 There is a rich set of functions for string handling defined in other modules. 114 Functions related to Unicode and ASCII are found in $(MREF std, uni) 115 and $(MREF std, ascii), respectively. Other functions that have a 116 wider generality than just strings can be found in $(MREF std, algorithm) 117 and $(MREF std, range). 118 119 See_Also: 120 $(LIST 121 $(MREF std, algorithm) and 122 $(MREF std, range) 123 for generic range algorithms 124 , 125 $(MREF std, ascii) 126 for functions that work with ASCII strings 127 , 128 $(MREF std, uni) 129 for functions that work with unicode strings 130 ) 131 132 Copyright: Copyright The D Language Foundation 2007-. 133 134 License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0). 135 136 Authors: $(HTTP digitalmars.com, Walter Bright), 137 $(HTTP erdani.org, Andrei Alexandrescu), 138 $(HTTP jmdavisprog.com, Jonathan M Davis), 139 and David L. 
'SpottedTiger' Davis

Source:    $(PHOBOSSRC std/string.d)

*/
module std.string;

version (StdUnittest)
{
private:
    /*
    Test helper: a non-copyable wrapper that converts implicitly to `string`
    through `alias this`. It lets the unittests call a function with a
    string-convertible argument that is not itself a string.
    */
    struct TestAliasedString
    {
        string get() @safe @nogc pure nothrow return scope { return _s; }
        alias get this;
        @disable this(this);
        string _s;
    }

    /*
    Calls `func` once with `s` wrapped in a `TestAliasedString` and once with
    `s` itself (forwarding `args` both times) and returns whether the two
    results agree.
    */
    bool testAliasedString(alias func, Args...)(string s, Args args)
    {
        import std.algorithm.comparison : equal;
        auto a = func(TestAliasedString(s), args);
        auto b = func(s, args);
        static if (is(typeof(equal(a, b))))
        {
            // For ranges, compare contents instead of object identity.
            return equal(a, b);
        }
        else
        {
            return a == b;
        }
    }
}

public import std.format : format, sformat;
import std.typecons : Flag, Yes, No;
public import std.uni : icmp, toLower, toLowerInPlace, toUpper, toUpperInPlace;

import std.meta : AliasSeq, staticIndexOf;
import std.range.primitives : back, ElementEncodingType, ElementType, front,
       hasLength, hasSlicing, isBidirectionalRange, isForwardRange, isInfinite,
       isInputRange, isOutputRange, isRandomAccessRange, popBack, popFront, put,
       save;
import std.traits : isConvertibleToString, isNarrowString, isSomeChar,
       isSomeString, StringTypeOf, Unqual;

//public imports for backward compatibility
public import std.algorithm.comparison : cmp;
public import std.algorithm.searching : startsWith, endsWith, count;
public import std.array : join, replace, replaceInPlace, split, empty;

/* ************* Exceptions *************** */

/++
    Exception thrown on errors in std.string functions.
+/
class StringException : Exception
{
    import std.exception : basicExceptionCtors;

    ///
    mixin basicExceptionCtors;
}

///
@safe pure unittest
{
    import std.exception : assertThrown;
    auto bad = " a\n\tb\n c";
    assertThrown!StringException(bad.outdent);
}

/++
    Views a null-terminated sequence of characters as a D slice.

    Params:
        cString = A null-terminated c-style string, passed either as a
                  pointer or as an array.

    Returns: A D-style array of `char`, `wchar` or `dchar` referencing the same
    string, with the same type qualifiers as the input. The pointer overload
    returns `null` for a null pointer and otherwise the characters preceding
    the first null character. The array overload returns the part of the array
    preceding its first null character, or the whole array if it contains none.

    $(RED Important Note:) The returned array is a slice of the original buffer.
    The original data is not changed and not copied.
+/
inout(Char)[] fromStringz(Char)(return scope inout(Char)* cString) @nogc @system pure nothrow
if (isSomeChar!Char)
{
    import core.stdc.stddef : wchar_t;

    // Pick a length routine: the C runtime one where the character type
    // matches, otherwise a plain scan for the terminator.
    static if (is(immutable Char == immutable char))
        import core.stdc.string : cstrlen = strlen;
    else static if (is(immutable Char == immutable wchar_t))
        import core.stdc.wchar_ : cstrlen = wcslen;
    else
        static size_t cstrlen(scope const Char* s)
        {
            size_t n = 0;
            while (s[n])
                ++n;
            return n;
        }

    if (cString is null)
        return null;
    return cString[0 .. cstrlen(cString)];
}

/// ditto
inout(Char)[] fromStringz(Char)(return scope inout(Char)[] cString) @nogc @safe pure nothrow
if (isSomeChar!Char)
{
    // Advance to the first terminator, or to the end when there is none.
    size_t end = 0;
    while (end < cString.length && cString[end] != '\0')
        ++end;

    return cString[0 .. end];
}

///
@system pure unittest
{
    assert(fromStringz("foo\0"c.ptr) == "foo"c);
    assert(fromStringz("foo\0"w.ptr) == "foo"w);
    assert(fromStringz("foo\0"d.ptr) == "foo"d);

    assert(fromStringz("福\0"c.ptr) == "福"c);
    assert(fromStringz("福\0"w.ptr) == "福"w);
    assert(fromStringz("福\0"d.ptr) == "福"d);
}

///
@nogc @safe pure nothrow unittest
{
    struct C
    {
        char[32] name;
    }
    assert(C("foo\0"c).name.fromStringz() == "foo"c);

    struct W
    {
        wchar[32] name;
    }
    assert(W("foo\0"w).name.fromStringz() == "foo"w);

    struct D
    {
        dchar[32] name;
    }
    assert(D("foo\0"d).name.fromStringz() == "foo"d);
}

@nogc @safe pure nothrow unittest
{
    assert( string.init.fromStringz() == ""c);
    assert(wstring.init.fromStringz() == ""w);
    assert(dstring.init.fromStringz() == ""d);

    immutable char[3] a = "foo"c;
    assert(a.fromStringz() == "foo"c);

    immutable wchar[3] b = "foo"w;
    assert(b.fromStringz() == "foo"w);

    immutable dchar[3] c = "foo"d;
    assert(c.fromStringz() == "foo"d);
}

@system pure unittest
{
    char* a = null;
    assert(fromStringz(a) == null);
    wchar* b = null;
    assert(fromStringz(b) == null);
    dchar* c = null;
    assert(fromStringz(c) == null);

    const char* d = "foo\0";
    assert(fromStringz(d) == "foo");

    immutable char* e = "foo\0";
    assert(fromStringz(e) == "foo");

    const wchar* f = "foo\0";
    assert(fromStringz(f) == "foo");

    immutable wchar* g = "foo\0";
    assert(fromStringz(g) == "foo");

    const dchar* h = "foo\0";
    assert(fromStringz(h) == "foo");

    immutable dchar* i = "foo\0";
    assert(fromStringz(i) == "foo");

    immutable wchar z = 0x0000;
    // Test some surrogate pairs
    // high surrogates are in the range 0xD800 .. 0xDC00
    // low surrogates are in the range 0xDC00 ..
// 0xE000
    // since UTF16 doesn't specify endianness we test both.
    foreach (wchar[] t; [[0xD800, 0xDC00], [0xD800, 0xE000], [0xDC00, 0xDC00],
            [0xDC00, 0xE000], [0xDA00, 0xDE00]])
    {
        immutable hi = t[0], lo = t[1];
        assert(fromStringz([hi, lo, z].ptr) == [hi, lo]);
        assert(fromStringz([lo, hi, z].ptr) == [lo, hi]);
    }
}

/++
    Params:
        s = A D-style string.

    Returns: A C-style null-terminated string equivalent to `s`. `s`
    must not contain embedded `'\0'`'s as any C function will treat the
    first `'\0'` that it sees as the end of the string. If `s.empty` is
    `true`, then a string containing only `'\0'` is returned.

    $(RED Important Note:) When passing a `char*` to a C function, and the C
    function keeps it around for any reason, make sure that you keep a
    reference to it in your D code. Otherwise, it may become invalid during a
    garbage collection cycle and cause a nasty bug when the C code tries to use
    it.
+/
immutable(char)* toStringz(scope const(char)[] s) @trusted pure nothrow
out (result)
{
    import core.stdc.string : strlen;
    if (result)
    {
        // Trailing '\0's of `s` are not counted by strlen, so they are
        // discounted before the lengths are compared.
        auto slen = s.length;
        while (slen > 0 && s[slen-1] == 0) --slen;
        assert(strlen(result) == slen,
                "The result c string is shorter than the input string");
        assert(result[0 .. slen] == s[0 .. slen],
                "The input and result string are not equal");
    }
}
do
{
    import std.exception : assumeUnique;

    // An empty input maps to the (null-terminated) empty string literal.
    if (s.empty) return "".ptr;

    /+ Unfortunately, this isn't reliable.
     We could make this work if string literals are put
     in read-only memory and we test if s[] is pointing into
     that.

     /* Peek past end of s[], if it's 0, no conversion necessary.
     * Note that the compiler will put a 0 past the end of static
     * strings, and the storage allocator will put a 0 past the end
     * of newly allocated char[]'s.
     */
     char* p = &s[0] + s.length;
     if (*p == 0)
     return s;
     +/

    // Need to make a copy
    auto copy = new char[s.length + 1];
    copy[0 .. s.length] = s[];
    copy[s.length] = 0;

    return &assumeUnique(copy)[0];
}

///
pure nothrow @system unittest
{
    import core.stdc.string : strlen;
    import std.conv : to;

    auto p = toStringz("foo");
    assert(strlen(p) == 3);
    const(char)[] foo = "abbzxyzzy";
    p = toStringz(foo[3 .. 5]);
    assert(strlen(p) == 2);

    string test = "";
    p = toStringz(test);
    assert(*p == 0);

    test = "\0";
    p = toStringz(test);
    assert(*p == 0);

    test = "foo\0";
    p = toStringz(test);
    assert(p[0] == 'f' && p[1] == 'o' && p[2] == 'o' && p[3] == 0);

    const string test2 = "";
    p = toStringz(test2);
    assert(*p == 0);

    assert(toStringz([]) is toStringz(""));
}

pure nothrow @system unittest // https://issues.dlang.org/show_bug.cgi?id=15136
{
    static struct S
    {
        immutable char[5] str;
        ubyte foo;
        this(char[5] str) pure nothrow
        {
            this.str = str;
        }
    }
    auto s = S("01234");
    const str = s.str.toStringz;
    assert(str !is s.str.ptr);
    assert(*(str + 5) == 0); // Null terminated.
    s.foo = 42;
    assert(*(str + 5) == 0); // Still null terminated.
}


/**
 * Flag indicating whether a search is case-sensitive.
 */
alias CaseSensitive = Flag!"caseSensitive";

/++
    Searches for a character in a string or range.

    Params:
        s = string or InputRange of characters to search for `c` in
        c = character to search for in `s`
        startIdx = index to a well-formed code point in `s` to start
            searching from; defaults to 0
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`).

    Returns:
        If `c` is found in `s`, then the index of its first occurrence is
        returned.
If `c` is not found or `startIdx` is greater than or equal to
        `s.length`, then -1 is returned. If the parameters are not valid UTF,
        the result will still be either -1 or in the range [`startIdx` ..
        `s.length`], but will not be reliable otherwise.

    Throws:
        If the sequence starting at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.

    See_Also: $(REF countUntil, std,algorithm,searching)
+/
ptrdiff_t indexOf(Range)(Range s, dchar c, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range)
{
    // Character ranges that are not strings: forward to the shared worker.
    return _indexOf(s, c, cs);
}

/// Ditto
ptrdiff_t indexOf(C)(scope const(C)[] s, dchar c, CaseSensitive cs = Yes.caseSensitive)
if (isSomeChar!C)
{
    // Character arrays: forward to the shared worker.
    return _indexOf(s, c, cs);
}

/// Ditto
ptrdiff_t indexOf(Range)(Range s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range)
{
    // Same as above, with the search offset passed through.
    return _indexOf(s, c, startIdx, cs);
}

/// Ditto
ptrdiff_t indexOf(C)(scope const(C)[] s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive)
if (isSomeChar!C)
{
    // Same as above, with the search offset passed through.
    return _indexOf(s, c, startIdx, cs);
}

///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(indexOf(s, 'W') == 6);
    assert(indexOf(s, 'Z') == -1);
    assert(indexOf(s, 'w', No.caseSensitive) == 6);
}

///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(indexOf(s, 'W', 4) == 6);
    assert(indexOf(s, 'Z', 100) == -1);
    assert(indexOf(s, 'w', 3, No.caseSensitive) == 6);
}

@safe pure unittest
{
    assert(testAliasedString!indexOf("std/string.d", '/'));

    enum S : string { a = "std/string.d" }
    assert(S.a.indexOf('/') == 3);

    char[S.a.length] sa = S.a[];
    assert(sa.indexOf('/') == 3);
}

@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;
    import std.utf : byChar, byWchar, byDchar;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        assert(indexOf(cast(S) null, cast(dchar)'a') == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a') == -1);
        assert(indexOf(to!S("abba"), cast(dchar)'a') == 0);
        assert(indexOf(to!S("def"), cast(dchar)'f') == 2);

        assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
        assert(indexOf(to!S("Abba"), cast(dchar)'a', No.caseSensitive) == 0);
        assert(indexOf(to!S("def"), cast(dchar)'F', No.caseSensitive) == 2);
        assert(indexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0);

        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
        assert(indexOf("def", cast(char)'f', No.caseSensitive) == 2);
        assert(indexOf(sPlts, cast(char)'P', No.caseSensitive) == 23);
        assert(indexOf(sPlts, cast(char)'R', No.caseSensitive) == 2);
    }}

    foreach (cs; EnumMembers!CaseSensitive)
    {
        assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', cs) == 9);
        assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', cs) == 7);
        assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', cs) == 6);

        assert(indexOf("hello\U00010143\u0100\U00010143".byChar, '\u0100', cs) == 9);
        assert(indexOf("hello\U00010143\u0100\U00010143".byWchar, '\u0100', cs) == 7);
        assert(indexOf("hello\U00010143\u0100\U00010143".byDchar, '\u0100', cs) == 6);

        assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, 'l', cs) == 2);
        assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, '\u0100', cs) == 7);
        assert(indexOf("hello\U0000EFFF\u0100\U00010143".byChar, '\u0100', cs) == 8);

        assert(indexOf("hello\U00010100".byWchar, '\U00010100', cs) == 5);
        assert(indexOf("hello\U00010100".byWchar, '\U00010101', cs) == -1);
    }

    char[10] fixedSizeArray = "0123456789";
    assert(indexOf(fixedSizeArray, '2') == 2);
    });
}

@safe pure unittest
{
    assert(testAliasedString!indexOf("std/string.d", '/', 0));
    assert(testAliasedString!indexOf("std/string.d", '/', 1));
    assert(testAliasedString!indexOf("std/string.d", '/', 4));

    enum S : string { a = "std/string.d" }
    assert(S.a.indexOf('/', 0) == 3);
    assert(S.a.indexOf('/', 1) == 3);
    assert(S.a.indexOf('/', 4) == -1);

    char[S.a.length] sa = S.a[];
    assert(sa.indexOf('/', 0) == 3);
    assert(sa.indexOf('/', 1) == 3);
    assert(sa.indexOf('/', 4) == -1);
}

@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;
    import std.utf : byCodeUnit, byChar, byWchar;

    assert("hello".byCodeUnit.indexOf(cast(dchar)'l', 1) == 2);
    assert("hello".byWchar.indexOf(cast(dchar)'l', 1) == 2);
    assert("hello".byWchar.indexOf(cast(dchar)'l', 6) == -1);

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        assert(indexOf(cast(S) null, cast(dchar)'a', 1) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', 1) == -1);
        assert(indexOf(to!S("abba"), cast(dchar)'a', 1) == 3);
        assert(indexOf(to!S("def"), cast(dchar)'f', 1) == 2);

        assert((to!S("def")).indexOf(cast(dchar)'a', 1,
                No.caseSensitive) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', 1,
                No.caseSensitive) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', 12,
                No.caseSensitive) == -1);
        assert(indexOf(to!S("AbbA"), cast(dchar)'a', 2,
                No.caseSensitive) == 3);
        assert(indexOf(to!S("def"), cast(dchar)'F', 2, No.caseSensitive) == 2);

        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
        assert(indexOf("def", cast(char)'f', cast(uint) 2,
            No.caseSensitive) == 2);
assert(indexOf(sPlts, cast(char)'P', 12, No.caseSensitive) == 23);
        assert(indexOf(sPlts, cast(char)'R', cast(ulong) 1,
            No.caseSensitive) == 2);
    }}

    foreach (cs; EnumMembers!CaseSensitive)
    {
        assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', 2, cs)
            == 9);
        assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', 3, cs)
            == 7);
        assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', 6, cs)
            == 6);
    }
}

// Worker behind the character-search `indexOf` overloads: returns the
// code-unit index of the first occurrence of `c` in `s`, or -1.
private ptrdiff_t _indexOf(Range)(Range s, dchar c, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range))
{
    static import std.ascii;
    static import std.uni;
    import std.utf : byDchar, byCodeUnit, UTFException, codeLength;
    alias Char = Unqual!(ElementEncodingType!Range);

    if (cs != Yes.caseSensitive)
    {
        // Case-insensitive search: compare lower-cased forms.
        ptrdiff_t pos;
        if (std.ascii.isASCII(c))
        {
            // ASCII target: code units can be compared one by one.
            immutable wanted = cast(char) std.ascii.toLower(c);
            foreach (const unit; s.byCodeUnit())
            {
                if (wanted == std.ascii.toLower(unit))
                    return pos;
                ++pos;
            }
        }
        else
        {
            // Non-ASCII target: decode, and advance by each code point's
            // encoded length so `pos` stays a code-unit index.
            immutable wanted = std.uni.toLower(c);
            foreach (const point; s.byDchar())
            {
                if (wanted == std.uni.toLower(point))
                    return pos;
                pos += codeLength!Char(point);
            }
        }
        return -1;
    }

    // Everything below is the case-sensitive search.
    static if (Char.sizeof == 1 && isSomeString!Range)
    {
        if (std.ascii.isASCII(c) && !__ctfe)
        {
            // ASCII target in a char string at run time: use memchr.
            static ptrdiff_t trustedmemchr(Range s, char c) @trusted
            {
                import core.stdc.string : memchr;
                const hit = cast(const(Char)*) memchr(s.ptr, c, s.length);
                if (hit is null)
                    return -1;
                return hit - s.ptr;
            }

            return trustedmemchr(s, cast(char) c);
        }
    }

    ptrdiff_t pos;
    static if (Char.sizeof == 1)
    {
        if (c <= 0x7F)
        {
            // Single-unit target: scan code units.
            foreach (const unit; s)
            {
                if (c == unit)
                    return pos;
                ++pos;
            }
        }
        else
        {
            // Multi-unit target: decode and count encoded lengths.
            foreach (const point; s.byDchar())
            {
                if (c == point)
                    return pos;
                pos += codeLength!Char(point);
            }
        }
    }
    else static if (Char.sizeof == 2)
    {
        if (c <= 0xFFFF)
        {
            // Single-unit target: scan code units.
            foreach (const unit; s)
            {
                if (c == unit)
                    return pos;
                ++pos;
            }
        }
        else if (c <= 0x10FFFF)
        {
            // Encode UTF-16 surrogate pair
            const wchar hi = cast(wchar)((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
            const wchar lo = cast(wchar)(((c - 0x10000) & 0x3FF) + 0xDC00);
            for (auto units = s.byCodeUnit(); !units.empty; units.popFront())
            {
                if (hi == units.front)
                {
                    units.popFront();
                    if (units.empty)    // invalid UTF - missing second of pair
                        break;
                    if (lo == units.front)
                        return pos;
                    ++pos;
                }
                ++pos;
            }
        }
    }
    else static if (Char.sizeof == 4)
    {
        // One unit per code point: scan directly.
        foreach (const unit; s)
        {
            if (c == unit)
                return pos;
            ++pos;
        }
    }
    else
        static assert(0);

    return -1;
}

// Worker for the `startIdx` overloads: skips the first `startIdx` elements
// of `s`, searches the remainder and reports the index relative to the
// start of `s`.
private ptrdiff_t _indexOf(Range)(Range s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range))
{
    static if (isSomeString!(typeof(s)) ||
                (hasSlicing!(typeof(s)) && hasLength!(typeof(s))))
    {
        // Sliceable input: search the tail slice.
        if (startIdx >= s.length)
            return -1;

        const ptrdiff_t rel = indexOf(s[startIdx .. $], c, cs);
        return rel == -1 ? -1 : rel + cast(ptrdiff_t) startIdx;
    }
    else
    {
        // Otherwise pop elements one at a time; running out means no match.
        foreach (skipped; 0 .. startIdx)
        {
            if (s.empty)
                return -1;
            s.popFront();
        }

        const ptrdiff_t rel = indexOf(s, c, cs);
        return rel == -1 ? -1 : rel + cast(ptrdiff_t) startIdx;
    }
}

// Worker behind the substring-search `indexOf` overloads, parameterised at
// compile time on case sensitivity.
private template _indexOfStr(CaseSensitive cs)
{
    private ptrdiff_t _indexOfStr(Range, Char)(Range s, const(Char)[] sub)
    if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
        isSomeChar!Char)
    {
        alias Char1 = Unqual!(ElementEncodingType!Range);

        static if (isSomeString!Range)
        {
            static if (is(Char1 == Char) && cs == Yes.caseSensitive)
            {
                // Same encoding, exact match: compare raw code units.
                import std.algorithm.searching : countUntil;
                return s.representation.countUntil(sub.representation);
            }
            else
            {
                import std.algorithm.searching : find;

                // `rest` is the part of `s` starting at the match (empty if none).
                const(Char1)[] rest;
                static if (cs == Yes.caseSensitive)
                {
                    rest = find(s, sub);
                }
                else
                {
                    rest = find!
                        ((a, b) => toLower(a) == toLower(b))
                        (s, sub);
                }
                return () @trusted { return rest.empty ? -1 : rest.ptr - s.ptr; } ();
            }
        }
        else
        {
            if (s.empty)
                return -1;
            if (sub.empty)
                return 0;                   // degenerate case

            import std.utf : byDchar, codeLength;
            auto subr = sub.byDchar;        // decode sub[] by dchar's
            dchar sub0 = subr.front;        // cache first character of sub[]
            subr.popFront();

            // Special case for single character search
            if (subr.empty)
                return indexOf(s, sub0, cs);

            static if (cs == No.caseSensitive)
                sub0 = toLower(sub0);

            /* Classic double nested loop search algorithm
             */
            ptrdiff_t index = 0;            // count code unit index into s
            for (auto outer = s.byDchar(); !outer.empty; outer.popFront())
            {
                dchar head = outer.front;
                static if (cs == No.caseSensitive)
                    head = toLower(head);

                if (head == sub0)
                {
                    auto probe = outer.save;    // why s must be a forward range
                    bool matched = true;
                    foreach (expected; subr.save)
                    {
                        probe.popFront();
                        if (probe.empty)
                            return -1;

                        bool differs;
                        static if (cs == Yes.caseSensitive)
                            differs = expected != probe.front;
                        else
                            differs = toLower(expected) != toLower(probe.front);

                        if (differs)
                        {
                            matched = false;
                            break;
                        }
                    }
                    if (matched)
                        return index;
                }
                index += codeLength!Char1(head);
            }
            return -1;
        }
    }
}

/++
    Searches for a substring in a string or range.

    Params:
        s = string or ForwardRange of characters to search for `sub` in
        sub = substring to search for in `s`
        startIdx = index to a well-formed code point in `s` to start
            searching from; defaults to 0
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index of the first occurrence of `sub` in `s`. If `sub` is not found
        or `startIdx` is greater than or equal to `s.length`, then -1 is
        returned. If the arguments are not valid UTF, the result will still be
        either -1 or in the range [`startIdx` .. `s.length`], but will not be
        reliable otherwise.
Throws:
        If the sequence starting at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.

    Bugs:
        Does not work with case-insensitive strings where the mapping of
        $(REF toLower, std,uni) and $(REF toUpper, std,uni) is not 1:1.
+/
ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char)
{
    // No flag given: exact (case-sensitive) search.
    return _indexOfStr!(Yes.caseSensitive)(s, sub);
}

/// Ditto
ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub, in CaseSensitive cs)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char)
{
    // Turn the run-time flag into the compile-time variant of the worker.
    return cs == Yes.caseSensitive
        ? indexOf(s, sub)
        : _indexOfStr!(No.caseSensitive)(s, sub);
}

/// Ditto
ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx)
@safe
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    if (startIdx < s.length)
    {
        // Search the tail, then shift the hit back to an index into `s`.
        const ptrdiff_t rel = indexOf(s[startIdx .. $], sub);
        if (rel != -1)
            return rel + cast(ptrdiff_t) startIdx;
    }
    return -1;
}

/// Ditto
ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs)
@safe
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    if (startIdx < s.length)
    {
        // Search the tail, then shift the hit back to an index into `s`.
        const ptrdiff_t rel = indexOf(s[startIdx .. $], sub, cs);
        if (rel != -1)
            return rel + cast(ptrdiff_t) startIdx;
    }
    return -1;
}

///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(indexOf(s, "Wo", 4) == 6);
    assert(indexOf(s, "Zo", 100) == -1);
    assert(indexOf(s, "wo", 3, No.caseSensitive) == 6);
}

///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(indexOf(s, "Wo") == 6);
    assert(indexOf(s, "Zo") == -1);
    assert(indexOf(s, "wO", No.caseSensitive) == 6);
}

@safe pure nothrow @nogc unittest
{
    string s = "Hello World";
    assert(indexOf(s, "Wo", 4) == 6);
    assert(indexOf(s, "Zo", 100) == -1);
    assert(indexOf(s, "Wo") == 6);
    assert(indexOf(s, "Zo") == -1);
}

// Overload for types that are not character ranges themselves but convert
// to a string type: forwards with the string type given explicitly.
ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char) &&
    is(StringTypeOf!Range))
{
    return indexOf!(StringTypeOf!Range)(s, sub);
}

// As above, with the case-sensitivity flag passed through.
ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub,
        in CaseSensitive cs)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char) &&
    is(StringTypeOf!Range))
{
    return indexOf!(StringTypeOf!Range)(s, sub, cs);
}

@safe pure nothrow @nogc unittest
{
    assert(testAliasedString!indexOf("std/string.d", "string"));
}

@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            assert(indexOf(cast(S) null, to!T("a")) == -1);
            assert(indexOf(to!S("def"), to!T("a")) == -1);
            assert(indexOf(to!S("abba"), to!T("a")) == 0);
            assert(indexOf(to!S("def"), to!T("f")) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff")) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff")) == 6);

            assert(indexOf(to!S("dfeffgfff"), to!T("a"), No.caseSensitive) == -1);
            assert(indexOf(to!S("def"), to!T("a"), No.caseSensitive) == -1);
            assert(indexOf(to!S("abba"), to!T("a"), No.caseSensitive) == 0);
            assert(indexOf(to!S("def"), to!T("f"), No.caseSensitive) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff"), No.caseSensitive) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff"), No.caseSensitive) == 6);

            S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
            S sMars = "Who\'s \'My Favorite Maritian?\'";

            assert(indexOf(sMars, to!T("MY fAVe"), No.caseSensitive) == -1);
            assert(indexOf(sMars, to!T("mY fAVOriTe"), No.caseSensitive) == 7);
            assert(indexOf(sPlts, to!T("mArS:"), No.caseSensitive) == 0);
            assert(indexOf(sPlts, to!T("rOcK"), No.caseSensitive) == 17);
            assert(indexOf(sPlts, to!T("Un."), No.caseSensitive) == 41);
            assert(indexOf(sPlts, to!T(sPlts), No.caseSensitive) == 0);

            assert(indexOf("\u0100", to!T("\u0100"), No.caseSensitive) == 0);

            // Thanks to Carlos Santander B. and zwang
            assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
                to!T("page-break-before"), No.caseSensitive) == -1);
        }}

        foreach (cs; EnumMembers!CaseSensitive)
        {
            assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), cs) == 9);
            assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), cs) == 7);
            assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), cs) == 6);
        }
    }
    });
}

@safe pure @nogc nothrow
unittest
{
    import std.traits : EnumMembers;
    import std.utf : byWchar;

    foreach (cs; EnumMembers!CaseSensitive)
    {
        assert(indexOf("".byWchar, "", cs) == -1);
        assert(indexOf("hello".byWchar, "", cs) == 0);
        assert(indexOf("hello".byWchar, "l", cs) == 2);
        assert(indexOf("heLLo".byWchar, "LL", cs) == 2);
        assert(indexOf("hello".byWchar, "lox", cs) == -1);
        assert(indexOf("hello".byWchar, "betty", cs) == -1);
        assert(indexOf("hello\U00010143\u0100*\U00010143".byWchar, "\u0100*", cs) == 7);
    }
}

@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            assert(indexOf(cast(S) null, to!T("a"), 1337) == -1);
            assert(indexOf(to!S("def"), to!T("a"), 0) == -1);
            assert(indexOf(to!S("abba"), to!T("a"), 2) == 3);
            assert(indexOf(to!S("def"), to!T("f"), 1) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff"), 1) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 5) == 6);

            assert(indexOf(to!S("dfeffgfff"), to!T("a"), 1, No.caseSensitive) == -1);
            assert(indexOf(to!S("def"), to!T("a"), 2, No.caseSensitive) == -1);
            assert(indexOf(to!S("abba"), to!T("a"), 3, No.caseSensitive) == 3);
            assert(indexOf(to!S("def"), to!T("f"), 1, No.caseSensitive) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff"), 2, No.caseSensitive) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 4, No.caseSensitive) == 6);
            assert(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive) == 9,
                to!string(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive))
                ~ " " ~ S.stringof ~ " " ~ T.stringof);

            S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
            S sMars = "Who\'s \'My Favorite Maritian?\'";

            assert(indexOf(sMars, to!T("MY fAVe"), 10,
                No.caseSensitive) == -1);
            assert(indexOf(sMars, to!T("mY fAVOriTe"), 4, No.caseSensitive) == 7);
            assert(indexOf(sPlts, to!T("mArS:"), 0, No.caseSensitive) == 0);
            assert(indexOf(sPlts, to!T("rOcK"), 12, No.caseSensitive) == 17);
            assert(indexOf(sPlts, to!T("Un."), 32, No.caseSensitive) == 41);
            assert(indexOf(sPlts, to!T(sPlts), 0, No.caseSensitive) == 0);

            assert(indexOf("\u0100", to!T("\u0100"), 0, No.caseSensitive) == 0);

            // Thanks to Carlos Santander B. and zwang
            assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
                to!T("page-break-before"), 10, No.caseSensitive) == -1);

            // In order for indexOf with and without index to be consistent
            assert(indexOf(to!S(""), to!T("")) == indexOf(to!S(""), to!T(""), 0));
        }}

        foreach (cs; EnumMembers!CaseSensitive)
        {
            assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"),
                3, cs) == 9);
            assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"),
                3, cs) == 7);
            assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"),
                3, cs) == 6);
        }
    }
}

/++
    Searches for the last occurrence of a character in a string.
/++
    Searches for the last occurrence of a character in a string.

    Params:
        s = string to search for `c` in
        c = character to search for in `s`
        startIdx = exclusive upper bound of the search: only the slice
            `s[0 .. startIdx]` is examined. The overload without `startIdx`
            searches all of `s`.
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index of the last occurrence of `c` within the searched part of
        `s`, or -1 if `c` does not occur there. The `startIdx` overload also
        returns -1 when `startIdx` is greater than `s.length`. A result other
        than -1 is always smaller than the length of the searched slice.

    Throws:
        If the sequence ending at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.
+/
ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char)
{
    static import std.ascii, std.uni;
    import std.utf : canSearchInCodeUnits;

    immutable exactCase = cs == Yes.caseSensitive;

    if (exactCase && canSearchInCodeUnits!Char(c))
    {
        // Raw code unit scan, taken whenever canSearchInCodeUnits permits it.
        foreach_reverse (pos, unit; s)
        {
            if (unit == c)
                return pos;
        }
    }
    else if (exactCase)
    {
        // Decoding scan: `pos` is the index of the first code unit of `point`.
        foreach_reverse (pos, dchar point; s)
        {
            if (point == c)
                return pos;
        }
    }
    else if (std.ascii.isASCII(c))
    {
        // ASCII needle: fold code units with the ASCII-only toLower.
        immutable wanted = std.ascii.toLower(c);

        foreach_reverse (pos, unit; s)
        {
            if (std.ascii.toLower(unit) == wanted)
                return pos;
        }
    }
    else
    {
        // Non-ASCII needle: decode and fold with the Unicode toLower.
        immutable wanted = std.uni.toLower(c);

        foreach_reverse (pos, dchar point; s)
        {
            if (std.uni.toLower(point) == wanted)
                return pos;
        }
    }

    return -1;
}

/// Ditto
ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char)
{
    // A bound past the end of `s` cannot be sliced; report "not found".
    if (startIdx > s.length)
        return -1;

    return lastIndexOf(s[0u .. startIdx], c, cs);
}

///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(lastIndexOf(s, 'l') == 9);
    assert(lastIndexOf(s, 'Z') == -1);
    assert(lastIndexOf(s, 'L', No.caseSensitive) == 9);
}

///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(lastIndexOf(s, 'l', 4) == 3);
    assert(lastIndexOf(s, 'Z', 1337) == -1);
    assert(lastIndexOf(s, 'L', 7, No.caseSensitive) == 3);
}
1310 assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4); 1311 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2); 1312 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1); 1313 } 1314 }); 1315 } 1316 1317 @safe pure unittest 1318 { 1319 import std.conv : to; 1320 import std.traits : EnumMembers; 1321 1322 static foreach (S; AliasSeq!(string, wstring, dstring)) 1323 {{ 1324 assert(lastIndexOf(cast(S) null, 'a') == -1); 1325 assert(lastIndexOf(to!S("def"), 'a') == -1); 1326 assert(lastIndexOf(to!S("abba"), 'a', 3) == 0); 1327 assert(lastIndexOf(to!S("deff"), 'f', 3) == 2); 1328 1329 assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1); 1330 assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1); 1331 assert(lastIndexOf(to!S("AbbAa"), 'a', to!ushort(4), No.caseSensitive) == 3, 1332 to!string(lastIndexOf(to!S("AbbAa"), 'a', 4, No.caseSensitive))); 1333 assert(lastIndexOf(to!S("def"), 'F', 3, No.caseSensitive) == 2); 1334 1335 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1336 1337 assert(lastIndexOf(to!S("def"), 'f', 4, No.caseSensitive) == -1); 1338 assert(lastIndexOf(sPlts, 'M', sPlts.length -2, No.caseSensitive) == 34); 1339 assert(lastIndexOf(sPlts, 'S', sPlts.length -2, No.caseSensitive) == 40); 1340 }} 1341 1342 foreach (cs; EnumMembers!CaseSensitive) 1343 { 1344 assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4); 1345 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2); 1346 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1); 1347 } 1348 } 1349 1350 /++ 1351 Searches for the last occurrence of a substring in a string. 
/++
    Searches for the last occurrence of a substring in a string.

    Params:
        s = string to search for `sub` in
        sub = substring to search for in `s`
        startIdx = exclusive upper bound of the search: only the slice
            `s[0 .. startIdx]` is examined. The overload without `startIdx`
            searches all of `s`.
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index in `s` at which the last occurrence of `sub` starts, or -1
        if `sub` is empty or does not occur in the searched part of `s`. The
        `startIdx` overload also returns -1 when `startIdx` is greater than
        `s.length`.

    Throws:
        If the sequence ending at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.

    Bugs:
        Does not work with case-insensitive strings where the mapping of
        $(REF toLower, std,uni) and $(REF toUpper, std,uni) is not 1:1.
+/
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    import std.algorithm.searching : endsWith;
    import std.conv : to;
    import std.range.primitives : walkLength;
    static import std.uni;
    import std.utf : strideBack;

    if (sub.empty)
        return -1;

    // Number of code points in `sub`; a single code point is handled by the
    // character overload.
    immutable subCodePoints = walkLength(sub);
    if (subCodePoints == 1)
        return lastIndexOf(s, sub.front, cs);

    if (cs == Yes.caseSensitive)
    {
        static if (is(immutable Char1 == immutable Char2))
        {
            import core.stdc.string : memcmp;

            // Same code unit type: scan candidate start positions from the
            // right, comparing the first unit before the remainder.
            immutable c = sub[0];

            // If `sub` is longer than `s` the subtraction wraps and the
            // signed index starts out negative, so the loop is skipped.
            for (ptrdiff_t i = s.length - sub.length; i >= 0; --i)
            {
                if (s[i] == c)
                {
                    if (__ctfe)
                    {
                        // Compile-time evaluation: compare slices instead of
                        // calling the C `memcmp`.
                        if (s[i + 1 .. i + sub.length] == sub[1 .. $])
                            return i;
                    }
                    else
                    {
                        auto trustedMemcmp(in void* s1, in void* s2, size_t n) @trusted
                        {
                            return memcmp(s1, s2, n);
                        }
                        if (trustedMemcmp(&s[i + 1], &sub[1],
                                (sub.length - 1) * Char1.sizeof) == 0)
                            return i;
                    }
                }
            }
        }
        else
        {
            // Different code unit types: test whether the current prefix of
            // `s` ends with `sub`, then drop one code point from its end.
            for (size_t i = s.length; !s.empty;)
            {
                if (s.endsWith(sub))
                    return cast(ptrdiff_t) i - to!(const(Char1)[])(sub).length;

                i -= strideBack(s, i);
                s = s[0 .. i];
            }
        }
    }
    else
    {
        for (size_t i = s.length; !s.empty;)
        {
            if (endsWith!((a, b) => std.uni.toLower(a) == std.uni.toLower(b))
                (s, sub))
            {
                // Step back over as many code points of `s` as `sub` holds.
                // The matched text may occupy a different number of code
                // units than `sub` does (U+212A KELVIN SIGN takes three
                // UTF-8 code units yet matches 'k'), so the start index must
                // be measured in `s` itself.
                size_t start = i;
                foreach (_; 0 .. subCodePoints)
                    start -= strideBack(s, start);
                return cast(ptrdiff_t) start;
            }

            i -= strideBack(s, i);
            s = s[0 .. i];
        }
    }

    return -1;
}

/// Ditto
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    // Only bounds within `s` can be sliced; anything larger is "not found".
    if (startIdx <= s.length)
    {
        return lastIndexOf(s[0u .. startIdx], sub, cs);
    }

    return -1;
}

///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(lastIndexOf(s, "ll") == 2);
    assert(lastIndexOf(s, "Zo") == -1);
    assert(lastIndexOf(s, "lL", No.caseSensitive) == 2);
}

///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(lastIndexOf(s, "ll", 4) == 2);
    assert(lastIndexOf(s, "Zo", 128) == -1);
    assert(lastIndexOf(s, "lL", 3, No.caseSensitive) == -1);
}

// A case-insensitive match whose text in `s` uses more code units than `sub`
// must still report an index into `s`.
@safe pure unittest
{
    import std.typecons : No;

    assert(lastIndexOf("a\u212Ab", "kb", No.caseSensitive) == 1);
    assert(lastIndexOf("a\u212Ab", "KB"d, No.caseSensitive) == 1);
}
assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö")) == 0, typeStr); 1527 1528 assert(lastIndexOf(cast(S) null, to!T("a"), No.caseSensitive) == -1, typeStr); 1529 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), No.caseSensitive) == 6, typeStr); 1530 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), No.caseSensitive) == 6, typeStr); 1531 assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"), No.caseSensitive) == -1, typeStr); 1532 assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy"), No.caseSensitive) == -1, typeStr); 1533 assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), No.caseSensitive) == -1, typeStr); 1534 assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö"), No.caseSensitive) == 0, typeStr); 1535 1536 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), No.caseSensitive) == 6, typeStr); 1537 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), No.caseSensitive) == 6, typeStr); 1538 assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), No.caseSensitive) == 7, typeStr); 1539 1540 assert(lastIndexOf(to!S("ödfeffgfff"), to!T("ö"), Yes.caseSensitive) == 0); 1541 1542 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1543 S sMars = "Who\'s \'My Favorite Maritian?\'"; 1544 1545 assert(lastIndexOf(sMars, to!T("RiTE maR"), No.caseSensitive) == 14, typeStr); 1546 assert(lastIndexOf(sPlts, to!T("FOuRTh"), No.caseSensitive) == 10, typeStr); 1547 assert(lastIndexOf(sMars, to!T("whO\'s \'MY"), No.caseSensitive) == 0, typeStr); 1548 assert(lastIndexOf(sMars, to!T(sMars), No.caseSensitive) == 0, typeStr); 1549 }} 1550 1551 foreach (cs; EnumMembers!CaseSensitive) 1552 { 1553 enum csString = to!string(cs); 1554 1555 assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), cs) == 4, csString); 1556 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), cs) == 2, csString); 1557 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), cs) == 1, csString); 1558 } 1559 } 1560 }); 1561 } 1562 1563 // https://issues.dlang.org/show_bug.cgi?id=13529 
1564 @safe pure unittest 1565 { 1566 import std.conv : to; 1567 static foreach (S; AliasSeq!(string, wstring, dstring)) 1568 { 1569 static foreach (T; AliasSeq!(string, wstring, dstring)) 1570 {{ 1571 enum typeStr = S.stringof ~ " " ~ T.stringof; 1572 auto idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö ö")); 1573 assert(idx != -1, to!string(idx) ~ " " ~ typeStr); 1574 1575 idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö öd")); 1576 assert(idx == -1, to!string(idx) ~ " " ~ typeStr); 1577 }} 1578 } 1579 } 1580 1581 @safe pure unittest 1582 { 1583 import std.conv : to; 1584 import std.traits : EnumMembers; 1585 1586 static foreach (S; AliasSeq!(string, wstring, dstring)) 1587 { 1588 static foreach (T; AliasSeq!(string, wstring, dstring)) 1589 {{ 1590 enum typeStr = S.stringof ~ " " ~ T.stringof; 1591 1592 assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr); 1593 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 5) == 2, typeStr); 1594 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 3) == -1, typeStr); 1595 assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6) == 4, typeStr ~ 1596 format(" %u", lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6))); 1597 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5) == 2, typeStr); 1598 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd"), 3) == -1, typeStr); 1599 assert(lastIndexOf(to!S("abcdefcdefx"), to!T("x"), 1) == -1, typeStr); 1600 assert(lastIndexOf(to!S("abcdefcdefxy"), to!T("xy"), 6) == -1, typeStr); 1601 assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 8) == -1, typeStr); 1602 assert(lastIndexOf(to!S("öafö"), to!T("ö"), 3) == 0, typeStr ~ 1603 to!string(lastIndexOf(to!S("öafö"), to!T("ö"), 3))); //BUG 10472 1604 1605 assert(lastIndexOf(cast(S) null, to!T("a"), 1, No.caseSensitive) == -1, typeStr); 1606 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5, No.caseSensitive) == 2, typeStr); 1607 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive) == 2, typeStr ~ 1608 " " ~ 
// Shared implementation behind indexOfAny, lastIndexOfAny, indexOfNeither
// and lastIndexOfNeither.
//
// forward = scan `haystack` from the front (true) or from the back (false)
// any     = look for a character contained in `needles` (true) or for one
//           that is not contained in `needles` (false)
//
// Returns the index into `haystack` of the first character, in scan order,
// that satisfies the condition, or -1 if there is none.
private ptrdiff_t indexOfAnyNeitherImpl(bool forward, bool any, Char, Char2)(
        const(Char)[] haystack, const(Char2)[] needles,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    import std.algorithm.searching : canFind, findAmong;

    if (cs == Yes.caseSensitive)
    {
        static if (forward && any)
        {
            // findAmong returns the rest of the haystack starting at the
            // hit; its length gives the distance of the hit from the end.
            immutable remaining = haystack.findAmong(needles).length;
            return remaining ? haystack.length - remaining : -1;
        }
        else static if (forward)
        {
            foreach (pos, dchar point; haystack)
            {
                if (!canFind(needles, point))
                    return pos;
            }
            return -1;
        }
        else static if (any)
        {
            import std.range : retro;
            import std.utf : strideBack;

            // Searching the reversed haystack leaves, as `.source`, the
            // prefix of `haystack` that ends with the hit.
            immutable prefixLen = haystack.retro.findAmong(needles).source.length;
            if (prefixLen)
            {
                // Step back one code point to the start of the hit.
                return prefixLen - haystack.strideBack(prefixLen);
            }
            return -1;
        }
        else
        {
            foreach_reverse (pos, dchar point; haystack)
            {
                if (!canFind(needles, point))
                    return pos;
            }
            return -1;
        }
    }

    import std.range.primitives : walkLength;

    if (needles.length <= 16 && needles.walkLength(17))
    {
        // Non-empty needle set of at most 16 code units (hence at most 16
        // code points): lower-case it once into a stack buffer.
        dchar[16] scratch = void;
        size_t filled = 0;
        foreach (dchar needle; needles)
        {
            scratch[filled++] = toLower(needle);
        }

        static if (forward)
        {
            foreach (pos, dchar point; haystack)
            {
                if (canFind(scratch[0 .. filled], toLower(point)) == any)
                    return pos;
            }
        }
        else
        {
            foreach_reverse (pos, dchar point; haystack)
            {
                if (canFind(scratch[0 .. filled], toLower(point)) == any)
                    return pos;
            }
        }

        return -1;
    }

    // Empty or larger needle set: lower-case each needle on every comparison.
    static bool foldedEquals(dchar needle, dchar loweredHay)
    {
        return toLower(needle) == loweredHay;
    }

    static if (forward)
    {
        foreach (pos, dchar point; haystack)
        {
            if (canFind!foldedEquals(needles, toLower(point)) == any)
                return pos;
        }
    }
    else
    {
        foreach_reverse (pos, dchar point; haystack)
        {
            if (canFind!foldedEquals(needles, toLower(point)) == any)
                return pos;
        }
    }

    return -1;
}
/**
    Searches `haystack` for the first occurrence of any of the characters in
    `needles`, optionally starting at index `startIdx`.

    Params:
        haystack = string to search for needles in
        needles = characters to search for in `haystack`
        startIdx = index of a well-formed code point in `haystack` to start
            searching from; the overload without it searches from index 0
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index into `haystack` of the first character that is an element
        of `needles`, or -1 if there is no such character or `startIdx` is
        greater than or equal to `haystack.length`. If the parameters are not
        valid UTF, the result will still be either -1 or in the range
        [`startIdx` .. `haystack.length`], but will not be reliable
        otherwise.

    Throws:
        If the sequence starting at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.
*/
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    enum scanForward = true;
    enum matchMembers = true;

    return indexOfAnyNeitherImpl!(scanForward, matchMembers)(haystack, needles, cs);
}

/// Ditto
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Nothing left to search at or beyond the end of the haystack.
    if (startIdx >= haystack.length)
        return -1;

    // Search the tail, then translate the hit back into `haystack` indices.
    immutable relative = indexOfAny(haystack[startIdx .. $], needles, cs);
    return relative == -1 ? -1 : relative + cast(ptrdiff_t) startIdx;
}

///
@safe pure unittest
{
    import std.conv : to;

    ptrdiff_t i = "helloWorld".indexOfAny("Wr");
    assert(i == 5);
    i = "öällo world".indexOfAny("lo ");
    assert(i == 4, to!string(i));
}

///
@safe pure unittest
{
    import std.conv : to;

    ptrdiff_t i = "helloWorld".indexOfAny("Wr", 4);
    assert(i == 5);

    i = "Foo öällo world".indexOfAny("lh", 3);
    assert(i == 8, to!string(i));
}
to!T("NSA"), No.caseSensitive) 1873 == -1); 1874 assert(indexOfAny(to!S("dfeffgfff"), to!T("BND"), 1875 No.caseSensitive) == 0); 1876 assert(indexOfAny(to!S("dfeffgfff"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), 1877 No.caseSensitive) == 0); 1878 1879 assert(indexOfAny("\u0100", to!T("\u0100"), No.caseSensitive) == 0); 1880 } 1881 } 1882 } 1883 ); 1884 } 1885 1886 @safe pure unittest 1887 { 1888 import std.conv : to; 1889 import std.traits : EnumMembers; 1890 1891 static foreach (S; AliasSeq!(string, wstring, dstring)) 1892 { 1893 static foreach (T; AliasSeq!(string, wstring, dstring)) 1894 { 1895 assert(indexOfAny(cast(S) null, to!T("a"), 1337) == -1); 1896 assert(indexOfAny(to!S("def"), to!T("AaF"), 0) == -1); 1897 assert(indexOfAny(to!S("abba"), to!T("NSa"), 2) == 3); 1898 assert(indexOfAny(to!S("def"), to!T("fbi"), 1) == 2); 1899 assert(indexOfAny(to!S("dfefffg"), to!T("foo"), 2) == 3); 1900 assert(indexOfAny(to!S("dfeffgfff"), to!T("fsb"), 5) == 6); 1901 1902 assert(indexOfAny(to!S("dfeffgfff"), to!T("NDS"), 1, 1903 No.caseSensitive) == -1); 1904 assert(indexOfAny(to!S("def"), to!T("DRS"), 2, 1905 No.caseSensitive) == -1); 1906 assert(indexOfAny(to!S("abba"), to!T("SI"), 3, 1907 No.caseSensitive) == -1); 1908 assert(indexOfAny(to!S("deO"), to!T("ASIO"), 1, 1909 No.caseSensitive) == 2); 1910 assert(indexOfAny(to!S("dfefffg"), to!T("fbh"), 2, 1911 No.caseSensitive) == 3); 1912 assert(indexOfAny(to!S("dfeffgfff"), to!T("fEe"), 4, 1913 No.caseSensitive) == 4); 1914 assert(indexOfAny(to!S("dfeffgffföä"), to!T("föä"), 9, 1915 No.caseSensitive) == 9); 1916 1917 assert(indexOfAny("\u0100", to!T("\u0100"), 0, 1918 No.caseSensitive) == 0); 1919 } 1920 1921 foreach (cs; EnumMembers!CaseSensitive) 1922 { 1923 assert(indexOfAny("hello\U00010143\u0100\U00010143", 1924 to!S("e\u0100"), 3, cs) == 9); 1925 assert(indexOfAny("hello\U00010143\u0100\U00010143"w, 1926 to!S("h\u0100"), 3, cs) == 7); 1927 assert(indexOfAny("hello\U00010143\u0100\U00010143"d, 1928 to!S("l\u0100"), 5, cs) == 
/**
    Searches `haystack` for the last occurrence of any of the
    characters in `needles`.

    Params:
        haystack = string to search needles in
        needles = characters to search for in `haystack`
        stopIdx = index in `haystack` to stop searching at (exclusive): only
            `haystack[0 .. stopIdx]` is examined; the overload without it
            searches all of `haystack`
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index of the last occurrence of any of the characters of `needles`
        in the searched part of `haystack`. If no character of `needles` is
        found there, `stopIdx` is 0, or `stopIdx` is greater than
        `haystack.length`, then -1 is returned. If the parameters are not
        valid UTF, the result will still be in the range [-1 .. `stopIdx`],
        but will not be reliable otherwise.
*/
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
    @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    enum scanForward = false;
    enum matchMembers = true;

    return indexOfAnyNeitherImpl!(scanForward, matchMembers)(haystack, needles, cs);
}

/// Ditto
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // A bound past the end of the haystack cannot be sliced.
    if (stopIdx > haystack.length)
        return -1;

    return lastIndexOfAny(haystack[0u .. stopIdx], needles, cs);
}

///
@safe pure unittest
{
    ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo");
    assert(i == 8);

    i = "Foo öäöllo world".lastIndexOfAny("öF");
    assert(i == 8);
}

///
@safe pure unittest
{
    import std.conv : to;

    ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo", 4);
    assert(i == 3);

    i = "Foo öäöllo world".lastIndexOfAny("öF", 3);
    assert(i == 0);
}
assert(lastIndexOfAny(to!S("def"), to!T("FBI"), 2046 No.caseSensitive) == 2); 2047 assert(lastIndexOfAny(to!S("dfefffg"), to!T("NSA"), 2048 No.caseSensitive) == -1); 2049 2050 oeIdx = 2; 2051 if (is(S == wstring) || is(S == dstring)) 2052 { 2053 oeIdx = 1; 2054 } 2055 assert(lastIndexOfAny(to!S("ödfeffgfff"), to!T("BND"), 2056 No.caseSensitive) == oeIdx); 2057 2058 assert(lastIndexOfAny("\u0100", to!T("\u0100"), 2059 No.caseSensitive) == 0); 2060 }} 2061 } 2062 } 2063 ); 2064 } 2065 2066 @safe pure unittest 2067 { 2068 import std.conv : to; 2069 import std.exception : assertCTFEable; 2070 2071 assertCTFEable!( 2072 { 2073 static foreach (S; AliasSeq!(string, wstring, dstring)) 2074 { 2075 static foreach (T; AliasSeq!(string, wstring, dstring)) 2076 {{ 2077 enum typeStr = S.stringof ~ " " ~ T.stringof; 2078 2079 assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337) == -1, 2080 typeStr); 2081 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("c"), 7) == 6, 2082 typeStr); 2083 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("cd"), 5) == 3, 2084 typeStr); 2085 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("ef"), 6) == 5, 2086 typeStr); 2087 assert(lastIndexOfAny(to!S("abcdefCdef"), to!T("c"), 8) == 2, 2088 typeStr); 2089 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("x"), 7) == -1, 2090 typeStr); 2091 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("xy"), 4) == -1, 2092 typeStr); 2093 assert(lastIndexOfAny(to!S("öabcdefcdef"), to!T("ö"), 2) == 0, 2094 typeStr); 2095 2096 assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337, 2097 No.caseSensitive) == -1, typeStr); 2098 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("C"), 7, 2099 No.caseSensitive) == 6, typeStr); 2100 assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("cd"), 5, 2101 No.caseSensitive) == 3, typeStr); 2102 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("EF"), 6, 2103 No.caseSensitive) == 5, typeStr); 2104 assert(lastIndexOfAny(to!S("ABCDEFcDEF"), to!T("C"), 8, 2105 No.caseSensitive) == 6, typeStr); 2106 
assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("x"), 7, 2107 No.caseSensitive) == -1, typeStr); 2108 assert(lastIndexOfAny(to!S("abCdefcdef"), to!T("XY"), 4, 2109 No.caseSensitive) == -1, typeStr); 2110 assert(lastIndexOfAny(to!S("ÖABCDEFCDEF"), to!T("ö"), 2, 2111 No.caseSensitive) == 0, typeStr); 2112 }} 2113 } 2114 } 2115 ); 2116 } 2117 2118 /** 2119 Searches `haystack` for a character not in `needles`. 2120 2121 Params: 2122 haystack = string to search for needles in 2123 needles = characters to search for in `haystack` 2124 startIdx = index of a well-formed code point in `haystack` to start 2125 searching from; defaults to 0 2126 cs = specifies whether comparisons are case-sensitive 2127 (`Yes.caseSensitive`) or not (`No.caseSensitive`) 2128 2129 Returns: 2130 The index of the first character in `haystack` that is not an element of 2131 `needles`. If all characters of `haystack` are elements of `needles` or 2132 `startIdx` is greater than or equal to `haystack.length`, then -1 is 2133 returned. If the parameters are not valid UTF, the result will still be 2134 either -1 or in the range [`startIdx` .. `haystack.length`], but will 2135 not be reliable otherwise. 2136 2137 Throws: 2138 If the sequence starting at `startIdx` does not represent a well-formed 2139 code point, then a $(REF UTFException, std,utf) may be thrown. 
/**
    Searches `haystack` for a character not in `needles`.

    Params:
        haystack = string to search in
        needles = characters that do not count as a hit
        startIdx = index of a well-formed code point in `haystack` to start
            searching from; the overload without it searches from index 0
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index of the first character in `haystack` that is not an element of
        `needles`. If all characters of `haystack` are elements of `needles` or
        `startIdx` is greater than or equal to `haystack.length`, then -1 is
        returned. If the parameters are not valid UTF, the result will still be
        either -1 or in the range [`startIdx` .. `haystack.length`], but will
        not be reliable otherwise.

    Throws:
        If the sequence starting at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.
*/
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
    @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    enum scanForward = true;
    enum matchMembers = false;

    return indexOfAnyNeitherImpl!(scanForward, matchMembers)(haystack, needles, cs);
}

/// Ditto
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive)
    @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Nothing left to search at or beyond the end of the haystack.
    if (startIdx >= haystack.length)
        return -1;

    // Search the tail, then translate the hit back into `haystack` indices.
    immutable relative = indexOfAnyNeitherImpl!(true, false)(
        haystack[startIdx .. $], needles, cs);
    return relative == -1 ? -1 : relative + cast(ptrdiff_t) startIdx;
}

///
@safe pure unittest
{
    assert(indexOfNeither("abba", "a", 2) == 2);
    assert(indexOfNeither("def", "de", 1) == 2);
    assert(indexOfNeither("dfefffg", "dfe", 4) == 6);
}

///
@safe pure unittest
{
    assert(indexOfNeither("def", "a") == 0);
    assert(indexOfNeither("def", "de") == 2);
    assert(indexOfNeither("dfefffg", "dfe") == 6);
}
1); 2214 2215 assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 2216 No.caseSensitive) == 0); 2217 assert(indexOfNeither(to!S("def"), to!T("D"), 2218 No.caseSensitive) == 1); 2219 assert(indexOfNeither(to!S("ABca"), to!T("a"), 2220 No.caseSensitive) == 1); 2221 assert(indexOfNeither(to!S("def"), to!T("f"), 2222 No.caseSensitive) == 0); 2223 assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 2224 No.caseSensitive) == 6); 2225 if (is(S == string)) 2226 { 2227 assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"), 2228 No.caseSensitive) == 8, 2229 to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"), 2230 No.caseSensitive))); 2231 } 2232 else 2233 { 2234 assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"), 2235 No.caseSensitive) == 7, 2236 to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"), 2237 No.caseSensitive))); 2238 } 2239 } 2240 } 2241 } 2242 ); 2243 } 2244 2245 @safe pure unittest 2246 { 2247 import std.conv : to; 2248 import std.exception : assertCTFEable; 2249 2250 assertCTFEable!( 2251 { 2252 static foreach (S; AliasSeq!(string, wstring, dstring)) 2253 { 2254 static foreach (T; AliasSeq!(string, wstring, dstring)) 2255 { 2256 assert(indexOfNeither(cast(S) null, to!T("a"), 1) == -1); 2257 assert(indexOfNeither(to!S("def"), to!T("a"), 1) == 1, 2258 to!string(indexOfNeither(to!S("def"), to!T("a"), 1))); 2259 2260 assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 4, 2261 No.caseSensitive) == 4); 2262 assert(indexOfNeither(to!S("def"), to!T("D"), 2, 2263 No.caseSensitive) == 2); 2264 assert(indexOfNeither(to!S("ABca"), to!T("a"), 3, 2265 No.caseSensitive) == -1); 2266 assert(indexOfNeither(to!S("def"), to!T("tzf"), 2, 2267 No.caseSensitive) == -1); 2268 assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 5, 2269 No.caseSensitive) == 6); 2270 if (is(S == string)) 2271 { 2272 assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2, 2273 No.caseSensitive) == 3, to!string(indexOfNeither( 2274 to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive))); 2275 } 
/**
    Finds the last character of `haystack` that is $(I not) an element of
    `needles`.

    Params:
        haystack = string to scan
        needles = set of characters that are skipped while scanning
        stopIdx = index into `haystack` at which the scan stops (exclusive);
            the overload without this parameter scans the whole of
            `haystack`
        cs = `Yes.caseSensitive` (the default) compares characters exactly,
            `No.caseSensitive` ignores case

    Returns:
        The index of the last character of `haystack` that is not an element
        of `needles`. -1 is returned when all scanned characters are elements
        of `needles` or when `stopIdx` is 0. Note that the overload taking
        `stopIdx` also returns -1 whenever `stopIdx` is greater than or equal
        to `haystack.length`. If the arguments are not valid UTF the result
        is still inside [-1 .. `stopIdx`], but is not reliable otherwise.
*/
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    return indexOfAnyNeitherImpl!(false, false)(haystack, needles, cs);
}

/// Ditto
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // The bound is strict: a `stopIdx` equal to `haystack.length` is
    // rejected as well, which the documented unittests rely on.
    if (stopIdx >= haystack.length)
        return -1;

    auto head = haystack[0 .. stopIdx];
    return indexOfAnyNeitherImpl!(false, false)(head, needles, cs);
}
No.caseSensitive) == 1); 2394 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"), 2395 No.caseSensitive) == 6); 2396 assert(lastIndexOfNeither(to!S("dfeffgfffö"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), 2397 No.caseSensitive) == 8, to!string(lastIndexOfNeither(to!S("dfeffgfffö"), 2398 to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), No.caseSensitive))); 2399 }} 2400 } 2401 } 2402 ); 2403 } 2404 2405 @safe pure unittest 2406 { 2407 import std.conv : to; 2408 import std.exception : assertCTFEable; 2409 2410 assertCTFEable!( 2411 { 2412 static foreach (S; AliasSeq!(string, wstring, dstring)) 2413 { 2414 static foreach (T; AliasSeq!(string, wstring, dstring)) 2415 {{ 2416 assert(lastIndexOfNeither(cast(S) null, to!T("a"), 1337) == -1); 2417 assert(lastIndexOfNeither(to!S("def"), to!T("f")) == 1); 2418 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2); 2419 2420 ptrdiff_t oeIdx = 4; 2421 if (is(S == string)) 2422 { 2423 oeIdx = 5; 2424 } 2425 2426 auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg"), 2427 7); 2428 assert(foundOeIdx == oeIdx, to!string(foundOeIdx)); 2429 2430 assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"), 6, 2431 No.caseSensitive) == 5); 2432 assert(lastIndexOfNeither(to!S("def"), to!T("MI6"), 2, 2433 No.caseSensitive) == 1, to!string(lastIndexOfNeither(to!S("def"), 2434 to!T("MI6"), 2, No.caseSensitive))); 2435 assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"), 6, 2436 No.caseSensitive) == 5, to!string(lastIndexOfNeither( 2437 to!S("abbadeafsb"), to!T("fSb"), 6, No.caseSensitive))); 2438 assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"), 3, 2439 No.caseSensitive) == 1); 2440 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"), 2, 2441 No.caseSensitive) == 1, to!string(lastIndexOfNeither( 2442 to!S("dfefffg"), to!T("NSA"), 2, No.caseSensitive))); 2443 }} 2444 } 2445 } 2446 ); 2447 } 2448 2449 /** 2450 * Returns the _representation of a string, which has the same type 2451 * as the string except the character type is 
/**
 * Reinterprets a string as an array of unsigned integers of the same width
 * as its code units: `ubyte` for `char`, `ushort` for `wchar` and `uint`
 * for `dchar`. The type qualifiers of the character type are carried over
 * to the integer type.
 *
 * Params:
 *     s = The string to return the _representation of.
 *
 * Returns:
 *     The _representation of the passed string; the result is a cast of
 *     `s`, not a copy.
 */
auto representation(Char)(Char[] s) @safe pure nothrow @nogc
if (isSomeChar!Char)
{
    import std.traits : ModifyTypePreservingTQ;

    // Maps an unqualified character type to the unsigned integer type of
    // identical size (character types are 1, 2 or 4 bytes wide).
    template CodeUnitInt(T)
    {
        static if (T.sizeof == 1)
            alias CodeUnitInt = ubyte;
        else static if (T.sizeof == 2)
            alias CodeUnitInt = ushort;
        else
            alias CodeUnitInt = uint;
    }

    alias Rep = ModifyTypePreservingTQ!(CodeUnitInt, Char)[];
    return cast(Rep) s;
}
/**
 * Capitalize the first character of `input` and convert the rest of `input`
 * to lowercase.
 *
 * Params:
 *     input = The string to _capitalize.
 *
 * Returns:
 *     The capitalized string, as a newly built array with the same code
 *     unit type as `input`.
 *
 * See_Also:
 *      $(REF asCapitalized, std,uni) for a lazy range version that doesn't allocate memory
 */
S capitalize(S)(S input) @trusted pure
if (isSomeString!S)
{
    import std.array : array;
    import std.uni : asCapitalized;
    import std.utf : byUTF;

    alias CodeUnit = ElementEncodingType!S;

    // Capitalize lazily, then re-encode to the caller's code unit type and
    // materialize the result.
    auto capitalized = asCapitalized(input);
    return capitalized.byUTF!CodeUnit.array;
}

// Overload for types that convert to a string (but are not strings
// themselves): forwards to the string overload above.
auto capitalize(S)(auto ref S s)
if (!isSomeString!S && is(StringTypeOf!S))
{
    alias Str = StringTypeOf!S;
    return capitalize!Str(s);
}
/++
    Split `s` into an array of lines according to the unicode standard using
    `'\r'`, `'\n'`, `"\r\n"`, $(REF lineSep, std,uni),
    $(REF paraSep, std,uni), `U+0085` (NEL), `'\v'` and `'\f'`
    as delimiters. If `keepTerm` is set to `KeepTerminator.yes`, then the
    delimiter is included in the strings returned.

    Does not throw on invalid UTF; such is simply passed unchanged
    to the output.

    Allocates memory; use $(LREF lineSplitter) for an alternative that
    does not.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

    Params:
        s = a string of `chars`, `wchars`, or `dchars`, or any custom
            type that casts to a `string` type
        keepTerm = whether delimiter is included or not in the results
    Returns:
        array of strings, each element is a line that is a slice of `s`;
        a trailing empty line after the last delimiter is not emitted
    See_Also:
        $(LREF lineSplitter)
        $(REF splitter, std,algorithm)
        $(REF splitter, std,regex)
 +/
alias KeepTerminator = Flag!"keepTerminator";

/// ditto
C[][] splitLines(C)(C[] s, KeepTerminator keepTerm = No.keepTerminator) @safe pure
if (isSomeChar!C)
{
    import std.array : appender;
    import std.uni : lineSep, paraSep;

    auto lines = appender!(C[][])();
    size_t lineStart = 0;

    for (size_t i = 0; i < s.length; ++i)
    {
        // Number of code units occupied by a terminator starting at `i`;
        // stays 0 when `s[i]` does not begin a terminator.
        size_t termLen = 0;

        switch (s[i])
        {
            case '\v', '\f', '\n':
                termLen = 1;
                break;

            case '\r':
                // "\r\n" counts as one terminator.
                termLen = (i + 1 < s.length && s[i + 1] == '\n') ? 2 : 1;
                break;

            static if (C.sizeof == 1)
            {
                /* UTF-8 terminators are matched byte-wise:
                 *  lineSep is E2 80 A8
                 *  paraSep is E2 80 A9
                 */
                case 0xE2:
                    if (i + 2 < s.length &&
                        s[i + 1] == 0x80 &&
                        (s[i + 2] == 0xA8 || s[i + 2] == 0xA9))
                    {
                        termLen = 3;
                    }
                    break;

                /* NEL is C2 85 */
                case 0xC2:
                    if (i + 1 < s.length && s[i + 1] == 0x85)
                        termLen = 2;
                    break;
            }
            else
            {
                case lineSep:
                case paraSep:
                case '\u0085':
                    termLen = 1;
                    break;
            }

            default:
                break;
        }

        if (termLen == 0)
            continue;

        const lineEnd = (keepTerm == Yes.keepTerminator) ? i + termLen : i;
        lines.put(s[lineStart .. lineEnd]);
        lineStart = i + termLen;

        // Skip the rest of a multi-unit terminator; the loop's own
        // increment steps over its last code unit.
        i += termLen - 1;
    }

    // Text after the last terminator forms a final, unterminated line.
    if (lineStart != s.length)
        lines.put(s[lineStart .. $]);

    return lines.data;
}
// Lazy range over the lines of `_input`; this is the type returned by
// `lineSplitter`. Each line is located on demand by `front` and returned
// as a slice of `_input`. When `keepTerm` is `Yes.keepTerminator` the
// slice includes the line terminator.
private struct LineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)
{
    import std.conv : unsigned;
    import std.uni : lineSep, paraSep;
private:
    Range _input;

    alias IndexType = typeof(unsigned(_input.length));
    // Sentinel stored in `iStart` while the current line has not been located.
    enum IndexType _unComputed = IndexType.max;
    IndexType iStart = _unComputed; // start of the current line, or `_unComputed`
    IndexType iEnd = 0;             // end (exclusive) of the slice returned by `front`
    IndexType iNext = 0;            // index at which the following line starts

public:
    this(Range input)
    {
        _input = input;
    }

    static if (isInfinite!Range)
    {
        enum bool empty = false;
    }
    else
    {
        // Empty once no line is cached and the scan position has reached
        // the end of the input.
        @property bool empty()
        {
            return iStart == _unComputed && iNext == _input.length;
        }
    }

    // Returns the current line. The first call for a given line scans
    // forward from `iNext` to the next terminator (or to the end of the
    // input) and caches the bounds; later calls reuse the cached bounds.
    @property typeof(_input) front()
    {
        if (iStart == _unComputed)
        {
            iStart = iNext;
        Loop:
            for (IndexType i = iNext; ; ++i)
            {
                // End of input: the remainder is the final line.
                if (i == _input.length)
                {
                    iEnd = i;
                    iNext = i;
                    break Loop;
                }
                switch (_input[i])
                {
                case '\v', '\f', '\n':
                    // Single-unit terminator; also the target of the
                    // `goto case '\n'` statements below.
                    iEnd = i + (keepTerm == Yes.keepTerminator);
                    iNext = i + 1;
                    break Loop;

                case '\r':
                    // "\r\n" is treated as one terminator; a lone '\r'
                    // is handled like '\n'.
                    if (i + 1 < _input.length && _input[i + 1] == '\n')
                    {
                        iEnd = i + (keepTerm == Yes.keepTerminator) * 2;
                        iNext = i + 2;
                        break Loop;
                    }
                    else
                    {
                        goto case '\n';
                    }

                    static if (_input[i].sizeof == 1)
                    {
                        /* Manually decode:
                         *  lineSep is E2 80 A8
                         *  paraSep is E2 80 A9
                         */
                    case 0xE2:
                        if (i + 2 < _input.length &&
                            _input[i + 1] == 0x80 &&
                            (_input[i + 2] == 0xA8 || _input[i + 2] == 0xA9)
                        )
                        {
                            iEnd = i + (keepTerm == Yes.keepTerminator) * 3;
                            iNext = i + 3;
                            break Loop;
                        }
                        else
                            goto default;
                        /* Manually decode:
                         *  NEL is C2 85
                         */
                    case 0xC2:
                        if (i + 1 < _input.length && _input[i + 1] == 0x85)
                        {
                            iEnd = i + (keepTerm == Yes.keepTerminator) * 2;
                            iNext = i + 2;
                            break Loop;
                        }
                        else
                            goto default;
                    }
                    else
                    {
                        // For elements wider than one byte the separators
                        // are matched as single elements.
                    case '\u0085':
                    case lineSep:
                    case paraSep:
                        goto case '\n';
                    }

                default:
                    // Not a terminator: keep scanning.
                    break;
                }
            }
        }
        return _input[iStart .. iEnd];
    }

    // Advances to the next line. If `front` was never evaluated for the
    // current line it is evaluated here so that `iNext` moves past it.
    void popFront()
    {
        if (iStart == _unComputed)
        {
            assert(!empty, "Can not popFront an empty range");
            front;
        }
        iStart = _unComputed;
    }

    static if (isForwardRange!Range)
    {
        // Copies the splitter state and saves the underlying input range.
        @property typeof(this) save()
        {
            auto ret = this;
            ret._input = _input.save;
            return ret;
        }
    }
}

/***********************************
 *  Split an array or sliceable range of characters into a range of lines
    using `'\r'`, `'\n'`, `'\v'`, `'\f'`, `"\r\n"`,
    $(REF lineSep, std,uni), $(REF paraSep, std,uni) and `'\u0085'` (NEL)
    as delimiters. If `keepTerm` is set to `Yes.keepTerminator`, then the
    delimiter is included in the slices returned.

    Does not throw on invalid UTF; such is simply passed unchanged
    to the output.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

    Does not allocate memory.

  Params:
    r = array of `chars`, `wchars`, or `dchars` or a sliceable range
    keepTerm = whether delimiter is included or not in the results
  Returns:
    range of slices of the input range `r`

  See_Also:
    $(LREF splitLines)
    $(REF splitter, std,algorithm)
    $(REF splitter, std,regex)
 */
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(Range r)
if (hasSlicing!Range && hasLength!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range)
{
    return LineSplitter!(keepTerm, Range)(r);
}

/// Ditto
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, C)(C[] r)
if (isSomeChar!C)
{
    return LineSplitter!(keepTerm, C[])(r);
}
2970 */ 2971 assert(lineSplitter(s).array == splitLines(s)); 2972 } 2973 2974 @safe pure unittest 2975 { 2976 import std.array : array; 2977 import std.conv : to; 2978 import std.exception : assertCTFEable; 2979 2980 assertCTFEable!( 2981 { 2982 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 2983 {{ 2984 auto s = to!S( 2985 "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\n" ~ 2986 "sunday\nmon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085" 2987 ); 2988 2989 auto lines = lineSplitter(s).array; 2990 assert(lines.length == 14); 2991 assert(lines[0] == ""); 2992 assert(lines[1] == "peter"); 2993 assert(lines[2] == ""); 2994 assert(lines[3] == "paul"); 2995 assert(lines[4] == "jerry"); 2996 assert(lines[5] == "ice"); 2997 assert(lines[6] == "cream"); 2998 assert(lines[7] == ""); 2999 assert(lines[8] == "sunday"); 3000 assert(lines[9] == "mon\u2030day"); 3001 assert(lines[10] == "schadenfreude"); 3002 assert(lines[11] == "kindergarten"); 3003 assert(lines[12] == ""); 3004 assert(lines[13] == "cookies"); 3005 3006 3007 ubyte[] u = ['a', 0xFF, 0x12, 'b']; // invalid UTF 3008 auto ulines = lineSplitter(cast(char[]) u).array; 3009 assert(cast(ubyte[])(ulines[0]) == u); 3010 3011 lines = lineSplitter!(Yes.keepTerminator)(s).array; 3012 assert(lines.length == 14); 3013 assert(lines[0] == "\r"); 3014 assert(lines[1] == "peter\n"); 3015 assert(lines[2] == "\r"); 3016 assert(lines[3] == "paul\r\n"); 3017 assert(lines[4] == "jerry\u2028"); 3018 assert(lines[5] == "ice\u2029"); 3019 assert(lines[6] == "cream\n"); 3020 assert(lines[7] == "\n"); 3021 assert(lines[8] == "sunday\n"); 3022 assert(lines[9] == "mon\u2030day\n"); 3023 assert(lines[10] == "schadenfreude\v"); 3024 assert(lines[11] == "kindergarten\f"); 3025 assert(lines[12] == "\v"); 3026 assert(lines[13] == "cookies\u0085"); 3027 3028 s.popBack(); // Lop-off trailing \n 3029 lines = lineSplitter(s).array; 3030 assert(lines.length == 14); 3031 assert(lines[9] == "mon\u2030day"); 
3032 3033 lines = lineSplitter!(Yes.keepTerminator)(s).array; 3034 assert(lines.length == 14); 3035 assert(lines[13] == "cookies"); 3036 }} 3037 }); 3038 } 3039 3040 /// 3041 @nogc @safe pure unittest 3042 { 3043 auto s = "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\nmon\u2030day\n"; 3044 auto lines = s.lineSplitter(); 3045 static immutable witness = ["", "peter", "", "paul", "jerry", "ice", "cream", "", "sunday", "mon\u2030day"]; 3046 uint i; 3047 foreach (line; lines) 3048 { 3049 assert(line == witness[i++]); 3050 } 3051 assert(i == witness.length); 3052 } 3053 3054 @nogc @safe pure unittest 3055 { 3056 import std.algorithm.comparison : equal; 3057 import std.range : only; 3058 3059 auto s = "std/string.d"; 3060 auto as = TestAliasedString(s); 3061 assert(equal(s.lineSplitter(), as.lineSplitter())); 3062 3063 enum S : string { a = "hello\nworld" } 3064 assert(equal(S.a.lineSplitter(), only("hello", "world"))); 3065 3066 char[S.a.length] sa = S.a[]; 3067 assert(equal(sa.lineSplitter(), only("hello", "world"))); 3068 } 3069 3070 @safe pure unittest 3071 { 3072 auto s = "line1\nline2"; 3073 auto spl0 = s.lineSplitter!(Yes.keepTerminator); 3074 auto spl1 = spl0.save; 3075 spl0.popFront; 3076 assert(spl1.front ~ spl0.front == s); 3077 string r = "a\xC2\x86b"; 3078 assert(r.lineSplitter.front == r); 3079 } 3080 3081 /++ 3082 Strips leading whitespace (as defined by $(REF isWhite, std,uni)) or 3083 as specified in the second argument. 3084 3085 Params: 3086 input = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 3087 of characters 3088 chars = string of characters to be stripped 3089 3090 Returns: `input` stripped of leading whitespace or characters 3091 specified in the second argument. 3092 3093 Postconditions: `input` and the returned value 3094 will share the same tail (see $(REF sameTail, std,array)). 
/++
    Strips leading whitespace (as defined by $(REF isWhite, std,uni)) or
    as specified in the second argument.

    Params:
        input = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        chars = string of characters to be stripped

    Returns: `input` stripped of leading whitespace or characters
    specified in the second argument.

    Postconditions: `input` and the returned value
    will share the same tail (see $(REF sameTail, std,array)).

    See_Also:
        Generic stripping on ranges: $(REF _stripLeft, std, algorithm, mutation)
  +/
auto stripLeft(Range)(Range input)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isInfinite!Range && !isConvertibleToString!Range)
{
    import std.traits : isDynamicArray;
    static import std.ascii;
    static import std.uni;

    static if (is(immutable ElementEncodingType!Range == immutable dchar)
        || is(immutable ElementEncodingType!Range == immutable wchar))
    {
        // Decoding is never needed for dchar. It happens not to be needed
        // here for wchar because no whitespace is outside the basic
        // multilingual plane meaning every whitespace character is encoded
        // with a single wchar and due to the design of UTF-16 those wchars
        // will not occur as part of the encoding of multi-wchar codepoints.
        static if (isDynamicArray!Range)
        {
            // Arrays: find the first non-white element and slice from it.
            foreach (i; 0 .. input.length)
            {
                if (!std.uni.isWhite(input[i]))
                    return input[i .. $];
            }
            // Everything was whitespace: return the empty tail.
            return input[$ .. $];
        }
        else
        {
            // Other ranges: pop elements while they are whitespace.
            while (!input.empty)
            {
                if (!std.uni.isWhite(input.front))
                    break;
                input.popFront();
            }
            return input;
        }
    }
    else
    {
        static if (isDynamicArray!Range)
        {
            // ASCII optimization for dynamic arrays.
            size_t i = 0;
            for (const size_t end = input.length; i < end; ++i)
            {
                auto c = input[i];
                // A non-ASCII code unit needs decoding: switch to the
                // general path below, starting at this position.
                if (c >= 0x80) goto NonAsciiPath;
                if (!std.ascii.isWhite(c)) break;
            }
            input = input[i .. $];
            return input;

        NonAsciiPath:
            input = input[i .. $];
            // Fall through to standard case.
        }

        import std.utf : decode, decodeFront, UseReplacementDchar;

        static if (isNarrowString!Range)
        {
            // Decode one code point at a time; `decode` advances `index`,
            // so `saveIndex` remembers where the current code point began.
            for (size_t index = 0; index < input.length;)
            {
                const saveIndex = index;
                if (!std.uni.isWhite(decode!(UseReplacementDchar.yes)(input, index)))
                    return input[saveIndex .. $];
            }
            return input[$ .. $];
        }
        else
        {
            while (!input.empty)
            {
                auto c = input.front;
                if (std.ascii.isASCII(c))
                {
                    if (!std.ascii.isWhite(c))
                        break;
                    input.popFront();
                }
                else
                {
                    // `decodeFront` consumes the code point from `input`,
                    // so keep a copy positioned before it to return if the
                    // code point turns out not to be whitespace.
                    auto save = input.save;
                    auto dc = decodeFront!(UseReplacementDchar.yes)(input);
                    if (!std.uni.isWhite(dc))
                        return save;
                }
            }
            return input;
        }
    }
}

// Overload for types that convert to a string: forwards to the overload
// above, instantiated with the corresponding string type.
auto stripLeft(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    return stripLeft!(StringTypeOf!Range)(str);
}

/// Ditto
auto stripLeft(Range, Char)(Range input, const(Char)[] chars)
if (((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
     isConvertibleToString!Range) && isSomeChar!Char)
{
    static if (isConvertibleToString!Range)
        return stripLeft!(StringTypeOf!Range)(input, chars);
    else
    {
        // Pop leading elements for as long as they occur in `chars`.
        for (; !input.empty; input.popFront)
        {
            if (chars.indexOf(input.front) == -1)
                break;
        }
        return input;
    }
}
/++
    Strips trailing whitespace (as defined by $(REF isWhite, std,uni)) or
    as specified in the second argument.

    Params:
        str = string or random access range of characters
        chars = string of characters to be stripped

    Returns:
        slice of `str` stripped of trailing whitespace or characters
        specified in the second argument.

    See_Also:
        Generic stripping on ranges: $(REF _stripRight, std, algorithm, mutation)
  +/
auto stripRight(Range)(Range str)
if (isSomeString!Range ||
    isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
    !isConvertibleToString!Range &&
    isSomeChar!(ElementEncodingType!Range))
{
    import std.traits : isDynamicArray;
    import std.uni : isWhite;
    alias C = Unqual!(ElementEncodingType!(typeof(str)));

    static if (isSomeString!(typeof(str)) && C.sizeof >= 2)
    {
        // No whitespace takes multiple wchars to encode and due to
        // the design of UTF-16 those wchars will not occur as part
        // of the encoding of multi-wchar codepoints.
        foreach_reverse (i, C c; str)
        {
            if (!isWhite(c))
                return str[0 .. i + 1];
        }
        // Every element was whitespace.
        return str[0 .. 0];
    }
    else
    {
        // ASCII optimization for dynamic arrays.
        static if (isDynamicArray!(typeof(str)))
        {
            static import std.ascii;
            foreach_reverse (i, C c; str)
            {
                if (c >= 0x80)
                {
                    // Non-ASCII code unit: trim what has been stripped so
                    // far and continue with the decoding loop below.
                    str = str[0 .. i + 1];
                    goto NonAsciiPath;
                }
                if (!std.ascii.isWhite(c))
                {
                    return str[0 .. i + 1];
                }
            }
            return str[0 .. 0];
        }

    NonAsciiPath:

        // Walk backwards; `i` indexes the last code unit still under
        // consideration. When the loop runs off the front, `i` wraps around
        // so that `i + 1` below is 0.
        size_t i = str.length;
        while (i--)
        {
            static if (C.sizeof >= 2)
            {
                // No whitespace takes multiple wchars to encode and due to
                // the design of UTF-16 those wchars will not occur as part
                // of the encoding of multi-wchar codepoints.
                if (isWhite(str[i]))
                    continue;
                break;
            }
            else static if (C.sizeof == 1)
            {
                const cx = str[i];
                if (cx <= 0x7F)
                {
                    if (isWhite(cx))
                        continue;
                    break;
                }
                else
                {
                    // `cx` must be a UTF-8 continuation byte (10xxxxxx)
                    // with at least one byte before it; otherwise stop.
                    if (i == 0 || (0b1100_0000 & cx) != 0b1000_0000)
                        break;
                    // Low six payload bits of the code point.
                    const uint d = 0b0011_1111 & cx;
                    const c2 = str[i - 1];
                    if ((c2 & 0b1110_0000) == 0b1100_0000) // 2 byte encoding.
                    {
                        if (isWhite(d + (uint(c2 & 0b0001_1111) << 6)))
                        {
                            // Skip the lead byte as well.
                            i--;
                            continue;
                        }
                        break;
                    }
                    // Otherwise `c2` must itself be a continuation byte
                    // with a byte before it.
                    if (i == 1 || (c2 & 0b1100_0000) != 0b1000_0000)
                        break;
                    const c3 = str[i - 2];
                    // In UTF-8 all whitespace is encoded in 3 bytes or fewer.
                    if ((c3 & 0b1111_0000) == 0b1110_0000 &&
                        isWhite(d + (uint(c2 & 0b0011_1111) << 6) + (uint(c3 & 0b0000_1111) << 12)))
                    {
                        // Skip the two preceding bytes of the sequence.
                        i -= 2;
                        continue;
                    }
                    break;
                }
            }
            else
                static assert(0);
        }

        return str[0 .. i + 1];
    }
}

// Overload for types that convert to a string: forwards to the overload
// above, instantiated with the corresponding string type.
auto stripRight(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    return stripRight!(StringTypeOf!Range)(str);
}
wchar, dchar)) 3439 { 3440 foreach (s; invalidUTFstrings!C()) 3441 { 3442 cast(void) stripRight(s.byUTF!C).array; 3443 } 3444 } 3445 3446 cast(void) stripRight("a\x80".byUTF!char).array; 3447 wstring ws = ['a', cast(wchar) 0xDC00]; 3448 cast(void) stripRight(ws.byUTF!wchar).array; 3449 } 3450 3451 /// Ditto 3452 auto stripRight(Range, Char)(Range str, const(Char)[] chars) 3453 if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) || 3454 isConvertibleToString!Range) && isSomeChar!Char) 3455 { 3456 static if (isConvertibleToString!Range) 3457 return stripRight!(StringTypeOf!Range)(str, chars); 3458 else 3459 { 3460 for (; !str.empty; str.popBack) 3461 { 3462 if (chars.indexOf(str.back) == -1) 3463 break; 3464 } 3465 return str; 3466 } 3467 } 3468 3469 /// 3470 @safe pure 3471 unittest 3472 { 3473 assert(stripRight(" hello world ", "x") == 3474 " hello world "); 3475 assert(stripRight(" hello world ", " ") == 3476 " hello world"); 3477 assert(stripRight(" hello worldxy ", "xy ") == 3478 " hello world"); 3479 } 3480 3481 @safe pure unittest 3482 { 3483 assert(testAliasedString!stripRight("hello xyz ", "xyz ")); 3484 } 3485 3486 @safe pure unittest 3487 { 3488 import std.array : array; 3489 import std.utf : byChar, byDchar, byUTF, byWchar; 3490 3491 assert(stripRight(" hello world xyz ".byChar, 3492 "xyz ").array == " hello world"); 3493 assert(stripRight("\u2028hello world\u2020\u2028"w.byWchar, 3494 "\u2028").array == "\u2028hello world\u2020"); 3495 assert(stripRight("hello world\U00010001"w.byWchar, 3496 " ").array == "hello world\U00010001"w); 3497 assert(stripRight("hello world\U00010001 xy"d.byDchar, 3498 "\U00010001 xy").array == "hello world"d); 3499 assert(stripRight("hello\u2020"w, "\u2020"w) == "hello"w); 3500 assert(stripRight("hello\U00010001"d, "\U00010001"d) == "hello"d); 3501 assert(stripRight(" hello ", "") == " hello "); 3502 } 3503 3504 3505 /++ 3506 Strips both leading and trailing whitespace (as defined by 3507 $(REF isWhite, 
std,uni)) or as specified in the second argument. 3508 3509 Params: 3510 str = string or random access range of characters 3511 chars = string of characters to be stripped 3512 leftChars = string of leading characters to be stripped 3513 rightChars = string of trailing characters to be stripped 3514 3515 Returns: 3516 slice of `str` stripped of leading and trailing whitespace 3517 or characters as specified in the second argument. 3518 3519 See_Also: 3520 Generic stripping on ranges: $(REF _strip, std, algorithm, mutation) 3521 +/ 3522 auto strip(Range)(Range str) 3523 if (isSomeString!Range || 3524 isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range && 3525 !isConvertibleToString!Range && 3526 isSomeChar!(ElementEncodingType!Range)) 3527 { 3528 return stripRight(stripLeft(str)); 3529 } 3530 3531 /// 3532 @safe pure unittest 3533 { 3534 import std.uni : lineSep, paraSep; 3535 assert(strip(" hello world ") == 3536 "hello world"); 3537 assert(strip("\n\t\v\rhello world\n\t\v\r") == 3538 "hello world"); 3539 assert(strip("hello world") == 3540 "hello world"); 3541 assert(strip([lineSep] ~ "hello world" ~ [lineSep]) == 3542 "hello world"); 3543 assert(strip([paraSep] ~ "hello world" ~ [paraSep]) == 3544 "hello world"); 3545 } 3546 3547 auto strip(Range)(auto ref Range str) 3548 if (isConvertibleToString!Range) 3549 { 3550 return strip!(StringTypeOf!Range)(str); 3551 } 3552 3553 @safe pure unittest 3554 { 3555 assert(testAliasedString!strip(" hello world ")); 3556 } 3557 3558 @safe pure unittest 3559 { 3560 import std.algorithm.comparison : equal; 3561 import std.conv : to; 3562 import std.exception : assertCTFEable; 3563 3564 assertCTFEable!( 3565 { 3566 static foreach (S; AliasSeq!( char[], const char[], string, 3567 wchar[], const wchar[], wstring, 3568 dchar[], const dchar[], dstring)) 3569 { 3570 assert(equal(stripLeft(to!S(" foo\t ")), "foo\t ")); 3571 assert(equal(stripLeft(to!S("\u2008 foo\t \u2007")), "foo\t \u2007")); 3572 
assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB \r")); 3573 assert(equal(stripLeft(to!S("1")), "1")); 3574 assert(equal(stripLeft(to!S("\U0010FFFE")), "\U0010FFFE")); 3575 assert(equal(stripLeft(to!S("")), "")); 3576 3577 assert(equal(stripRight(to!S(" foo\t ")), " foo")); 3578 assert(equal(stripRight(to!S("\u2008 foo\t \u2007")), "\u2008 foo")); 3579 assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r")), "\u0085 μ \u0085 \u00BB")); 3580 assert(equal(stripRight(to!S("1")), "1")); 3581 assert(equal(stripRight(to!S("\U0010FFFE")), "\U0010FFFE")); 3582 assert(equal(stripRight(to!S("")), "")); 3583 3584 assert(equal(strip(to!S(" foo\t ")), "foo")); 3585 assert(equal(strip(to!S("\u2008 foo\t \u2007")), "foo")); 3586 assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB")); 3587 assert(equal(strip(to!S("\U0010FFFE")), "\U0010FFFE")); 3588 assert(equal(strip(to!S("")), "")); 3589 } 3590 }); 3591 } 3592 3593 @safe pure unittest 3594 { 3595 import std.array : sameHead, sameTail; 3596 import std.exception : assertCTFEable; 3597 assertCTFEable!( 3598 { 3599 wstring s = " "; 3600 assert(s.sameTail(s.stripLeft())); 3601 assert(s.sameHead(s.stripRight())); 3602 }); 3603 } 3604 3605 /// Ditto 3606 auto strip(Range, Char)(Range str, const(Char)[] chars) 3607 if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) || 3608 isConvertibleToString!Range) && isSomeChar!Char) 3609 { 3610 static if (isConvertibleToString!Range) 3611 return strip!(StringTypeOf!Range)(str, chars); 3612 else 3613 return stripRight(stripLeft(str, chars), chars); 3614 } 3615 3616 /// 3617 @safe pure unittest 3618 { 3619 assert(strip(" hello world ", "x") == 3620 " hello world "); 3621 assert(strip(" hello world ", " ") == 3622 "hello world"); 3623 assert(strip(" xyxyhello worldxyxy ", "xy ") == 3624 "hello world"); 3625 assert(strip("\u2020hello\u2020"w, "\u2020"w) == "hello"w); 3626 assert(strip("\U00010001hello\U00010001"d, "\U00010001"d) 
== "hello"d); 3627 assert(strip(" hello ", "") == " hello "); 3628 } 3629 3630 @safe pure unittest 3631 { 3632 assert(testAliasedString!strip(" xyz hello world xyz ", "xyz ")); 3633 } 3634 3635 /// Ditto 3636 auto strip(Range, Char)(Range str, const(Char)[] leftChars, const(Char)[] rightChars) 3637 if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) || 3638 isConvertibleToString!Range) && isSomeChar!Char) 3639 { 3640 static if (isConvertibleToString!Range) 3641 return strip!(StringTypeOf!Range)(str, leftChars, rightChars); 3642 else 3643 return stripRight(stripLeft(str, leftChars), rightChars); 3644 } 3645 3646 /// 3647 @safe pure unittest 3648 { 3649 assert(strip("xxhelloyy", "x", "y") == "hello"); 3650 assert(strip(" xyxyhello worldxyxyzz ", "xy ", "xyz ") == 3651 "hello world"); 3652 assert(strip("\u2020hello\u2028"w, "\u2020"w, "\u2028"w) == "hello"w); 3653 assert(strip("\U00010001hello\U00010002"d, "\U00010001"d, "\U00010002"d) == 3654 "hello"d); 3655 assert(strip(" hello ", "", "") == " hello "); 3656 } 3657 3658 @safe pure unittest 3659 { 3660 assert(testAliasedString!strip(" xy hello world pq ", "xy ", "pq ")); 3661 } 3662 3663 @safe pure unittest 3664 { 3665 import std.algorithm.comparison : equal; 3666 import std.conv : to; 3667 import std.exception : assertCTFEable; 3668 3669 assertCTFEable!( 3670 { 3671 static foreach (S; AliasSeq!( char[], const char[], string, 3672 wchar[], const wchar[], wstring, 3673 dchar[], const dchar[], dstring)) 3674 { 3675 assert(equal(stripLeft(to!S(" \tfoo\t "), "\t "), "foo\t ")); 3676 assert(equal(stripLeft(to!S("\u2008 foo\t \u2007"), "\u2008 "), 3677 "foo\t \u2007")); 3678 assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r"), "\u0085 "), 3679 "μ \u0085 \u00BB \r")); 3680 assert(equal(stripLeft(to!S("1"), " "), "1")); 3681 assert(equal(stripLeft(to!S("\U0010FFFE"), " "), "\U0010FFFE")); 3682 assert(equal(stripLeft(to!S(""), " "), "")); 3683 3684 assert(equal(stripRight(to!S(" foo\t "), "\t "), 
" foo")); 3685 assert(equal(stripRight(to!S("\u2008 foo\t \u2007"), "\u2007\t "), 3686 "\u2008 foo")); 3687 assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r"), "\r "), 3688 "\u0085 μ \u0085 \u00BB")); 3689 assert(equal(stripRight(to!S("1"), " "), "1")); 3690 assert(equal(stripRight(to!S("\U0010FFFE"), " "), "\U0010FFFE")); 3691 assert(equal(stripRight(to!S(""), " "), "")); 3692 3693 assert(equal(strip(to!S(" foo\t "), "\t "), "foo")); 3694 assert(equal(strip(to!S("\u2008 foo\t \u2007"), "\u2008\u2007\t "), 3695 "foo")); 3696 assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r"), "\u0085\r "), 3697 "μ \u0085 \u00BB")); 3698 assert(equal(strip(to!S("\U0010FFFE"), " "), "\U0010FFFE")); 3699 assert(equal(strip(to!S(""), " "), "")); 3700 3701 assert(equal(strip(to!S(" \nfoo\t "), "\n ", "\t "), "foo")); 3702 assert(equal(strip(to!S("\u2008\n foo\t \u2007"), 3703 "\u2008\n ", "\u2007\t "), "foo")); 3704 assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB μ \u00BB\r"), 3705 "\u0085 ", "\u00BB\r "), "μ \u0085 \u00BB μ")); 3706 assert(equal(strip(to!S("\U0010FFFE"), " ", " "), "\U0010FFFE")); 3707 assert(equal(strip(to!S(""), " ", " "), "")); 3708 } 3709 }); 3710 } 3711 3712 @safe pure unittest 3713 { 3714 import std.array : sameHead, sameTail; 3715 import std.exception : assertCTFEable; 3716 assertCTFEable!( 3717 { 3718 wstring s = " xyz "; 3719 assert(s.sameTail(s.stripLeft(" "))); 3720 assert(s.sameHead(s.stripRight(" "))); 3721 }); 3722 } 3723 3724 3725 /++ 3726 If `str` ends with `delimiter`, then `str` is returned without 3727 `delimiter` on its end. If it `str` does $(I not) end with 3728 `delimiter`, then it is returned unchanged. 3729 3730 If no `delimiter` is given, then one trailing `'\r'`, `'\n'`, 3731 `"\r\n"`, `'\f'`, `'\v'`, $(REF lineSep, std,uni), $(REF paraSep, std,uni), or $(REF nelSep, std,uni) 3732 is removed from the end of `str`. If `str` does not end with any of those characters, 3733 then it is returned unchanged. 
3734 3735 Params: 3736 str = string or indexable range of characters 3737 delimiter = string of characters to be sliced off end of str[] 3738 3739 Returns: 3740 slice of str 3741 +/ 3742 Range chomp(Range)(Range str) 3743 if ((isRandomAccessRange!Range && isSomeChar!(ElementEncodingType!Range) || 3744 isNarrowString!Range) && 3745 !isConvertibleToString!Range) 3746 { 3747 import std.uni : lineSep, paraSep, nelSep; 3748 if (str.empty) 3749 return str; 3750 3751 alias C = ElementEncodingType!Range; 3752 3753 switch (str[$ - 1]) 3754 { 3755 case '\n': 3756 { 3757 if (str.length > 1 && str[$ - 2] == '\r') 3758 return str[0 .. $ - 2]; 3759 goto case; 3760 } 3761 case '\r', '\v', '\f': 3762 return str[0 .. $ - 1]; 3763 3764 // Pop off the last character if lineSep, paraSep, or nelSep 3765 static if (is(C : const char)) 3766 { 3767 /* Manually decode: 3768 * lineSep is E2 80 A8 3769 * paraSep is E2 80 A9 3770 */ 3771 case 0xA8: // Last byte of lineSep 3772 case 0xA9: // Last byte of paraSep 3773 if (str.length > 2 && str[$ - 2] == 0x80 && str[$ - 3] == 0xE2) 3774 return str [0 .. $ - 3]; 3775 goto default; 3776 3777 /* Manually decode: 3778 * NEL is C2 85 3779 */ 3780 case 0x85: 3781 if (str.length > 1 && str[$ - 2] == 0xC2) 3782 return str [0 .. $ - 2]; 3783 goto default; 3784 } 3785 else 3786 { 3787 case lineSep: 3788 case paraSep: 3789 case nelSep: 3790 return str[0 .. 
$ - 1]; 3791 } 3792 default: 3793 return str; 3794 } 3795 } 3796 3797 /// Ditto 3798 Range chomp(Range, C2)(Range str, const(C2)[] delimiter) 3799 if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) || 3800 isNarrowString!Range) && 3801 !isConvertibleToString!Range && 3802 isSomeChar!C2) 3803 { 3804 if (delimiter.empty) 3805 return chomp(str); 3806 3807 alias C1 = ElementEncodingType!Range; 3808 3809 static if (is(immutable C1 == immutable C2) && (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4))) 3810 { 3811 import std.algorithm.searching : endsWith; 3812 if (str.endsWith(delimiter)) 3813 return str[0 .. $ - delimiter.length]; 3814 return str; 3815 } 3816 else 3817 { 3818 auto orig = str.save; 3819 3820 static if (isSomeString!Range) 3821 alias C = dchar; // because strings auto-decode 3822 else 3823 alias C = C1; // and ranges do not 3824 3825 foreach_reverse (C c; delimiter) 3826 { 3827 if (str.empty || str.back != c) 3828 return orig; 3829 3830 str.popBack(); 3831 } 3832 3833 return str; 3834 } 3835 } 3836 3837 /// 3838 @safe pure 3839 unittest 3840 { 3841 import std.uni : lineSep, paraSep, nelSep; 3842 import std.utf : decode; 3843 assert(chomp(" hello world \n\r") == " hello world \n"); 3844 assert(chomp(" hello world \r\n") == " hello world "); 3845 assert(chomp(" hello world \f") == " hello world "); 3846 assert(chomp(" hello world \v") == " hello world "); 3847 assert(chomp(" hello world \n\n") == " hello world \n"); 3848 assert(chomp(" hello world \n\n ") == " hello world \n\n "); 3849 assert(chomp(" hello world \n\n" ~ [lineSep]) == " hello world \n\n"); 3850 assert(chomp(" hello world \n\n" ~ [paraSep]) == " hello world \n\n"); 3851 assert(chomp(" hello world \n\n" ~ [ nelSep]) == " hello world \n\n"); 3852 assert(chomp(" hello world ") == " hello world "); 3853 assert(chomp(" hello world") == " hello world"); 3854 assert(chomp("") == ""); 3855 3856 assert(chomp(" hello world", "orld") == " hello w"); 3857 assert(chomp(" 
hello world", " he") == " hello world"); 3858 assert(chomp("", "hello") == ""); 3859 3860 // Don't decode pointlessly 3861 assert(chomp("hello\xFE", "\r") == "hello\xFE"); 3862 } 3863 3864 StringTypeOf!Range chomp(Range)(auto ref Range str) 3865 if (isConvertibleToString!Range) 3866 { 3867 return chomp!(StringTypeOf!Range)(str); 3868 } 3869 3870 StringTypeOf!Range chomp(Range, C2)(auto ref Range str, const(C2)[] delimiter) 3871 if (isConvertibleToString!Range) 3872 { 3873 return chomp!(StringTypeOf!Range, C2)(str, delimiter); 3874 } 3875 3876 @safe pure unittest 3877 { 3878 assert(testAliasedString!chomp(" hello world \n\r")); 3879 assert(testAliasedString!chomp(" hello world", "orld")); 3880 } 3881 3882 @safe pure unittest 3883 { 3884 import std.conv : to; 3885 import std.exception : assertCTFEable; 3886 3887 assertCTFEable!( 3888 { 3889 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3890 { 3891 // @@@ BUG IN COMPILER, MUST INSERT CAST 3892 assert(chomp(cast(S) null) is null); 3893 assert(chomp(to!S("hello")) == "hello"); 3894 assert(chomp(to!S("hello\n")) == "hello"); 3895 assert(chomp(to!S("hello\r")) == "hello"); 3896 assert(chomp(to!S("hello\r\n")) == "hello"); 3897 assert(chomp(to!S("hello\n\r")) == "hello\n"); 3898 assert(chomp(to!S("hello\n\n")) == "hello\n"); 3899 assert(chomp(to!S("hello\r\r")) == "hello\r"); 3900 assert(chomp(to!S("hello\nxxx\n")) == "hello\nxxx"); 3901 assert(chomp(to!S("hello\u2028")) == "hello"); 3902 assert(chomp(to!S("hello\u2029")) == "hello"); 3903 assert(chomp(to!S("hello\u0085")) == "hello"); 3904 assert(chomp(to!S("hello\u2028\u2028")) == "hello\u2028"); 3905 assert(chomp(to!S("hello\u2029\u2029")) == "hello\u2029"); 3906 assert(chomp(to!S("hello\u2029\u2129")) == "hello\u2029\u2129"); 3907 assert(chomp(to!S("hello\u2029\u0185")) == "hello\u2029\u0185"); 3908 3909 static foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3910 { 3911 // @@@ BUG IN COMPILER, MUST INSERT 
CAST 3912 assert(chomp(cast(S) null, cast(T) null) is null); 3913 assert(chomp(to!S("hello\n"), cast(T) null) == "hello"); 3914 assert(chomp(to!S("hello"), to!T("o")) == "hell"); 3915 assert(chomp(to!S("hello"), to!T("p")) == "hello"); 3916 // @@@ BUG IN COMPILER, MUST INSERT CAST 3917 assert(chomp(to!S("hello"), cast(T) null) == "hello"); 3918 assert(chomp(to!S("hello"), to!T("llo")) == "he"); 3919 assert(chomp(to!S("\uFF28ello"), to!T("llo")) == "\uFF28e"); 3920 assert(chomp(to!S("\uFF28el\uFF4co"), to!T("l\uFF4co")) == "\uFF28e"); 3921 } 3922 } 3923 }); 3924 3925 // Ranges 3926 import std.array : array; 3927 import std.utf : byChar, byWchar, byDchar; 3928 assert(chomp("hello world\r\n" .byChar ).array == "hello world"); 3929 assert(chomp("hello world\r\n"w.byWchar).array == "hello world"w); 3930 assert(chomp("hello world\r\n"d.byDchar).array == "hello world"d); 3931 3932 assert(chomp("hello world"d.byDchar, "ld").array == "hello wor"d); 3933 3934 assert(chomp("hello\u2020" .byChar , "\u2020").array == "hello"); 3935 assert(chomp("hello\u2020"d.byDchar, "\u2020"d).array == "hello"d); 3936 } 3937 3938 3939 /++ 3940 If `str` starts with `delimiter`, then the part of `str` following 3941 `delimiter` is returned. If `str` does $(I not) start with 3942 3943 `delimiter`, then it is returned unchanged. 
3944 3945 Params: 3946 str = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 3947 of characters 3948 delimiter = string of characters to be sliced off front of str[] 3949 3950 Returns: 3951 slice of str 3952 +/ 3953 Range chompPrefix(Range, C2)(Range str, const(C2)[] delimiter) 3954 if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) || 3955 isNarrowString!Range) && 3956 !isConvertibleToString!Range && 3957 isSomeChar!C2) 3958 { 3959 alias C1 = ElementEncodingType!Range; 3960 3961 static if (is(immutable C1 == immutable C2) && (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4))) 3962 { 3963 import std.algorithm.searching : startsWith; 3964 if (str.startsWith(delimiter)) 3965 return str[delimiter.length .. $]; 3966 return str; 3967 } 3968 else 3969 { 3970 auto orig = str.save; 3971 3972 static if (isSomeString!Range) 3973 alias C = dchar; // because strings auto-decode 3974 else 3975 alias C = C1; // and ranges do not 3976 3977 foreach (C c; delimiter) 3978 { 3979 if (str.empty || str.front != c) 3980 return orig; 3981 3982 str.popFront(); 3983 } 3984 3985 return str; 3986 } 3987 } 3988 3989 /// 3990 @safe pure unittest 3991 { 3992 assert(chompPrefix("hello world", "he") == "llo world"); 3993 assert(chompPrefix("hello world", "hello w") == "orld"); 3994 assert(chompPrefix("hello world", " world") == "hello world"); 3995 assert(chompPrefix("", "hello") == ""); 3996 } 3997 3998 StringTypeOf!Range chompPrefix(Range, C2)(auto ref Range str, const(C2)[] delimiter) 3999 if (isConvertibleToString!Range) 4000 { 4001 return chompPrefix!(StringTypeOf!Range, C2)(str, delimiter); 4002 } 4003 4004 @safe pure 4005 unittest 4006 { 4007 import std.algorithm.comparison : equal; 4008 import std.conv : to; 4009 import std.exception : assertCTFEable; 4010 assertCTFEable!( 4011 { 4012 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 4013 { 4014 static foreach (T; AliasSeq!(char[], wchar[], dchar[], 
string, wstring, dstring))
        {
            assert(equal(chompPrefix(to!S("abcdefgh"), to!T("abcde")), "fgh"));
            assert(equal(chompPrefix(to!S("abcde"), to!T("abcdefgh")), "abcde"));
            assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el\uFF4co")), ""));
            assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el")), "\uFF4co"));
            assert(equal(chompPrefix(to!S("\uFF28el"), to!T("\uFF28el\uFF4co")), "\uFF28el"));
        }
    }
    });

    // Ranges
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;
    assert(chompPrefix("hello world" .byChar , "hello"d).array == " world");
    assert(chompPrefix("hello world"w.byWchar, "hello" ).array == " world"w);
    assert(chompPrefix("hello world"d.byDchar, "hello"w).array == " world"d);
    assert(chompPrefix("hello world"c.byDchar, "hello"w).array == " world"d);

    assert(chompPrefix("hello world"d.byDchar, "lx").array == "hello world"d);
    assert(chompPrefix("hello world"d.byDchar, "hello world xx").array == "hello world"d);

    assert(chompPrefix("\u2020world" .byChar , "\u2020").array == "world");
    assert(chompPrefix("\u2020world"d.byDchar, "\u2020"d).array == "world"d);
}

@safe pure unittest
{
    assert(testAliasedString!chompPrefix("hello world", "hello"));
}

/++
    Returns `str` without its last character, if there is one. If `str`
    ends with `"\r\n"`, then both are removed. If `str` is empty, then
    it is returned unchanged.

    For ranges of `char` or `wchar` code units a whole code point is
    removed: all trailing UTF-8 continuation bytes together with their lead
    byte, or both halves of a UTF-16 surrogate pair.

    Params:
        str = string or bidirectional range of characters (must be valid UTF)
    Returns:
        slice of str
 +/

Range chop(Range)(Range str)
if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    if (str.empty)
        return str;

    static if (isSomeString!Range)
    {
        // "\r\n" is removed as a unit; otherwise popBack on a string
        // already removes one whole code point.
        if (str.length >= 2 && str[$ - 1] == '\n' && str[$ - 2] == '\r')
            return str[0 .. $ - 2];
        str.popBack();
        return str;
    }
    else
    {
        alias C = Unqual!(ElementEncodingType!Range);
        C c = str.back;
        str.popBack();
        if (c == '\n')
        {
            if (!str.empty && str.back == '\r')
                str.popBack();
            return str;
        }
        // Pop back a dchar, not just a code unit
        static if (C.sizeof == 1)
        {
            // Keep popping while the unit just removed was a UTF-8
            // continuation byte (10xxxxxx); the counter bounds the walk
            // on malformed input.
            int cnt = 1;
            while ((c & 0xC0) == 0x80)
            {
                if (str.empty)
                    break;
                c = str.back;
                str.popBack();
                if (++cnt > 4)
                    break;
            }
        }
        else static if (C.sizeof == 2)
        {
            // `c` was the LAST code unit. In a surrogate pair the trail
            // (low) surrogate 0xDC00 .. 0xDFFF comes last, so that is the
            // unit which signals that a lead (high) surrogate
            // 0xD800 .. 0xDBFF still has to be removed. An unpaired
            // surrogate is treated as a single element.
            if (c >= 0xDC00 && c <= 0xDFFF && !str.empty)
            {
                immutable C lead = str.back;
                if (lead >= 0xD800 && lead <= 0xDBFF)
                    str.popBack();
            }
        }
        else static if (C.sizeof == 4)
        {
            // One dchar is one code point: nothing more to remove.
        }
        else
            static assert(0);
        return str;
    }
}

///
@safe pure unittest
{
    assert(chop("hello world") == "hello worl");
    assert(chop("hello world\n") == "hello world");
    assert(chop("hello world\r") == "hello world");
    assert(chop("hello world\n\r") == "hello world\n");
    assert(chop("hello world\r\n") == "hello world");
    assert(chop("Walter Bright") == "Walter Brigh");
    assert(chop("") == "");
}

// Overload for types that merely convert to a string: forward to the
// implementation instantiated with the underlying string type.
StringTypeOf!Range chop(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    return chop!(StringTypeOf!Range)(str);
}

@safe pure unittest
{
    assert(testAliasedString!chop("hello world"));
}

@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar, byCodeUnit, invalidUTFstrings;

    assert(chop("hello world".byChar).array == "hello worl");
    assert(chop("hello world\n"w.byWchar).array == "hello world"w);
    assert(chop("hello world\r"d.byDchar).array == "hello world"d);
    assert(chop("hello world\n\r".byChar).array == "hello world\n");
    assert(chop("hello world\r\n"w.byWchar).array == "hello world"w);
    // A code point outside the BMP must be removed as a whole pair.
    assert(chop("hello\U00010001"w.byCodeUnit).array == "hello"w);
    assert(chop("Walter Bright"d.byDchar).array == "Walter
Brigh"d); 4147 assert(chop("".byChar).array == ""); 4148 4149 assert(chop(`ミツバチと科学者` .byCodeUnit).array == "ミツバチと科学"); 4150 assert(chop(`ミツバチと科学者`w.byCodeUnit).array == "ミツバチと科学"w); 4151 assert(chop(`ミツバチと科学者`d.byCodeUnit).array == "ミツバチと科学"d); 4152 4153 auto ca = invalidUTFstrings!char(); 4154 foreach (s; ca) 4155 { 4156 foreach (c; chop(s.byCodeUnit)) 4157 { 4158 } 4159 } 4160 4161 auto wa = invalidUTFstrings!wchar(); 4162 foreach (s; wa) 4163 { 4164 foreach (c; chop(s.byCodeUnit)) 4165 { 4166 } 4167 } 4168 } 4169 4170 @safe pure unittest 4171 { 4172 import std.algorithm.comparison : equal; 4173 import std.conv : to; 4174 import std.exception : assertCTFEable; 4175 4176 assertCTFEable!( 4177 { 4178 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 4179 { 4180 assert(chop(cast(S) null) is null); 4181 assert(equal(chop(to!S("hello")), "hell")); 4182 assert(equal(chop(to!S("hello\r\n")), "hello")); 4183 assert(equal(chop(to!S("hello\n\r")), "hello\n")); 4184 assert(equal(chop(to!S("Verité")), "Verit")); 4185 assert(equal(chop(to!S(`さいごの果実`)), "さいごの果")); 4186 assert(equal(chop(to!S(`ミツバチと科学者`)), "ミツバチと科学")); 4187 } 4188 }); 4189 } 4190 4191 4192 /++ 4193 Left justify `s` in a field `width` characters wide. `fillChar` 4194 is the character that will be used to fill up the space in the field that 4195 `s` doesn't fill. 
4196 4197 Params: 4198 s = string 4199 width = minimum field width 4200 fillChar = used to pad end up to `width` characters 4201 4202 Returns: 4203 GC allocated string 4204 4205 See_Also: 4206 $(LREF leftJustifier), which does not allocate 4207 +/ 4208 S leftJustify(S)(S s, size_t width, dchar fillChar = ' ') 4209 if (isSomeString!S) 4210 { 4211 import std.array : array; 4212 return leftJustifier(s, width, fillChar).array; 4213 } 4214 4215 /// 4216 @safe pure unittest 4217 { 4218 assert(leftJustify("hello", 7, 'X') == "helloXX"); 4219 assert(leftJustify("hello", 2, 'X') == "hello"); 4220 assert(leftJustify("hello", 9, 'X') == "helloXXXX"); 4221 } 4222 4223 /++ 4224 Left justify `s` in a field `width` characters wide. `fillChar` 4225 is the character that will be used to fill up the space in the field that 4226 `s` doesn't fill. 4227 4228 Params: 4229 r = string or range of characters 4230 width = minimum field width 4231 fillChar = used to pad end up to `width` characters 4232 4233 Returns: 4234 a lazy range of the left justified result 4235 4236 See_Also: 4237 $(LREF rightJustifier) 4238 +/ 4239 4240 auto leftJustifier(Range)(Range r, size_t width, dchar fillChar = ' ') 4241 if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && 4242 !isConvertibleToString!Range) 4243 { 4244 alias C = Unqual!(ElementEncodingType!Range); 4245 4246 static if (C.sizeof == 1) 4247 { 4248 import std.utf : byDchar, byChar; 4249 return leftJustifier(r.byDchar, width, fillChar).byChar; 4250 } 4251 else static if (C.sizeof == 2) 4252 { 4253 import std.utf : byDchar, byWchar; 4254 return leftJustifier(r.byDchar, width, fillChar).byWchar; 4255 } 4256 else static if (C.sizeof == 4) 4257 { 4258 static struct Result 4259 { 4260 private: 4261 Range _input; 4262 size_t _width; 4263 dchar _fillChar; 4264 size_t len; 4265 4266 public: 4267 4268 @property bool empty() 4269 { 4270 return len >= _width && _input.empty; 4271 } 4272 4273 @property C front() 4274 { 4275 return _input.empty ? 
_fillChar : _input.front; 4276 } 4277 4278 void popFront() 4279 { 4280 ++len; 4281 if (!_input.empty) 4282 _input.popFront(); 4283 } 4284 4285 static if (isForwardRange!Range) 4286 { 4287 @property typeof(this) save() return scope 4288 { 4289 auto ret = this; 4290 ret._input = _input.save; 4291 return ret; 4292 } 4293 } 4294 } 4295 4296 return Result(r, width, fillChar); 4297 } 4298 else 4299 static assert(0); 4300 } 4301 4302 /// 4303 @safe pure @nogc nothrow 4304 unittest 4305 { 4306 import std.algorithm.comparison : equal; 4307 import std.utf : byChar; 4308 assert(leftJustifier("hello", 2).equal("hello".byChar)); 4309 assert(leftJustifier("hello", 7).equal("hello ".byChar)); 4310 assert(leftJustifier("hello", 7, 'x').equal("helloxx".byChar)); 4311 } 4312 4313 auto leftJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ') 4314 if (isConvertibleToString!Range) 4315 { 4316 return leftJustifier!(StringTypeOf!Range)(r, width, fillChar); 4317 } 4318 4319 @safe pure unittest 4320 { 4321 auto r = "hello".leftJustifier(8); 4322 r.popFront(); 4323 auto save = r.save; 4324 r.popFront(); 4325 assert(r.front == 'l'); 4326 assert(save.front == 'e'); 4327 } 4328 4329 @safe pure unittest 4330 { 4331 assert(testAliasedString!leftJustifier("hello", 2)); 4332 } 4333 4334 /++ 4335 Right justify `s` in a field `width` characters wide. `fillChar` 4336 is the character that will be used to fill up the space in the field that 4337 `s` doesn't fill. 
Params:
        s = string
        width = minimum field width
        fillChar = used to pad the beginning up to `width` characters

    Returns:
        GC allocated string

    See_Also:
        $(LREF rightJustifier), which does not allocate
  +/
S rightJustify(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;

    // Build the lazy range first, then materialise it eagerly.
    auto justified = rightJustifier(s, width, fillChar);
    return justified.array;
}

///
@safe pure unittest
{
    assert(rightJustify("hello", 7, 'X') == "XXhello");
    assert(rightJustify("hello", 2, 'X') == "hello");
    assert(rightJustify("hello", 9, 'X') == "XXXXhello");
}

/++
    Lazily right justifies `r` in a field `width` characters wide.
    `fillChar` is emitted in front of `r` as many times as needed to reach
    `width`; when `r` is already `width` characters or longer nothing is added.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        width = minimum field width
        fillChar = used to pad the beginning up to `width` characters

    Returns:
        a lazy range of the right justified result

    See_Also:
        $(LREF leftJustifier)
  +/

auto rightJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    static if (C.sizeof == 1)
    {
        // Narrow input: justify the decoded code points, then re-encode.
        import std.utf : byDchar, byChar;
        return rightJustifier(r.byDchar, width, fillChar).byChar;
    }
    else static if (C.sizeof == 2)
    {
        // Same round trip for UTF-16 input.
        import std.utf : byDchar, byWchar;
        return rightJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (C.sizeof == 4)
    {
        static struct Result
        {
          private:
            Range _input;
            size_t _width;
            // Once `initialize` has run, the same storage holds the number of
            // fill characters that still have to be emitted.
            alias nfill = _width;
            dchar _fillChar;
            bool inited;

            // The fill count is computed lazily so that the constructor
            // stays trivial and cannot fail.
            void initialize()
            {
                // `_width` is overwritten with the fill count
                // (an alias is used instead of a union because CTFE cannot
                // deal with unions)
                assert(_width, "width of 0 not allowed");
                static if (hasLength!Range)
                {
                    immutable len = _input.length;
                    if (_width > len)
                        nfill = _width - len;
                    else
                        nfill = 0;
                }
                else
                {
                    // Look ahead, at most `_width` elements, to see how many
                    // fill characters are needed
                    import std.range : take;
                    import std.range.primitives : walkLength;
                    auto lookahead = _input.save.take(_width);
                    nfill = _width - walkLength(lookahead, _width);
                }
                inited = true;
            }

          public:
            this(Range input, size_t width, dchar fillChar) pure nothrow
            {
                _width = width;
                _fillChar = fillChar;
                _input = input;
            }

            @property bool empty()
            {
                if (nfill)
                    return false;
                return _input.empty;
            }

            @property C front()
            {
                // Fast path: nothing (left) to prepend.
                if (!nfill)
                    return _input.front;
                if (!inited)
                    initialize();
                if (nfill)
                    return _fillChar;
                return _input.front;
            }

            void popFront()
            {
                // Fast path: nothing (left) to prepend.
                if (!nfill)
                {
                    _input.popFront();
                    return;
                }
                if (!inited)
                    initialize();
                if (nfill)
                    --nfill;
                else
                    _input.popFront();
            }

            @property typeof(this) save()
            {
                auto copy = this;
                copy._input = _input.save;
                return copy;
            }
        }

        return Result(r, width, fillChar);
    }
    else
        static assert(0, "Invalid character type of " ~ C.stringof);
}

///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;
    assert(rightJustifier("hello", 2).equal("hello".byChar));
    assert(rightJustifier("hello", 7).equal(" hello".byChar));
    assert(rightJustifier("hello", 7, 'x').equal("xxhello".byChar));
}

auto rightJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    return rightJustifier!(StringTypeOf!Range)(r, width, fillChar);
}

@safe pure unittest
{
    assert(testAliasedString!rightJustifier("hello", 2));
}

@safe pure unittest
{
    auto r = "hello"d.rightJustifier(6);
    r.popFront();
    auto save = r.save;
    r.popFront();
    assert(r.front == 'e');
    assert(save.front == 'h');

    auto t = "hello".rightJustifier(7);
    t.popFront();
    assert(t.front == ' ');
    t.popFront();
    assert(t.front == 'h');

    auto u = "hello"d.rightJustifier(5);
    u.popFront();
    u.popFront();
    u.popFront();
}

/++
    Center `s` in a field `width` characters wide. `fillChar`
    is the character that will be used to fill up the space in the field that
    `s` doesn't fill.
Params:
        s = The string to center
        width = Width of the field to center `s` in
        fillChar = The character to use for filling excess space in the field

    Returns:
        The resulting _center-justified string. The returned string is
        GC-allocated. To avoid GC allocation, use $(LREF centerJustifier)
        instead.
  +/
S center(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;

    // Build the lazy range first, then materialise it eagerly.
    auto centered = centerJustifier(s, width, fillChar);
    return centered.array;
}

///
@safe pure unittest
{
    assert(center("hello", 7, 'X') == "XhelloX");
    assert(center("hello", 2, 'X') == "hello");
    assert(center("hello", 9, 'X') == "XXhelloXX");
}

@safe pure
unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {{
        S s = to!S("hello");

        assert(leftJustify(s, 2) == "hello");
        assert(rightJustify(s, 2) == "hello");
        assert(center(s, 2) == "hello");

        assert(leftJustify(s, 7) == "hello ");
        assert(rightJustify(s, 7) == " hello");
        assert(center(s, 7) == " hello ");

        assert(leftJustify(s, 8) == "hello ");
        assert(rightJustify(s, 8) == " hello");
        assert(center(s, 8) == " hello ");

        assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100");
        assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello");
        assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100");

        assert(leftJustify(s, 8, 'ö') == "helloööö");
        assert(rightJustify(s, 8, 'ö') == "öööhello");
        assert(center(s, 8, 'ö') == "öhelloöö");
    }}
    });
}

/++
    Lazily centers `r` in a field `width` characters wide. `fillChar` is
    emitted on both sides of `r`; when the padding cannot be split evenly the
    extra fill character goes on the right.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        width = minimum field width
        fillChar = used to pad both sides up to `width` characters

    Returns:
        a lazy range of the center justified result

    See_Also:
        $(LREF leftJustifier)
        $(LREF rightJustifier)
  +/

auto centerJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    static if (C.sizeof == 1)
    {
        // Narrow input: center the decoded code points, then re-encode.
        import std.utf : byDchar, byChar;
        return centerJustifier(r.byDchar, width, fillChar).byChar;
    }
    else static if (C.sizeof == 2)
    {
        // Same round trip for UTF-16 input.
        import std.utf : byDchar, byWchar;
        return centerJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (C.sizeof == 4)
    {
        import std.range : chain, repeat;
        import std.range.primitives : walkLength;

        // Count at most `width` elements of a saved copy; anything longer
        // needs no padding at all.
        const counted = walkLength(r.save, width);
        const occupied = counted > width ? width : counted;
        const padding = width - occupied;
        const nleft = padding / 2;
        const nright = padding - nleft;
        return chain(repeat(fillChar, nleft), r, repeat(fillChar, nright));
    }
    else
        static assert(0);
}

///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;
    assert(centerJustifier("hello", 2).equal("hello".byChar));
    assert(centerJustifier("hello", 8).equal(" hello ".byChar));
    assert(centerJustifier("hello", 7, 'x').equal("xhellox".byChar));
}

auto centerJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    return centerJustifier!(StringTypeOf!Range)(r, width, fillChar);
}

@safe pure unittest
{
    assert(testAliasedString!centerJustifier("hello", 8));
}

@safe unittest
{
    static auto byFwdRange(dstring s)
    {
        static struct FRange
        {
          @safe:
            dstring str;
            this(dstring s) { str = s; }
            @property bool empty() { return str.length == 0; }
            @property dchar front() { return str[0]; }
            void popFront() { str = str[1 .. $]; }
            @property FRange save() { return this; }
        }
        return FRange(s);
    }

    auto r = centerJustifier(byFwdRange("hello"d), 6);
    r.popFront();
    auto save = r.save;
    r.popFront();
    assert(r.front == 'l');
    assert(save.front == 'e');

    auto t = "hello".centerJustifier(7);
    t.popFront();
    assert(t.front == 'h');
    t.popFront();
    assert(t.front == 'e');

    auto u = byFwdRange("hello"d).centerJustifier(6);
    u.popFront();
    u.popFront();
    u.popFront();
    u.popFront();
    u.popFront();
    u.popFront();
}


/++
    Replace each tab character in `s` with the number of spaces necessary
    to align the following character at the next tab stop.
Params:
        s = string
        tabSize = distance between tab stops

    Returns:
        GC allocated string with tabs replaced with spaces
  +/
auto detab(Range)(auto ref Range s, size_t tabSize = 8) pure
if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
    || __traits(compiles, StringTypeOf!Range))
{
    import std.array : array;

    // Build the lazy range first, then materialise it eagerly.
    auto expanded = detabber(s, tabSize);
    return expanded.array;
}

///
@safe pure unittest
{
    assert(detab(" \n\tx", 9) == " \n x");
}

@safe pure unittest
{
    static struct TestStruct
    {
        string s;
        alias s this;
    }

    static struct TestStruct2
    {
        string s;
        alias s this;
        @disable this(this);
    }

    string s = " \n\tx";
    string cmp = " \n x";
    auto t = TestStruct(s);
    assert(detab(t, 9) == cmp);
    assert(detab(TestStruct(s), 9) == cmp);
    assert(detab(TestStruct(s), 9) == detab(TestStruct(s), 9));
    assert(detab(TestStruct2(s), 9) == detab(TestStruct2(s), 9));
    assert(detab(TestStruct2(s), 9) == cmp);
}

/++
    Lazily replaces each tab character in `r` with the number of spaces
    necessary to align the following character at the next tab stop.
    The column counter restarts after `'\r'`, `'\n'` and the Unicode
    line, paragraph and next-line separators.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        tabSize = distance between tab stops

    Returns:
        lazy forward range with tabs replaced with spaces
  +/
auto detabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0);

    alias C = Unqual!(ElementEncodingType!(Range));

    static struct Result
    {
    private:
        Range _input;
        size_t _tabSize;
        size_t nspaces;     // spaces still owed for the tab being expanded
        int column;         // column of the next character on the current line
        size_t index;       // code units of the current character not yet popped

        // Current code unit of the underlying input (no autodecoding).
        @property C peekUnit()
        {
            static if (isSomeString!(Range))
                return _input[0];
            else
                return _input.front;
        }

        // Drops one code unit from the underlying input.
        void dropUnit()
        {
            static if (isSomeString!(Range))
                _input = _input[1 .. $];
            else
                _input.popFront();
        }

    public:

        this(Range input, size_t tabSize)
        {
            _tabSize = tabSize;
            _input = input;
        }

        static if (isInfinite!(Range))
        {
            enum bool empty = false;
        }
        else
        {
            @property bool empty()
            {
                return nspaces == 0 && _input.empty;
            }
        }

        @property C front()
        {
            // In the middle of an expanded tab.
            if (nspaces)
                return ' ';

            C c = peekUnit;

            // This character has already been classified.
            if (index)
                return c;

            dchar dc;
            if (c < codeUnitLimit!(immutable(C)[]))
            {
                dc = c;
                index = 1;
            }
            else
            {
                auto r = _input.save;
                dc = decodeFront(r, index);     // lookahead to decode
            }

            if (dc == '\t')
            {
                nspaces = _tabSize - (column % _tabSize);
                column += nspaces;
                c = ' ';
            }
            else if (dc == '\r' || dc == '\n' || dc == paraSep ||
                     dc == lineSep || dc == nelSep)
            {
                column = 0;
            }
            else
            {
                ++column;
            }
            return c;
        }

        void popFront()
        {
            // Make sure the current character has been classified.
            if (!index)
                front;
            if (nspaces)
                --nspaces;
            if (!nspaces)
            {
                dropUnit();
                --index;
            }
        }

        @property typeof(this) save()
        {
            auto copy = this;
            copy._input = _input.save;
            return copy;
        }
    }

    return Result(r, tabSize);
}

///
@safe pure unittest
{
    import std.array : array;

    assert(detabber(" \n\tx", 9).array == " \n x");
}

/// ditto
auto detabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    return detabber!(StringTypeOf!Range)(r, tabSize);
}

@safe pure unittest
{
    assert(testAliasedString!detabber( " ab\t asdf ", 8));
}

@safe pure unittest
{
    import std.algorithm.comparison : cmp;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {{
        S s = to!S("This \tis\t a fofof\tof list");
        assert(cmp(detab(s), "This is a fofof of list") == 0);

        assert(detab(cast(S) null) is null);
        assert(detab("").empty);
        assert(detab("a") == "a");
        assert(detab("\t") == " ");
        assert(detab("\t", 3) == " ");
        assert(detab("\t", 9) == " ");
        assert(detab( " ab\t asdf ") == " ab asdf ");
        assert(detab( " \U00010000b\tasdf ") == " \U00010000b asdf ");
        assert(detab("\r\t", 9) == "\r ");
        assert(detab("\n\t", 9) == "\n ");
        assert(detab("\u0085\t", 9) == "\u0085 ");
        assert(detab("\u2028\t", 9) == "\u2028 ");
        assert(detab(" \u2029\t", 9) == " \u2029 ");
    }}
    });
}

///
@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar;

    assert(detabber(" \u2029\t".byChar, 9).array == " \u2029 ");
    auto r = "hel\tx".byWchar.detabber();
    assert(r.front == 'h');
    auto s = r.save;
    r.popFront();
    r.popFront();
    assert(r.front == 'l');
    assert(s.front == 'h');
}

/++
    Replaces spaces in `s` with the optimal number of tabs.
    All spaces and tabs at the end of a line are removed.

    Params:
        s = String to convert.
        tabSize = Tab columns are `tabSize` spaces apart.

    Returns:
        GC allocated string with spaces replaced with tabs;
        use $(LREF entabber) to not allocate.

    See_Also:
        $(LREF entabber)
 +/
auto entab(Range)(Range s, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
{
    import std.array : array;

    // Build the lazy range first, then materialise it eagerly.
    auto compacted = entabber(s, tabSize);
    return compacted.array;
}

///
@safe pure unittest
{
    assert(entab(" x \n") == "\tx\n");
}

auto entab(Range)(auto ref Range s, size_t tabSize = 8)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) &&
    is(StringTypeOf!Range))
{
    return entab!(StringTypeOf!Range)(s, tabSize);
}

@safe pure unittest
{
    assert(testAliasedString!entab(" x \n"));
}

/++
    Replaces spaces in range `r` with the optimal number of tabs.
    All spaces and tabs at the end of a line are removed.
Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        tabSize = distance between tab stops

    Returns:
        lazy forward range with spaces replaced with tabs

    See_Also:
        $(LREF entab)
  +/
auto entabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0, "tabSize must be greater than 0");
    alias C = Unqual!(ElementEncodingType!Range);

    static struct Result
    {
    private:
        Range _input;
        size_t _tabSize;
        size_t nspaces;     // spaces still to emit for the current blank run
        size_t ntabs;       // tabs still to emit for the current blank run
        int column;         // column reached so far on the current line
        size_t index;       // code units of the current character not yet popped

        // Current code unit of the underlying input.
        @property C getFront()
        {
            static if (isSomeString!Range)
                return _input[0];       // avoid autodecode
            else
                return _input.front;
        }

        // Whether the underlying input is exhausted. Uses `empty` for
        // non-string ranges because a forward range need not have `length`.
        @property bool inputEmpty()
        {
            static if (isSomeString!Range)
                return _input.length == 0;
            else
                return _input.empty;
        }

        // Drops one code unit from the underlying input.
        void dropFront()
        {
            static if (isSomeString!Range)
                _input = _input[1 .. $];    // avoid autodecode
            else
                _input.popFront();
        }

    public:

        this(Range input, size_t tabSize)
        {
            _input = input;
            _tabSize = tabSize;
        }

        @property bool empty()
        {
            if (ntabs || nspaces)
                return false;

            /* Since trailing spaces are removed,
             * look ahead for anything that is not a trailing space
             */
            static if (isSomeString!Range)
            {
                foreach (c; _input)
                {
                    if (c != ' ' && c != '\t')
                        return false;
                }
                return true;
            }
            else
            {
                if (_input.empty)
                    return true;
                immutable c = _input.front;
                if (c != ' ' && c != '\t')
                    return false;
                auto t = _input.save;
                t.popFront();
                foreach (c2; t)
                {
                    if (c2 != ' ' && c2 != '\t')
                        return false;
                }
                return true;
            }
        }

        @property C front()
        {
            // Pending output for a blank run that was already measured.
            if (ntabs)
                return '\t';
            if (nspaces)
                return ' ';
            C c = getFront;
            // This character has already been classified.
            if (index)
                return c;
            dchar dc;
            if (c < codeUnitLimit!(immutable(C)[]))
            {
                index = 1;
                dc = c;
                if (c == ' ' || c == '\t')
                {
                    // Consume input until a non-blank is encountered
                    immutable startcol = column;
                    C cx;
                    while (1)
                    {
                        assert(!inputEmpty, "input did not contain non "
                                ~ "whitespace character");
                        cx = getFront;
                        if (cx == ' ')
                            ++column;
                        else if (cx == '\t')
                            column += _tabSize - (column % _tabSize);
                        else
                            break;
                        dropFront();
                    }
                    // Compute ntabs+nspaces to get from startcol to column
                    immutable n = column - startcol;
                    if (n == 1)
                    {
                        nspaces = 1;
                    }
                    else
                    {
                        ntabs = column / _tabSize - startcol / _tabSize;
                        if (ntabs == 0)
                            nspaces = column - startcol;
                        else
                            nspaces = column % _tabSize;
                    }
                    // Classify the non-blank character that ended the run.
                    if (cx < codeUnitLimit!(immutable(C)[]))
                    {
                        dc = cx;
                        index = 1;
                    }
                    else
                    {
                        auto r = _input.save;
                        dc = decodeFront(r, index);     // lookahead to decode
                    }
                    switch (dc)
                    {
                        case '\r':
                        case '\n':
                        case paraSep:
                        case lineSep:
                        case nelSep:
                            column = 0;
                            // Spaces followed by newline are ignored
                            ntabs = 0;
                            nspaces = 0;
                            return cx;

                        default:
                            ++column;
                            break;
                    }
                    return ntabs ? '\t' : ' ';
                }
            }
            else
            {
                auto r = _input.save;
                dc = decodeFront(r, index);     // lookahead to decode
            }
            switch (dc)
            {
                case '\r':
                case '\n':
                case paraSep:
                case lineSep:
                case nelSep:
                    column = 0;
                    break;

                default:
                    ++column;
                    break;
            }
            return c;
        }

        void popFront()
        {
            // Make sure the current character has been classified.
            if (!index)
                front;
            if (ntabs)
                --ntabs;
            else if (nspaces)
                --nspaces;
            else
            {
                dropFront();
                --index;
            }
        }

        @property typeof(this) save()
        {
            auto ret = this;
            ret._input = _input.save;
            return ret;
        }
    }

    return Result(r, tabSize);
}

///
@safe pure unittest
{
    import std.array : array;
    assert(entabber(" x \n").array == "\tx\n");
}

// forward ranges that do not provide `length` are accepted
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    static struct FRange
    {
      @safe:
        dstring str;
        @property bool empty() { return str.length == 0; }
        @property dchar front() { return str[0]; }
        void popFront() { str = str[1 .. $]; }
        @property FRange save() { return this; }
    }

    assert(entabber(FRange("a \tb"d), 4).equal("a\tb"d));
    assert(entabber(FRange("ab \t"d), 4).equal("ab"d));
}

auto entabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    return entabber!(StringTypeOf!Range)(r, tabSize);
}

@safe pure unittest
{
    assert(testAliasedString!entabber(" ab asdf ", 8));
}

@safe pure
unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    assert(entab(cast(string) null) is null);
    assert(entab("").empty);
    assert(entab("a") == "a");
    assert(entab(" ") == "");
    assert(entab(" x") == "\tx");
    assert(entab(" ab asdf ") == " ab\tasdf");
    assert(entab(" ab asdf ") == " ab\t asdf");
    assert(entab(" ab \t asdf ") == " ab\t asdf");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\t\ta");

    assert(entab("a ") == "a");
    assert(entab("a\v") == "a\v");
    assert(entab("a\f") == "a\f");
    assert(entab("a\n") == "a\n");
    assert(entab("a\n\r") == "a\n\r");
    assert(entab("a\r\n") == "a\r\n");
    assert(entab("a\u2028") == "a\u2028");
    assert(entab("a\u2029") == "a\u2029");
    assert(entab("a\u0085") == "a\u0085");
    assert(entab("a ") == "a");
    assert(entab("a\t") == "a");
    assert(entab("\uFF28\uFF45\uFF4C\uFF4C567 \t\uFF4F \t") ==
                 "\uFF28\uFF45\uFF4C\uFF4C567\t\t\uFF4F");
    assert(entab(" \naa") == "\naa");
    assert(entab(" \r aa") == "\r aa");
    assert(entab(" \u2028 aa") == "\u2028 aa");
    assert(entab(" \u2029 aa") == "\u2029 aa");
    assert(entab(" \u0085 aa") == "\u0085 aa");
    });
}

@safe pure
unittest
{
    import std.array : array;
    import std.utf : byChar;
    assert(entabber(" \u0085 aa".byChar).array == "\u0085 aa");
    assert(entabber(" \u2028\t aa \t".byChar).array == "\u2028\t aa");

    auto r = entabber("1234", 4);
    r.popFront();
    auto rsave = r.save;
    r.popFront();
    assert(r.front == '3');
    assert(rsave.front == '2');
}


/++
    Replaces the characters in `str` which are keys in `transTable` with
    their corresponding values in `transTable`. `transTable` is an AA
    where its keys are `dchar` and its values are either `dchar` or some
    type of string. Also, if `toRemove` is given, the characters in it are
    removed from `str` prior to translation. `str` itself is unaltered.
    A copy with the changes is returned.
See_Also:
        $(LREF tr),
        $(REF replace, std,array),
        $(REF substitute, std,algorithm,iteration)

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
  +/
C1[] translate(C1, C2 = immutable char)(C1[] str,
                                        in dchar[dchar] transTable,
                                        const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeChar!C2)
{
    import std.array : appender;

    // Collect the translated characters in a growable sink and hand back
    // what it accumulated.
    auto sink = appender!(C1[])();
    translateImpl(str, transTable, toRemove, sink);
    return sink.data;
}

///
@safe pure unittest
{
    dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q'];
    assert(translate("hello world", transTable1) == "h5ll7 w7rld");

    assert(translate("hello world", transTable1, "low") == "h5 rd");

    string[dchar] transTable2 = ['e' : "5", 'o' : "orange"];
    assert(translate("hello world", transTable2) == "h5llorange worangerld");
}

// https://issues.dlang.org/show_bug.cgi?id=13018
@safe pure unittest
{
    immutable dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q'];
    assert(translate("hello world", transTable1) == "h5ll7 w7rld");

    assert(translate("hello world", transTable1, "low") == "h5 rd");

    immutable string[dchar] transTable2 = ['e' : "5", 'o' : "orange"];
    assert(translate("hello world", transTable2) == "h5llorange worangerld");
}

@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[],
                          wchar[], const(wchar)[], immutable(wchar)[],
                          dchar[], const(dchar)[], immutable(dchar)[]))
    {(){ // workaround slow optimizations for large functions
         // https://issues.dlang.org/show_bug.cgi?id=2396
        assert(translate(to!S("hello world"), cast(dchar[dchar])['h' : 'q', 'l' : '5']) ==
               to!S("qe55o wor5d"));
        assert(translate(to!S("hello world"), cast(dchar[dchar])['o' : 'l', 'l' : '\U00010143']) ==
               to!S("he\U00010143\U00010143l wlr\U00010143d"));
        assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['h' : 'q', 'l': '5']) ==
               to!S("qe55o \U00010143 wor5d"));
        assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['o' : '0', '\U00010143' : 'o']) ==
               to!S("hell0 o w0rld"));
        assert(translate(to!S("hello world"), cast(dchar[dchar]) null) == to!S("hello world"));

        static foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]))
        (){ // workaround slow optimizations for large functions
            // https://issues.dlang.org/show_bug.cgi?id=2396
            static foreach (R; AliasSeq!(dchar[dchar], const dchar[dchar],
                        immutable dchar[dchar]))
            {{
                R tt = ['h' : 'q', 'l' : '5'];
                assert(translate(to!S("hello world"), tt, to!T("r"))
                    == to!S("qe55o wo5d"));
                assert(translate(to!S("hello world"), tt, to!T("helo"))
                    == to!S(" wrd"));
                assert(translate(to!S("hello world"), tt, to!T("q5"))
                    == to!S("qe55o wor5d"));
            }}
        }();

        auto s = to!S("hello world");
        dchar[dchar] transTable = ['h' : 'q', 'l' : '5'];
        static assert(is(typeof(s) == typeof(translate(s, transTable))));
        assert(translate(s, transTable) == "qe55o wor5d");
    }();}
    });
}

/++ Ditto +/
C1[] translate(C1, S, C2 = immutable char)(C1[] str,
                                            in S[dchar] transTable,
                                            const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2)
{
    import std.array : appender;

    // Same as the `dchar[dchar]` overload, with string replacements.
    auto sink = appender!(C1[])();
    translateImpl(str, transTable, toRemove, sink);
    return sink.data;
}

@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[],
                          wchar[], const(wchar)[], immutable(wchar)[],
                          dchar[], const(dchar)[], immutable(dchar)[]))
    {(){ // workaround slow optimizations for large functions
         // https://issues.dlang.org/show_bug.cgi?id=2396
        assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"]) ==
               to!S("yellowe4242o wor42d"));
        assert(translate(to!S("hello world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
               to!S("he\U00010143\U00010143\U00010143\U00010143owl wowlr\U00010143\U00010143d"));
        assert(translate(to!S("hello \U00010143 world"), ['h' : "yellow", 'l' : "42"]) ==
               to!S("yellowe4242o \U00010143 wor42d"));
        assert(translate(to!S("hello \U00010143 world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
               to!S("he\U00010143\U00010143\U00010143\U00010143owl \U00010143 wowlr\U00010143\U00010143d"));
        assert(translate(to!S("hello \U00010143 world"), ['h' : ""]) ==
               to!S("ello \U00010143 world"));
        assert(translate(to!S("hello \U00010143 world"), ['\U00010143' : ""]) ==
               to!S("hello world"));
        assert(translate(to!S("hello world"), cast(string[dchar]) null) == to!S("hello world"));

        static foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]))
        (){ // workaround slow optimizations for large functions
            // https://issues.dlang.org/show_bug.cgi?id=2396
            static foreach (R; AliasSeq!(string[dchar], const string[dchar],
                        immutable string[dchar]))
            {{
                R tt = ['h' : "yellow", 'l' : "42"];
                assert(translate(to!S("hello world"), tt, to!T("r")) ==
                       to!S("yellowe4242o wo42d"));
                assert(translate(to!S("hello world"), tt, to!T("helo")) ==
                       to!S(" wrd"));
                assert(translate(to!S("hello world"), tt, to!T("y42")) ==
                       to!S("yellowe4242o wor42d"));
                assert(translate(to!S("hello world"), tt, to!T("hello world")) ==
                       to!S(""));
                assert(translate(to!S("hello world"), tt, to!T("42")) ==
                       to!S("yellowe4242o wor42d"));
            }}
        }();

        auto s = to!S("hello world");
        string[dchar] transTable = ['h' : "silly", 'l' : "putty"];
        static assert(is(typeof(s) == typeof(translate(s, transTable))));
        assert(translate(s, transTable) == "sillyeputtyputtyo worputtyd");
    }();}
    });
}

/++
    Overload of `translate` that writes the result to a caller-supplied
    output range instead of returning a new array.

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
        buffer     = An output range to write the contents to.
  +/
void translate(C1, C2 = immutable char, Buffer)(const(C1)[] str,
                                        in dchar[dchar] transTable,
                                        const(C2)[] toRemove,
                                        Buffer buffer)
if (isSomeChar!C1 && isSomeChar!C2 && isOutputRange!(Buffer, C1))
{
    // Thin forwarder; the shared loop lives in `translateImpl`.
    translateImpl(str, transTable, toRemove, buffer);
}

///
@safe pure unittest
{
    import std.array : appender;
    dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q'];
    auto buffer = appender!(dchar[])();
    translate("hello world", transTable1, null, buffer);
    assert(buffer.data == "h5ll7 w7rld");

    buffer.clear();
    translate("hello world", transTable1, "low", buffer);
    assert(buffer.data == "h5 rd");

    buffer.clear();
    string[dchar] transTable2 = ['e' : "5", 'o' : "orange"];
    translate("hello world", transTable2, null, buffer);
    assert(buffer.data == "h5llorange worangerld");
}

// https://issues.dlang.org/show_bug.cgi?id=13018
@safe pure unittest
{
    import std.array : appender;
    immutable dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q'];
    auto buffer = appender!(dchar[])();
    translate("hello world", transTable1, null, buffer);
    assert(buffer.data == "h5ll7 w7rld");

    buffer.clear();
    translate("hello world", transTable1, "low", buffer);
    assert(buffer.data == "h5 rd");

    buffer.clear();
    immutable string[dchar] transTable2 = ['e' : "5", 'o' : "orange"];
    translate("hello world", transTable2, null, buffer);
    assert(buffer.data == "h5llorange worangerld");
}

/++ Ditto +/
void translate(C1, S, C2 = immutable char, Buffer)(C1[] str,
                                                   in S[dchar] transTable,
                                                   const(C2)[] toRemove,
                                                   Buffer buffer)
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2 && isOutputRange!(Buffer, S))
{
    // Thin forwarder; the shared loop lives in `translateImpl`.
    translateImpl(str, transTable, toRemove, buffer);
}

// Shared implementation of the AA-based `translate` overloads: decodes `str`
// one code point at a time, skips code points listed in `toRemove`, and
// writes either the mapped value or the original code point to `buffer`.
private void translateImpl(C1, T, C2, Buffer)(const(C1)[] str,
                                      scope T transTable,
                                      const(C2)[] toRemove,
                                      Buffer buffer)
{
    // Set of code points to drop, keyed by code point.
    bool[dchar] removeTable;
    foreach (dchar unwanted; toRemove)
        removeTable[unwanted] = true;

    foreach (dchar current; str)
    {
        if ((current in removeTable) is null)
        {
            if (auto replacement = current in transTable)
                put(buffer, *replacement);
            else
                put(buffer, current);
        }
    }
}

/++
    This is an $(I $(RED ASCII-only)) overload of $(LREF _translate). It
    will $(I not) work with Unicode. It exists as an optimization for the
    cases where Unicode processing is not necessary.

    Unlike the other overloads of $(LREF _translate), this one does not take
    an AA. Rather, it takes a `string` generated by $(LREF makeTransTable).

    The array generated by `makeTransTable` is `256` elements long such that
    the index is equal to the ASCII character being replaced and the value is
    equal to the character that it's being replaced with.
Note that translate
    does not decode any of the characters, so you can actually pass it Extended
    ASCII characters if you want to (ASCII only actually uses `128`
    characters), but be warned that Extended ASCII characters are not valid
    Unicode and therefore will result in a `UTFException` being thrown from
    most other Phobos functions.

    Also, because no decoding occurs, it is possible to use this overload to
    translate ASCII characters within a proper UTF-8 string without altering the
    other, non-ASCII characters. UTF validation issues only arise when a code
    unit greater than `127` is replaced with another code unit, or when any
    code unit is replaced with a code unit greater than `127`.

    See_Also:
        $(LREF tr),
        $(REF replace, std,array),
        $(REF substitute, std,algorithm,iteration)

    Params:
        str        = The original string.
        transTable = The string indicating which characters to replace and what
                     to replace them with. It is generated by $(LREF makeTransTable).
        toRemove   = The characters to remove from the string.
+/
C[] translate(C = immutable char)(scope const(char)[] str, scope const(char)[] transTable,
              scope const(char)[] toRemove = null) @trusted pure nothrow
if (is(immutable C == immutable char))
in
{
    import std.conv : to;
    assert(transTable.length == 256, "transTable had invalid length of " ~
            to!string(transTable.length));
}
do
{
    // One flag per possible code unit: true means "drop it".
    bool[256] dropped = false;
    foreach (char unit; toRemove)
        dropped[unit] = true;

    // First pass: count the surviving code units so that the result
    // can be allocated with exactly the right length.
    size_t kept = 0;
    foreach (char unit; str)
    {
        if (dropped[unit])
            continue;
        ++kept;
    }

    auto output = new char[kept];

    // Second pass: write the translated survivors.
    size_t next = 0;
    foreach (char unit; str)
    {
        if (dropped[unit])
            continue;
        output[next] = transTable[unit];
        ++next;
    }

    // `output` was freshly allocated above and is not referenced anywhere
    // else, so handing it out under the requested qualifier is fine.
    return cast(C[]) output;
}

///
@safe pure nothrow unittest
{
    auto transTable1 = makeTrans("eo5", "57q");
    assert(translate("hello world", transTable1) == "h5ll7 w7rld");

    assert(translate("hello world", transTable1, "low") == "h5 rd");
}

/**
 * Do same thing as $(LREF makeTransTable) but allocate the translation table
 * on the GC heap.
 *
 * Use $(LREF makeTransTable) instead.
 */
string makeTrans(scope const(char)[] from, scope const(char)[] to) @trusted pure nothrow
{
    // Build the fixed-size table, then copy it to the heap as a string.
    auto table = makeTransTable(from, to);
    return table[].idup;
}

///
@safe pure nothrow unittest
{
    auto transTable1 = makeTrans("eo5", "57q");
    assert(translate("hello world", transTable1) == "h5ll7 w7rld");

    assert(translate("hello world", transTable1, "low") == "h5 rd");
}

/*******
 * Construct 256 character translation table, where characters in from[] are replaced
 * by corresponding characters in to[].
*
 * Params:
 *      from = array of chars, less than or equal to 256 in length
 *      to = corresponding array of chars to translate to
 * Returns:
 *      translation array
 */
char[256] makeTransTable(scope const(char)[] from, scope const(char)[] to) @safe pure nothrow @nogc
in
{
    import std.ascii : isASCII;
    assert(from.length == to.length, "from.length must match to.length");
    assert(from.length <= 256, "from.length must be <= 256");
    foreach (char c; from)
        assert(isASCII(c),
                "all characters in from must be valid ascii character");
    foreach (char c; to)
        assert(isASCII(c),
                "all characters in to must be valid ascii character");
}
do
{
    // Every slot is written below, so default initialization is skipped.
    char[256] table = void;

    // Start from the identity mapping: each code unit maps to itself.
    for (size_t code = 0; code < table.length; ++code)
        table[code] = cast(char) code;

    // Apply the requested replacements in order; if a character occurs
    // more than once in `from`, the last occurrence wins.
    foreach (idx; 0 .. from.length)
        table[from[idx]] = to[idx];

    return table;
}

///
@safe pure unittest
{
    assert(translate("hello world", makeTransTable("hl", "q5")) == "qe55o wor5d");
    assert(translate("hello world", makeTransTable("12345", "67890")) == "hello world");
}

@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (C; AliasSeq!(char, const char, immutable char))
    {{
        assert(translate!C("hello world", makeTransTable("hl", "q5")) == to!(C[])("qe55o wor5d"));

        auto s = to!(C[])("hello world");
        auto transTable = makeTransTable("hl", "q5");
        static assert(is(typeof(s) == typeof(translate!C(s, transTable))));
        assert(translate(s, transTable) == "qe55o wor5d");
    }}

    static foreach (S; AliasSeq!(char[], const(char)[], immutable(char)[]))
    {
        assert(translate(to!S("hello world"), makeTransTable("hl", "q5")) == to!S("qe55o wor5d"));
        assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5")) ==
               to!S("qe55o \U00010143 wor5d"));
assert(translate(to!S("hello world"), makeTransTable("ol", "1o")) == to!S("heoo1 w1rod"));
        assert(translate(to!S("hello world"), makeTransTable("", "")) == to!S("hello world"));
        assert(translate(to!S("hello world"), makeTransTable("12345", "67890")) == to!S("hello world"));
        assert(translate(to!S("hello \U00010143 world"), makeTransTable("12345", "67890")) ==
               to!S("hello \U00010143 world"));

        static foreach (T; AliasSeq!(char[], const(char)[], immutable(char)[]))
        {
            assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("r")) ==
                   to!S("qe55o wo5d"));
            assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5"), to!T("r")) ==
                   to!S("qe55o \U00010143 wo5d"));
            assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("helo")) ==
                   to!S(" wrd"));
            assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("q5")) ==
                   to!S("qe55o wor5d"));
        }
    }
    });
}

/++
    This is an $(I $(RED ASCII-only)) overload of `translate` which takes an existing buffer to write the contents to.

    Params:
        str        = The original string.
        transTable = The string indicating which characters to replace and what
                     to replace them with. It is generated by $(LREF makeTransTable).
        toRemove   = The characters to remove from the string.
        buffer     = An output range to write the contents to.
  +/
void translate(C = immutable char, Buffer)(scope const(char)[] str, scope const(char)[] transTable,
        scope const(char)[] toRemove, Buffer buffer) @trusted pure
if (is(immutable C == immutable char) && isOutputRange!(Buffer, char))
in
{
    assert(transTable.length == 256, format!
        "transTable.length %s must equal 256"(transTable.length));
}
do
{
    // One flag per possible code unit: true means "drop it".
    bool[256] dropped = false;
    foreach (char unit; toRemove)
        dropped[unit] = true;

    // Single pass: skip the dropped code units, emit the translation
    // of everything else.
    foreach (char unit; str)
    {
        if (dropped[unit])
            continue;
        put(buffer, transTable[unit]);
    }
}

///
@safe pure unittest
{
    import std.array : appender;
    auto buffer = appender!(char[])();
    auto transTable1 = makeTransTable("eo5", "57q");
    translate("hello world", transTable1, null, buffer);
    assert(buffer.data == "h5ll7 w7rld");

    buffer.clear();
    translate("hello world", transTable1, "low", buffer);
    assert(buffer.data == "h5 rd");
}

/**********************************************
 * Return string that is the 'successor' to s[].
 * If the rightmost character is a-zA-Z0-9, it is incremented within
 * its case or digits. If it generates a carry, the process is
 * repeated with the one to its immediate left.
 *
 * If `s` is empty or its last character is not alphanumeric,
 * `s` itself is returned unchanged.
 */

S succ(S)(S s) @safe pure
if (isSomeString!S)
{
    import std.ascii : isAlphaNum;

    // Nothing to increment: hand the input back untouched.
    if (s.length == 0 || !isAlphaNum(s[$ - 1]))
        return s;

    auto copy = s.dup;
    size_t pos = copy.length - 1;

    while (true)
    {
        dchar cur = s[pos];
        dchar carry;

        if (cur == '9')
        {
            // Digit wrap-around: 9 -> 0, and a new leading digit would be '1'.
            cur = '0';
            carry = '1';
        }
        else if (cur == 'z' || cur == 'Z')
        {
            // Letter wrap-around within its own case: z -> a, Z -> A.
            // A new leading character would be that same 'a' / 'A'.
            cur -= 'Z' - 'A';
            carry = cur;
        }
        else
        {
            // No wrap-around here, so the carry chain stops. Characters
            // that are not alphanumeric are left as they are.
            if (isAlphaNum(cur))
                copy[pos]++;
            return copy;
        }

        copy[pos] = cast(char) cur;

        if (pos == 0)
        {
            // Carry out of the leftmost character: grow by one in front.
            auto grown = new typeof(copy[0])[copy.length + 1];
            grown[0] = cast(char) carry;
            grown[1 .. $] = copy[];
            return grown;
        }

        // Propagate the carry to the character on the left.
        pos--;
    }
}

///
@safe pure unittest
{
    assert(succ("1") == "2");
    assert(succ("9") == "10");
    assert(succ("999") == "1000");
    assert(succ("zz99") == "aaa00");
}

@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    assert(succ(string.init) is null);
    assert(succ("!@#$%") == "!@#$%");
    assert(succ("1") == "2");
    assert(succ("9") == "10");
    assert(succ("999") == "1000");
    assert(succ("zz99") == "aaa00");
    });
}


/++
    Replaces the characters in `str` which are in `from` with
    the corresponding characters in `to` and returns the resulting string.

    `tr` is based on
    $(HTTP pubs.opengroup.org/onlinepubs/9699919799/utilities/_tr.html, Posix's tr),
    though it doesn't do everything that the Posix utility does.

    Params:
        str       = The original string.
        from      = The characters to replace.
        to        = The characters to replace with.
        modifiers = String containing modifiers.

    Modifiers:
        $(BOOKTABLE,
        $(TR $(TD Modifier) $(TD Description))
        $(TR $(TD `'c'`) $(TD Complement the list of characters in `from`))
        $(TR $(TD `'d'`) $(TD Removes matching characters with no corresponding
                              replacement in `to`))
        $(TR $(TD `'s'`) $(TD Removes adjacent duplicates in the replaced
                              characters))
        )

    If the modifier `'d'` is present, then the number of characters in
    `to` may be only `0` or `1`.

    If the modifier `'d'` is $(I not) present, and `to` is empty, then
    `to` is taken to be the same as `from`.

    If the modifier `'d'` is $(I not) present, and `to` is shorter than
    `from`, then `to` is extended by replicating the last character in
    `to`.

    Both `from` and `to` may contain ranges using the `'-'` character
    (e.g. `"a-d"` is synonymous with `"abcd"`.) Neither accept a leading
    `'^'` as meaning the complement of the string (use the `'c'` modifier
    for that).

    See_Also:
        $(LREF translate),
        $(REF replace, std,array),
        $(REF substitute, std,algorithm,iteration)
  +/
C1[] tr(C1, C2, C3, C4 = immutable char)
       (C1[] str, const(C2)[] from, const(C3)[] to, const(C4)[] modifiers = null)
{
    import std.array : appender;
    import std.conv : conv_to = to;   // renamed: `to` is also a parameter name
    import std.utf : decode;

    bool mod_c;
    bool mod_d;
    bool mod_s;

    // Parse the modifier string; any character other than c/d/s is a
    // programming error and trips the assert.
    foreach (char c; modifiers)
    {
        switch (c)
        {
            case 'c':   mod_c = 1; break;   // complement
            case 'd':   mod_d = 1; break;   // delete unreplaced chars
            case 's':   mod_s = 1; break;   // squeeze duplicated replaced chars
            default:    assert(false, "modifier must be one of ['c', 'd', 's'] not "
                            ~ c);
        }
    }

    // Without 'd', an empty `to` means "same as `from`".
    if (to.empty && !mod_d)
        to = conv_to!(typeof(to))(from);

    auto result = appender!(C1[])();
    bool modified;      // true if the previously emitted character was a replacement
    dchar lastc;        // previously emitted character (used by the 's' squeeze)

    foreach (dchar c; str)
    {
        dchar lastf;    // previous literal char seen in from[], dchar.init if none
        dchar lastt;    // previous literal char seen in to[], dchar.init if none
        dchar newc;     // replacement character for c
        int n = 0;      // position of c within the (range-expanded) from[]

        // Locate c in from[], expanding "x-y" ranges on the fly and
        // counting the expanded position in n.
        for (size_t i = 0; i < from.length; )
        {
            immutable f = decode(from, i);
            // A '-' is a range operator only when it has a literal on its
            // left and at least one more character on its right.
            if (f == '-' && lastf != dchar.init && i < from.length)
            {
                immutable nextf = decode(from, i);
                if (lastf <= c && c <= nextf)
                {
                    // n was already advanced past lastf, hence the -1
                    n += c - lastf - 1;
                    if (mod_c)
                        goto Lnotfound;
                    goto Lfound;
                }
                n += nextf - lastf;
                lastf = lastf.init;
                continue;
            }

            if (c == f)
            {   if (mod_c)
                    goto Lnotfound;
                goto Lfound;
            }
            lastf = f;
            n++;
        }
        // c is not in from[]: without 'c' it is copied through unchanged,
        // with 'c' it is treated as a match.
        if (!mod_c)
            goto Lnotfound;
        n = 0;          // consider it 'found' at position 0

      Lfound:
        {   // create a new scope so that gotos don't skip over the declaration of nextt
        // Find the nth character in to[]
        dchar nextt;
        for (size_t i = 0; i < to.length; )
        {
            immutable t = decode(to, i);
            if (t == '-' && lastt != dchar.init && i < to.length)
            {
                nextt = decode(to, i);
                n -= nextt - lastt;
                if (n < 0)
                {
                    // the nth character lies inside this range
                    newc = nextt + n + 1;
                    goto Lnewc;
                }
                lastt = dchar.init;
                continue;
            }
            if (n == 0)
            {   newc = t;
                goto Lnewc;
            }
            lastt = t;
            nextt = t;
            n--;
        }
        // to[] ran out before position n was reached: with 'd' the
        // character is deleted, otherwise the last character of to[] is used.
        if (mod_d)
            continue;
        newc = nextt;
        }
      Lnewc:
        // 's': drop a replacement identical to the replacement just emitted
        if (mod_s && modified && newc == lastc)
            continue;
        result.put(newc);
        assert(newc != dchar.init, "character must not be dchar.init");
        modified = true;
        lastc = newc;
        continue;

      Lnotfound:
        // Unmatched character: copy through and end any squeeze run.
        result.put(c);
        lastc = c;
        modified = false;
    }

    return result.data;
}

///
@safe pure unittest
{
    assert(tr("abcdef", "cd", "CD") == "abCDef");
    assert(tr("1st March, 2018", "March", "MAR", "s") == "1st MAR, 2018");
    assert(tr("abcdef", "ef", "", "d") == "abcd");
    assert(tr("14-Jul-87", "a-zA-Z", " ", "cs") == " Jul ");
}

@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Complete list of test types; too slow to test'em all
    // alias TestTypes = AliasSeq!(
    //          char[], const( char)[], immutable( char)[],
    //         wchar[], const(wchar)[], immutable(wchar)[],
    //         dchar[], const(dchar)[], immutable(dchar)[]);

    // Reduced list of test types
    alias TestTypes = AliasSeq!(char[], const(wchar)[], immutable(dchar)[]);

    assertCTFEable!(
    {
    foreach (S; TestTypes)
    {
        foreach (T; TestTypes)
        {
            foreach (U; TestTypes)
            {
                assert(equal(tr(to!S("abcdef"), to!T("cd"), to!U("CD")), "abCDef"));
                assert(equal(tr(to!S("abcdef"), to!T("b-d"), to!U("B-D")), "aBCDef"));
assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-Dx")), "aBCDefgx"));
                assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-CDx")), "aBCDefgx"));
                assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-BCDx")), "aBCDefgx"));
                assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U("*"), to!S("c")), "****ef"));
                assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U(""), to!T("d")), "abcd"));
                assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U(""), to!U("s")), "helo godbye"));
                assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U("x"), "s"), "hex gxdbye"));
                assert(equal(tr(to!S("14-Jul-87"), to!T("a-zA-Z"), to!U(" "), "cs"), " Jul "));
                assert(equal(tr(to!S("Abc"), to!T("AAA"), to!U("XYZ")), "Xbc"));
            }
        }

        auto s = to!S("hello world");
        static assert(is(typeof(s) == typeof(tr(s, "he", "if"))));
        assert(tr(s, "he", "if") == "ifllo world");
    }
    });
}

// An unknown modifier character trips the assert inside `tr`.
@system pure unittest
{
    import core.exception : AssertError;
    import std.exception : assertThrown;
    assertThrown!AssertError(tr("abcdef", "cd", "CD", "X"));
}

/**
 * Takes a string `s` and determines if it represents a number. This function
 * also takes an optional parameter, `bAllowSep`, which will accept the
 * separator characters `','` and `'_'` within the string. But these
 * characters should be stripped from the string before using any
 * of the conversion functions like `to!int()`, `to!float()`, etc.,
 * or else an error will occur.
 *
 * Also please note that no spaces are allowed within the string
 * anywhere, whether leading, trailing, or embedded;
 * thus they too must be stripped from the string before using this
 * function, or any of the conversion functions.
 *
 * Params:
 *     s = the string or random access range to check
 *     bAllowSep = accept separator characters or not
 *
 * Returns:
 *     `bool`
 */
bool isNumeric(S)(S s, bool bAllowSep = false)
if (isSomeString!S ||
    (isRandomAccessRange!S &&
    hasSlicing!S &&
    isSomeChar!(ElementType!S) &&
    !isInfinite!S))
{
    import std.algorithm.comparison : among;
    import std.ascii : isASCII;

    // ASCII only case insensitive comparison with two ranges
    static bool asciiCmp(S1)(S1 a, string b)
    {
        import std.algorithm.comparison : equal;
        import std.algorithm.iteration : map;
        import std.ascii : toLower;
        import std.utf : byChar;
        return a.map!toLower.equal(b.byChar.map!toLower);
    }

    // auto-decoding special case, we're only comparing characters
    // in the ASCII range so there's no reason to decode
    static if (isSomeString!S)
    {
        import std.utf : byCodeUnit;
        auto codeUnits = s.byCodeUnit;
    }
    else
    {
        alias codeUnits = s;
    }

    if (codeUnits.empty)
        return false;

    // Check for NaN (Not a Number) and for Infinity
    // (compared case-insensitively against the whole input)
    if (codeUnits.among!((a, b) => asciiCmp(a.save, b))
            ("nan", "nani", "nan+nani", "inf", "-inf"))
        return true;

    // Consume a single optional leading sign.
    immutable frontResult = codeUnits.front;
    if (frontResult == '-' || frontResult == '+')
        codeUnits.popFront;

    immutable iLen = codeUnits.length;
    // Per-number state; reset when a '+' starts the second (complex) part.
    bool bDecimalPoint, bExponent, bComplex, sawDigits;

    for (size_t i = 0; i < iLen; i++)
    {
        immutable c = codeUnits[i];

        if (!c.isASCII)
            return false;

        // Digits are good, skip to the next character
        if (c >= '0' && c <= '9')
        {
            sawDigits = true;
            continue;
        }

        // Check for the complex type, and if found
        // reset the flags for checking the 2nd number.
        if (c == '+')
        {
            // a '+' directly after the (already consumed) sign is invalid
            if (!i)
                return false;
            bDecimalPoint = false;
            bExponent = false;
            bComplex = true;
            sawDigits = false;
            continue;
        }

        // Allow only one exponent per number
        if (c == 'e' || c == 'E')
        {
            // A 2nd exponent found, return not a number
            if (bExponent || i + 1 >= iLen)
                return false;
            // Look forward for the sign, and if
            // missing then this is not a number.
            if (codeUnits[i + 1] != '-' && codeUnits[i + 1] != '+')
                return false;
            bExponent = true;
            i++;    // skip the exponent sign that was just validated
            continue;
        }
        // Allow only one decimal point per number to be used
        if (c == '.')
        {
            // A 2nd decimal point found, return not a number
            if (bDecimalPoint)
                return false;
            bDecimalPoint = true;
            continue;
        }
        // Check for ending literal characters: "f,u,l,i,ul,fi,li",
        // and whether they're being used with the correct datatype.
        if (i == iLen - 2)
        {
            // a suffix must follow at least one digit
            if (!sawDigits)
                return false;
            // Integer Whole Number
            if (asciiCmp(codeUnits[i .. iLen], "ul") &&
                    (!bDecimalPoint && !bExponent && !bComplex))
                return true;
            // Floating-Point Number
            if (codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))("fi", "li") &&
                    (bDecimalPoint || bExponent || bComplex))
                return true;
            // "ul" on a floating-point or complex number is rejected
            if (asciiCmp(codeUnits[i .. iLen], "ul") &&
                    (bDecimalPoint || bExponent || bComplex))
                return false;
            // Could be a Integer or a Float, thus
            // all these suffixes are valid for both
            return codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))
                ("ul", "fi", "li") != 0;
        }
        if (i == iLen - 1)
        {
            // a suffix must follow at least one digit
            if (!sawDigits)
                return false;
            // Integer Whole Number
            if (c.among!('u', 'l', 'U', 'L')() &&
                   (!bDecimalPoint && !bExponent && !bComplex))
                return true;
            // Check to see if the last character in the string
            // is the required 'i' character
            if (bComplex)
                return c.among!('i', 'I')() != 0;
            // Floating-Point Number
            return c.among!('l', 'L', 'f', 'F', 'i', 'I')() != 0;
        }
        // Check if separators are allowed to be in the numeric string
        if (!bAllowSep || !c.among!('_', ',')())
            return false;
    }

    // The input ended without a suffix: valid only if the last number
    // contained at least one digit.
    return sawDigits;
}

/**
 * Integer Whole Number: (byte, ubyte, short, ushort, int, uint, long, and ulong)
 * ['+'|'-']digit(s)[U|L|UL]
 */
@safe @nogc pure nothrow unittest
{
    assert(isNumeric("123"));
    assert(isNumeric("123UL"));
    assert(isNumeric("123L"));
    assert(isNumeric("+123U"));
    assert(isNumeric("-123L"));
}

/**
 * Floating-Point Number: (float, double, real, ifloat, idouble, and ireal)
 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
 *      or [nan|nani|inf|-inf]
 */
@safe @nogc pure nothrow unittest
{
    assert(isNumeric("+123"));
    assert(isNumeric("-123.01"));
    assert(isNumeric("123.3e-10f"));
    assert(isNumeric("123.3e-10fi"));
    assert(isNumeric("123.3e-10L"));

    assert(isNumeric("nan"));
    assert(isNumeric("nani"));
    assert(isNumeric("-inf"));
}

/**
 * Floating-Point Number: (cfloat, cdouble, and creal)
 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+]
 *         [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
 *      or [nan|nani|nan+nani|inf|-inf]
 */
@safe @nogc pure nothrow unittest
{
    assert(isNumeric("-123e-1+456.9e-10Li"));
    assert(isNumeric("+123e+10+456i"));
assert(isNumeric("123+456"));
}

// Inputs that must be rejected: bare suffixes, bare signs and points,
// exponents without digits, an unsigned exponent, and non-ASCII input.
@safe @nogc pure nothrow unittest
{
    assert(!isNumeric("F"));
    assert(!isNumeric("L"));
    assert(!isNumeric("U"));
    assert(!isNumeric("i"));
    assert(!isNumeric("fi"));
    assert(!isNumeric("ul"));
    assert(!isNumeric("li"));
    assert(!isNumeric("."));
    assert(!isNumeric("-"));
    assert(!isNumeric("+"));
    assert(!isNumeric("e-"));
    assert(!isNumeric("e+"));
    assert(!isNumeric(".f"));
    assert(!isNumeric("e+f"));
    assert(!isNumeric("++1"));
    assert(!isNumeric(""));
    assert(!isNumeric("1E+1E+1"));
    assert(!isNumeric("1E1"));
    assert(!isNumeric("\x81"));
}

// Test string types
@safe unittest
{
    import std.conv : to;

    static foreach (T; AliasSeq!(string, char[], wstring, wchar[], dstring, dchar[]))
    {
        assert("123".to!T.isNumeric());
        assert("123UL".to!T.isNumeric());
        assert("123fi".to!T.isNumeric());
        assert("123li".to!T.isNumeric());
        assert(!"--123L".to!T.isNumeric());
    }
}

// test ranges
@system pure unittest
{
    import std.range : refRange;
    import std.utf : byCodeUnit;

    assert("123".byCodeUnit.isNumeric());
    assert("123UL".byCodeUnit.isNumeric());
    assert("123fi".byCodeUnit.isNumeric());
    assert("123li".byCodeUnit.isNumeric());
    assert(!"--123L".byCodeUnit.isNumeric());

    // reference-semantics ranges
    dstring z = "0";
    assert(isNumeric(refRange(&z)));

    dstring nani = "nani";
    assert(isNumeric(refRange(&nani)));
}

/// isNumeric works with CTFE
@safe pure unittest
{
    enum a = isNumeric("123.00E-5+1234.45E-12Li");
    enum b = isNumeric("12345xxxx890");

    static assert( a);
    static assert(!b);
}

@system unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    // Test the isNumeric(in string) function
    assert(isNumeric("1"));
    assert(isNumeric("1.0"));
    assert(isNumeric("1e-1"));
    assert(!isNumeric("12345xxxx890"));
    assert(isNumeric("567L"));
    assert(isNumeric("23UL"));
    assert(!isNumeric("-123..56f"));
    assert(!isNumeric("12.3.5.6"));
    assert(!isNumeric(" 12.356"));
    assert(!isNumeric("123 5.6"));
    assert(isNumeric("1233E-1+1.0e-1i"));

    assert(isNumeric("123.00E-5+1234.45E-12Li"));
    assert(!isNumeric("123.00e-5+1234.45E-12iL"));
    assert(!isNumeric("123.00e-5+1234.45e-12uL"));
    assert(!isNumeric("123.00E-5+1234.45e-12lu"));

    assert(isNumeric("123fi"));
    assert(isNumeric("123li"));
    assert(!isNumeric("--123L"));
    assert(!isNumeric("+123.5UL"));
    assert(isNumeric("123f"));
    assert(!isNumeric("123.u"));

    // @@@BUG@@ to!string(float) is not CTFEable.
    // Related: formatValue(T) if (is(FloatingPointTypeOf!T))
    if (!__ctfe)
    {
        assert(isNumeric(to!string(real.nan)));
        assert(isNumeric(to!string(-real.infinity)));
    }

    // slices of a larger string
    string s = "$250.99-";
    assert(isNumeric(s[1 .. $ - 2]));
    assert(!isNumeric(s));
    assert(!isNumeric(s[0 .. $ - 1]));
    });

    assert(!isNumeric("-"));
    assert(!isNumeric("+"));
}

/*****************************
 * Soundex algorithm.
 *
 * The Soundex algorithm converts a word into 4 characters
 * based on how the word sounds phonetically. The idea is that
 * two spellings that sound alike will have the same Soundex
 * value, which means that Soundex can be used for fuzzy matching
 * of names.
 *
 * Params:
 *  str = String or InputRange to convert to Soundex representation.
 *
 * Returns:
 *  The four character array with the Soundex result in it.
 *  The array has zeros in it if there is no Soundex representation for the string.
*
 * See_Also:
 *  $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia),
 *  $(LUCKY The Soundex Indexing System),
 *  $(LREF soundex)
 *
 * Note:
 *  Only works well with English names.
 */
char[4] soundexer(Range)(Range str)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias Unit = Unqual!(ElementEncodingType!Range);

    // Soundex digit for each letter, indexed by (letter - 'A').
    static immutable codes =
        // ABCDEFGHIJKLMNOPQRSTUVWXYZ
          "01230120022455012623010202";

    char[4] code = void;    // fully written before it is returned
    size_t filled = 0;      // number of slots of `code` written so far
    Unit previous;          // digit of the last coded letter, or Unit.init

    foreach (Unit ch; str)
    {
        immutable isLower = ch >= 'a' && ch <= 'z';
        immutable isUpper = ch >= 'A' && ch <= 'Z';

        // Anything that is not an ASCII letter is skipped, but it does
        // separate letters: the remembered digit is forgotten.
        if (!isLower && !isUpper)
        {
            previous = previous.init;
            continue;
        }
        if (isLower)
            ch -= 'a' - 'A';

        // The first letter is stored as is; its digit is only remembered.
        if (filled == 0)
        {
            code[0] = cast(char) ch;
            filled = 1;
            previous = codes[ch - 'A'];
            continue;
        }

        // H and W are ignored entirely and keep the remembered digit.
        if (ch == 'H' || ch == 'W')
            continue;
        // A vowel resets the remembered digit, so the same digit may
        // appear again on the other side of it.
        if (ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U')
            previous = previous.init;

        ch = codes[ch - 'A'];
        if (ch != '0' && ch != previous)
        {
            code[filled] = cast(char) ch;
            ++filled;
            previous = ch;
        }
        // All four slots are written: the rest of the input is irrelevant.
        if (filled == 4)
            break;
    }

    if (filled == 0)
        code[] = 0;                 // no letters at all
    else
        code[filled .. 4] = '0';    // pad with '0' (empty slice once full)
    return code;
}

/// ditto
char[4] soundexer(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    // Forward to the range overload using the underlying string type.
    return soundexer!(StringTypeOf!Range)(str);
}

///
@safe unittest
{
    assert(soundexer("Gauss") == "G200");
    assert(soundexer("Ghosh") == "G200");

    assert(soundexer("Robert") == "R163");
    assert(soundexer("Rupert") == "R163");

    assert(soundexer("0123^&^^**&^") == ['\0', '\0', '\0', '\0']);
}

/*****************************
 * Like $(LREF soundexer), but with different parameters
 * and return value.
*
 * Params:
 *  str = String to convert to Soundex representation.
 *  buffer = Optional array of at least 4 chars to put the resulting
 *      Soundex characters into. Only its first four elements are
 *      written. If null, the return value
 *      buffer will be allocated on the heap.
 * Returns:
 *  The four character array with the Soundex result in it
 *  (the first four elements of `buffer` when one was supplied).
 *  Returns null if there is no Soundex representation for the string.
 * See_Also:
 *  $(LREF soundexer)
 */
char[] soundex(scope const(char)[] str, return scope char[] buffer = null)
    @safe pure nothrow
in
{
    assert(buffer is null || buffer.length >= 4);
}
out (result)
{
    if (result !is null)
    {
        assert(result.length == 4, "Result must have length of 4");
        assert(result[0] >= 'A' && result[0] <= 'Z', "The first character of "
                ~ " the result must be an upper character not " ~ result);
        foreach (char c; result[1 .. 4])
            assert(c >= '0' && c <= '6', "the last three character of the"
                ~ " result must be number between 0 and 6 not " ~ result);
    }
}
do
{
    char[4] result = soundexer(str);
    // soundexer reports "no representation" as an all-zero array.
    if (result[0] == 0)
        return null;
    if (buffer is null)
        buffer = new char[4];
    // The in-contract admits buffers longer than four chars. Copy into, and
    // return, exactly the first four elements: a whole-array copy would have
    // mismatched lengths, and a longer result would break the out-contract.
    buffer[0 .. 4] = result[];
    return buffer[0 .. 4];
}

///
@safe unittest
{
    assert(soundex("Gauss") == "G200");
    assert(soundex("Ghosh") == "G200");

    assert(soundex("Robert") == "R163");
    assert(soundex("Rupert") == "R163");

    assert(soundex("0123^&^^**&^") == null);
}

// A caller-supplied buffer longer than four chars is accepted;
// only its first four elements are used.
@safe pure nothrow unittest
{
    char[8] big = '#';
    assert(soundex("Robert", big[]) == "R163");
    assert(big[0 .. 4] == "R163");
    assert(big[4 .. $] == "####");
}

@safe pure nothrow unittest
{
    import std.exception : assertCTFEable;
    assertCTFEable!(
    {
    char[4] buffer;

    assert(soundex(null) == null);
    assert(soundex("") == null);
    assert(soundex("0123^&^^**&^") == null);
    assert(soundex("Euler") == "E460");
    assert(soundex(" Ellery ") == "E460");
    assert(soundex("Gauss") == "G200");
    assert(soundex("Ghosh") == "G200");
    assert(soundex("Hilbert") == "H416");
    assert(soundex("Heilbronn") == "H416");
    assert(soundex("Knuth") == "K530");
    assert(soundex("Kant", buffer) == "K530");
    assert(soundex("Lloyd") == "L300");
    assert(soundex("Ladd") == "L300");
    assert(soundex("Lukasiewicz", buffer) == "L222");
    assert(soundex("Lissajous") == "L222");
    assert(soundex("Robert") == "R163");
    assert(soundex("Rupert") == "R163");
    assert(soundex("Rubin") == "R150");
    assert(soundex("Washington") == "W252");
    assert(soundex("Lee") == "L000");
    assert(soundex("Gutierrez") == "G362");
    assert(soundex("Pfister") == "P236");
    assert(soundex("Jackson") == "J250");
    assert(soundex("Tymczak") == "T522");
    assert(soundex("Ashcraft") == "A261");

    assert(soundex("Woo") == "W000");
    assert(soundex("Pilgrim") == "P426");
    assert(soundex("Flingjingwaller") == "F452");
    assert(soundex("PEARSE") == "P620");
    assert(soundex("PIERCE") == "P620");
    assert(soundex("Price") == "P620");
    assert(soundex("CATHY") == "C300");
    assert(soundex("KATHY") == "K300");
    assert(soundex("Jones") == "J520");
    assert(soundex("johnsons") == "J525");
    assert(soundex("Hardin") == "H635");
    assert(soundex("Martinez") == "M635");

    import std.utf : byChar, byDchar, byWchar;
    assert(soundexer("Martinez".byChar ) == "M635");
    assert(soundexer("Martinez".byWchar) == "M635");
    assert(soundexer("Martinez".byDchar) == "M635");
    });
}

@safe pure unittest
{
    assert(testAliasedString!soundexer("Martinez"));
}


/***************************************************
 * Construct an associative array consisting of all
 * abbreviations that uniquely map to the strings in values.
*
 * This is useful in cases where the user is expected to type
 * in one of a known set of strings, and the program will helpfully
 * auto-complete the string once sufficient characters have been
 * entered that uniquely identify it.
 */
string[string] abbrev(string[] values) @safe pure
{
    import std.algorithm.sorting : sort;

    string[string] result;

    // Make a copy when sorting so we follow COW principles.
    values = values.dup;
    sort(values);

    size_t values_length = values.length;
    size_t lasti = values_length;   // index of the previous distinct value;
                                    // values_length means "none yet"
    size_t nexti;                   // index of the next distinct value;
                                    // values_length means "none left"

    string nv;                      // next distinct value (valid when nexti < values_length)
    string lv;                      // previous distinct value (valid when lasti < values_length)

    for (size_t i = 0; i < values_length; i = nexti)
    {
        string value = values[i];

        // Skip dups
        for (nexti = i + 1; nexti < values_length; nexti++)
        {
            nv = values[nexti];
            if (value != values[nexti])
                break;
        }

        import std.utf : stride;

        // Try every proper prefix of value, stepping by whole code points.
        for (size_t j = 0; j < value.length; j += stride(value, j))
        {
            string v = value[0 .. j];

            // The input is sorted, so a prefix is unique exactly when it is
            // a prefix of neither the next nor the previous distinct value.
            // NOTE(review): when there is only one distinct value, both
            // neighbor tests pass for j == 0, so the empty string is
            // registered as an abbreviation too - confirm this is intended.
            if ((nexti == values_length || j > nv.length || v != nv[0 .. j]) &&
                (lasti == values_length || j > lv.length || v != lv[0 .. j]))
            {
                result[v] = value;
            }
        }
        // The full string always maps to itself.
        result[value] = value;
        lasti = i;
        lv = value;
    }

    return result;
}

///
@safe unittest
{
    import std.string;

    static string[] list = [ "food", "foxy" ];
    auto abbrevs = abbrev(list);
    assert(abbrevs == ["fox": "foxy", "food": "food",
                       "foxy": "foxy", "foo": "food"]);
}


// Duplicates in the input, and a value that is a prefix of another value.
@system pure unittest
{
    import std.algorithm.sorting : sort;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    string[] values;
    values ~= "hello";
    values ~= "hello";
    values ~= "he";

    string[string] r;

    r = abbrev(values);
    auto keys = r.keys.dup;
    sort(keys);

    assert(keys.length == 4);
    assert(keys[0] == "he");
    assert(keys[1] == "hel");
    assert(keys[2] == "hell");
    assert(keys[3] == "hello");

    assert(r[keys[0]] == "he");
    assert(r[keys[1]] == "hello");
    assert(r[keys[2]] == "hello");
    assert(r[keys[3]] == "hello");
    });
}


/******************************************
 * Compute _column number at the end of the printed form of the string,
 * assuming the string starts in the leftmost _column, which is numbered
 * starting from 0.
 *
 * Tab characters are expanded into enough spaces to bring the _column number
 * to the next multiple of tabsize.
 * If there are multiple lines in the string, the _column number of the last
 * line is returned.
6736 * 6737 * Params: 6738 * str = string or InputRange to be analyzed 6739 * tabsize = number of columns a tab character represents 6740 * 6741 * Returns: 6742 * column number 6743 */ 6744 6745 size_t column(Range)(Range str, in size_t tabsize = 8) 6746 if ((isInputRange!Range && isSomeChar!(ElementEncodingType!Range) || 6747 isNarrowString!Range) && 6748 !isConvertibleToString!Range) 6749 { 6750 static if (is(immutable ElementEncodingType!Range == immutable char)) 6751 { 6752 // decoding needed for chars 6753 import std.utf : byDchar; 6754 6755 return str.byDchar.column(tabsize); 6756 } 6757 else 6758 { 6759 // decoding not needed for wchars and dchars 6760 import std.uni : lineSep, paraSep, nelSep; 6761 6762 size_t column; 6763 6764 foreach (const c; str) 6765 { 6766 switch (c) 6767 { 6768 case '\t': 6769 column = (column + tabsize) / tabsize * tabsize; 6770 break; 6771 6772 case '\r': 6773 case '\n': 6774 case paraSep: 6775 case lineSep: 6776 case nelSep: 6777 column = 0; 6778 break; 6779 6780 default: 6781 column++; 6782 break; 6783 } 6784 } 6785 return column; 6786 } 6787 } 6788 6789 /// 6790 @safe pure unittest 6791 { 6792 import std.utf : byChar, byWchar, byDchar; 6793 6794 assert(column("1234 ") == 5); 6795 assert(column("1234 "w) == 5); 6796 assert(column("1234 "d) == 5); 6797 6798 assert(column("1234 ".byChar()) == 5); 6799 assert(column("1234 "w.byWchar()) == 5); 6800 assert(column("1234 "d.byDchar()) == 5); 6801 6802 // Tab stops are set at 8 spaces by default; tab characters insert enough 6803 // spaces to bring the column position to the next multiple of 8. 6804 assert(column("\t") == 8); 6805 assert(column("1\t") == 8); 6806 assert(column("\t1") == 9); 6807 assert(column("123\t") == 8); 6808 6809 // Other tab widths are possible by specifying it explicitly: 6810 assert(column("\t", 4) == 4); 6811 assert(column("1\t", 4) == 4); 6812 assert(column("\t1", 4) == 5); 6813 assert(column("123\t", 4) == 4); 6814 6815 // New lines reset the column number. 
6816 assert(column("abc\n") == 0); 6817 assert(column("abc\n1") == 1); 6818 assert(column("abcdefg\r1234") == 4); 6819 assert(column("abc\u20281") == 1); 6820 assert(column("abc\u20291") == 1); 6821 assert(column("abc\u00851") == 1); 6822 assert(column("abc\u00861") == 5); 6823 } 6824 6825 size_t column(Range)(auto ref Range str, in size_t tabsize = 8) 6826 if (isConvertibleToString!Range) 6827 { 6828 return column!(StringTypeOf!Range)(str, tabsize); 6829 } 6830 6831 @safe pure unittest 6832 { 6833 assert(testAliasedString!column("abc\u00861")); 6834 } 6835 6836 @safe @nogc unittest 6837 { 6838 import std.conv : to; 6839 import std.exception : assertCTFEable; 6840 6841 assertCTFEable!( 6842 { 6843 assert(column(string.init) == 0); 6844 assert(column("") == 0); 6845 assert(column("\t") == 8); 6846 assert(column("abc\t") == 8); 6847 assert(column("12345678\t") == 16); 6848 }); 6849 } 6850 6851 /****************************************** 6852 * Wrap text into a paragraph. 6853 * 6854 * The input text string s is formed into a paragraph 6855 * by breaking it up into a sequence of lines, delineated 6856 * by \n, such that the number of columns is not exceeded 6857 * on each line. 6858 * The last line is terminated with a \n. 
6859 * Params: 6860 * s = text string to be wrapped 6861 * columns = maximum number of _columns in the paragraph 6862 * firstindent = string used to _indent first line of the paragraph 6863 * indent = string to use to _indent following lines of the paragraph 6864 * tabsize = column spacing of tabs in firstindent[] and indent[] 6865 * Returns: 6866 * resulting paragraph as an allocated string 6867 */ 6868 6869 S wrap(S)(S s, in size_t columns = 80, S firstindent = null, 6870 S indent = null, in size_t tabsize = 8) 6871 if (isSomeString!S) 6872 { 6873 import std.uni : isWhite; 6874 typeof(s.dup) result; 6875 bool inword; 6876 bool first = true; 6877 size_t wordstart; 6878 6879 const indentcol = column(indent, tabsize); 6880 6881 result.length = firstindent.length + s.length; 6882 result.length = firstindent.length; 6883 result[] = firstindent[]; 6884 auto col = column(firstindent, tabsize); 6885 foreach (size_t i, dchar c; s) 6886 { 6887 if (isWhite(c)) 6888 { 6889 if (inword) 6890 { 6891 if (first) 6892 { 6893 } 6894 else if (col + 1 + (i - wordstart) > columns) 6895 { 6896 result ~= '\n'; 6897 result ~= indent; 6898 col = indentcol; 6899 } 6900 else 6901 { 6902 result ~= ' '; 6903 col += 1; 6904 } 6905 result ~= s[wordstart .. i]; 6906 col += i - wordstart; 6907 inword = false; 6908 first = false; 6909 } 6910 } 6911 else 6912 { 6913 if (!inword) 6914 { 6915 wordstart = i; 6916 inword = true; 6917 } 6918 } 6919 } 6920 6921 if (inword) 6922 { 6923 if (col + 1 + (s.length - wordstart) > columns) 6924 { 6925 result ~= '\n'; 6926 result ~= indent; 6927 } 6928 else if (result.length != firstindent.length) 6929 result ~= ' '; 6930 result ~= s[wordstart .. 
s.length]; 6931 } 6932 result ~= '\n'; 6933 6934 return result; 6935 } 6936 6937 /// 6938 @safe pure unittest 6939 { 6940 assert(wrap("a short string", 7) == "a short\nstring\n"); 6941 6942 // wrap will not break inside of a word, but at the next space 6943 assert(wrap("a short string", 4) == "a\nshort\nstring\n"); 6944 6945 assert(wrap("a short string", 7, "\t") == "\ta\nshort\nstring\n"); 6946 assert(wrap("a short string", 7, "\t", " ") == "\ta\n short\n string\n"); 6947 } 6948 6949 @safe pure unittest 6950 { 6951 import std.conv : to; 6952 import std.exception : assertCTFEable; 6953 6954 assertCTFEable!( 6955 { 6956 assert(wrap(string.init) == "\n"); 6957 assert(wrap(" a b df ") == "a b df\n"); 6958 assert(wrap(" a b df ", 3) == "a b\ndf\n"); 6959 assert(wrap(" a bc df ", 3) == "a\nbc\ndf\n"); 6960 assert(wrap(" abcd df ", 3) == "abcd\ndf\n"); 6961 assert(wrap("x") == "x\n"); 6962 assert(wrap("u u") == "u u\n"); 6963 assert(wrap("abcd", 3) == "\nabcd\n"); 6964 assert(wrap("a de", 10, "\t", " ", 8) == "\ta\n de\n"); 6965 }); 6966 } 6967 6968 @safe pure unittest // https://issues.dlang.org/show_bug.cgi?id=23298 6969 { 6970 assert("1 2 3 4 5 6 7 8 9".wrap(17) == "1 2 3 4 5 6 7 8 9\n"); 6971 assert("1 2 3 4 5 6 7 8 9 ".wrap(17) == "1 2 3 4 5 6 7 8 9\n"); 6972 assert("1 2 3 4 5 6 7 8 99".wrap(17) == "1 2 3 4 5 6 7 8\n99\n"); 6973 } 6974 6975 /****************************************** 6976 * Removes one level of indentation from a multi-line string. 6977 * 6978 * This uniformly outdents the text as much as possible. 6979 * Whitespace-only lines are always converted to blank lines. 6980 * 6981 * Does not allocate memory if it does not throw. 6982 * 6983 * Params: 6984 * str = multi-line string 6985 * 6986 * Returns: 6987 * outdented string 6988 * 6989 * Throws: 6990 * StringException if indentation is done with different sequences 6991 * of whitespace characters. 
6992 */ 6993 S outdent(S)(S str) @safe pure 6994 if (isSomeString!S) 6995 { 6996 return str.splitLines(Yes.keepTerminator).outdent().join(); 6997 } 6998 6999 /// 7000 @safe pure unittest 7001 { 7002 enum pretty = q{ 7003 import std.stdio; 7004 void main() { 7005 writeln("Hello"); 7006 } 7007 }.outdent(); 7008 7009 enum ugly = q{ 7010 import std.stdio; 7011 void main() { 7012 writeln("Hello"); 7013 } 7014 }; 7015 7016 assert(pretty == ugly); 7017 } 7018 7019 7020 /****************************************** 7021 * Removes one level of indentation from an array of single-line strings. 7022 * 7023 * This uniformly outdents the text as much as possible. 7024 * Whitespace-only lines are always converted to blank lines. 7025 * 7026 * Params: 7027 * lines = array of single-line strings 7028 * 7029 * Returns: 7030 * lines[] is rewritten in place with outdented lines 7031 * 7032 * Throws: 7033 * StringException if indentation is done with different sequences 7034 * of whitespace characters. 7035 */ 7036 S[] outdent(S)(return scope S[] lines) @safe pure 7037 if (isSomeString!S) 7038 { 7039 import std.algorithm.searching : startsWith; 7040 7041 if (lines.empty) 7042 { 7043 return null; 7044 } 7045 7046 static S leadingWhiteOf(S str) 7047 { 7048 return str[ 0 .. $ - stripLeft(str).length ]; 7049 } 7050 7051 S shortestIndent; 7052 foreach (ref line; lines) 7053 { 7054 const stripped = line.stripLeft(); 7055 7056 if (stripped.empty) 7057 { 7058 line = line[line.chomp().length .. $]; 7059 } 7060 else 7061 { 7062 const indent = leadingWhiteOf(line); 7063 7064 // Comparing number of code units instead of code points is OK here 7065 // because this function throws upon inconsistent indentation. 
7066 if (shortestIndent is null || indent.length < shortestIndent.length) 7067 { 7068 if (indent.empty) 7069 return lines; 7070 shortestIndent = indent; 7071 } 7072 } 7073 } 7074 7075 foreach (ref line; lines) 7076 { 7077 const stripped = line.stripLeft(); 7078 7079 if (stripped.empty) 7080 { 7081 // Do nothing 7082 } 7083 else if (line.startsWith(shortestIndent)) 7084 { 7085 line = line[shortestIndent.length .. $]; 7086 } 7087 else 7088 { 7089 throw new StringException("outdent: Inconsistent indentation"); 7090 } 7091 } 7092 7093 return lines; 7094 } 7095 7096 /// 7097 @safe pure unittest 7098 { 7099 auto str1 = [ 7100 " void main()\n", 7101 " {\n", 7102 " test();\n", 7103 " }\n" 7104 ]; 7105 auto str1Expected = [ 7106 "void main()\n", 7107 "{\n", 7108 " test();\n", 7109 "}\n" 7110 ]; 7111 assert(str1.outdent == str1Expected); 7112 7113 auto str2 = [ 7114 "void main()\n", 7115 " {\n", 7116 " test();\n", 7117 " }\n" 7118 ]; 7119 assert(str2.outdent == str2); 7120 } 7121 7122 @safe pure unittest 7123 { 7124 import std.conv : to; 7125 import std.exception : assertCTFEable; 7126 7127 template outdent_testStr(S) 7128 { 7129 enum S outdent_testStr = 7130 " 7131 \t\tX 7132 \t\U00010143X 7133 \t\t 7134 7135 \t\t\tX 7136 \t "; 7137 } 7138 7139 template outdent_expected(S) 7140 { 7141 enum S outdent_expected = 7142 " 7143 \tX 7144 \U00010143X 7145 7146 7147 \t\tX 7148 "; 7149 } 7150 7151 assertCTFEable!( 7152 { 7153 7154 static foreach (S; AliasSeq!(string, wstring, dstring)) 7155 {{ 7156 enum S blank = ""; 7157 assert(blank.outdent() == blank); 7158 static assert(blank.outdent() == blank); 7159 7160 enum S testStr1 = " \n \t\n "; 7161 enum S expected1 = "\n\n"; 7162 assert(testStr1.outdent() == expected1); 7163 static assert(testStr1.outdent() == expected1); 7164 7165 assert(testStr1[0..$-1].outdent() == expected1); 7166 static assert(testStr1[0..$-1].outdent() == expected1); 7167 7168 enum S testStr2 = "a\n \t\nb"; 7169 assert(testStr2.outdent() == testStr2); 7170 static 
assert(testStr2.outdent() == testStr2); 7171 7172 enum S testStr3 = 7173 " 7174 \t\tX 7175 \t\U00010143X 7176 \t\t 7177 7178 \t\t\tX 7179 \t "; 7180 7181 enum S expected3 = 7182 " 7183 \tX 7184 \U00010143X 7185 7186 7187 \t\tX 7188 "; 7189 assert(testStr3.outdent() == expected3); 7190 static assert(testStr3.outdent() == expected3); 7191 7192 enum testStr4 = " X\r X\n X\r\n X\u2028 X\u2029 X"; 7193 enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X"; 7194 assert(testStr4.outdent() == expected4); 7195 static assert(testStr4.outdent() == expected4); 7196 7197 enum testStr5 = testStr4[0..$-1]; 7198 enum expected5 = expected4[0..$-1]; 7199 assert(testStr5.outdent() == expected5); 7200 static assert(testStr5.outdent() == expected5); 7201 7202 enum testStr6 = " \r \n \r\n \u2028 \u2029"; 7203 enum expected6 = "\r\n\r\n\u2028\u2029"; 7204 assert(testStr6.outdent() == expected6); 7205 static assert(testStr6.outdent() == expected6); 7206 7207 enum testStr7 = " a \n b "; 7208 enum expected7 = "a \nb "; 7209 assert(testStr7.outdent() == expected7); 7210 static assert(testStr7.outdent() == expected7); 7211 }} 7212 }); 7213 } 7214 7215 @safe pure unittest 7216 { 7217 import std.exception : assertThrown; 7218 auto bad = " a\n\tb\n c"; 7219 assertThrown!StringException(bad.outdent); 7220 } 7221 7222 /** Assume the given array of integers `arr` is a well-formed UTF string and 7223 return it typed as a UTF string. 7224 7225 `ubyte` becomes `char`, `ushort` becomes `wchar` and `uint` 7226 becomes `dchar`. Type qualifiers are preserved. 7227 7228 When compiled with debug mode, this function performs an extra check to make 7229 sure the return value is a valid Unicode string. 7230 7231 Params: 7232 arr = array of bytes, ubytes, shorts, ushorts, ints, or uints 7233 7234 Returns: 7235 arr retyped as an array of chars, wchars, or dchars 7236 7237 Throws: 7238 In debug mode `AssertError`, when the result is not a well-formed UTF string. 

See_Also: $(LREF representation)
*/
auto assumeUTF(T)(T[] arr)
if (staticIndexOf!(immutable T, immutable ubyte, immutable ushort, immutable uint) != -1)
{
    import std.exception : collectException;
    import std.traits : ModifyTypePreservingTQ;
    import std.utf : validate;

    // Element sizes 1, 2 and 4 select char, wchar and dchar (indices 0, 1, 2).
    alias CodeUnitOf(U) = AliasSeq!(char, wchar, dchar)[U.sizeof / 2];
    alias Retyped = ModifyTypePreservingTQ!(CodeUnitOf, T)[];

    // Reinterpret the same memory; no data is copied.
    auto text = cast(Retyped) arr;

    debug
    {
        // Debug builds verify the assumption and fail with the validator's
        // message if the data is not well-formed.
        scope problem = collectException(validate(text));
        assert(!problem, problem.msg);
    }

    return text;
}

///
@safe pure unittest
{
    string a = "Hölo World";
    immutable(ubyte)[] b = a.representation;
    string c = b.assumeUTF;

    assert(c == "Hölo World");
}

pure @system unittest
{
    import std.algorithm.comparison : equal;
    static foreach (T; AliasSeq!(char[], wchar[], dchar[]))
    {{
        immutable T jti = "Hello World";
        T jt = jti.dup;

        static if (is(T == char[]))
        {
            auto gt = cast(ubyte[]) jt;
            auto gtc = cast(const(ubyte)[])jt;
            auto gti = cast(immutable(ubyte)[])jt;
        }
        else static if (is(T == wchar[]))
        {
            auto gt = cast(ushort[]) jt;
            auto gtc = cast(const(ushort)[])jt;
            auto gti = cast(immutable(ushort)[])jt;
        }
        else static if (is(T == dchar[]))
        {
            auto gt = cast(uint[]) jt;
            auto gtc = cast(const(uint)[])jt;
            auto gti = cast(immutable(uint)[])jt;
        }

        auto ht = assumeUTF(gt);
        auto htc = assumeUTF(gtc);
        auto hti = assumeUTF(gti);
        assert(equal(jt, ht));
        assert(equal(jt, htc));
        assert(equal(jt, hti));
    }}
}

pure @system unittest
{
    import core.exception : AssertError;
    import std.exception : assertThrown, assertNotThrown;

    immutable(ubyte)[] a = [ 0xC0 ];

    debug
        assertThrown!AssertError( () nothrow @nogc @safe {cast(void) a.assumeUTF;} () );
    else
        assertNotThrown!AssertError( () nothrow @nogc @safe {cast(void) a.assumeUTF;} () );
}