1 /++
2 $(LINK2 https://en.wikipedia.org/wiki/Regular_expression, Regular expressions)
3 are a commonly used method of pattern matching
4 on strings, with $(I regex) being a catchy word for a pattern in this domain
5 specific language. Typical problems usually solved by regular expressions
6 include validation of user input and the ubiquitous find $(AMP) replace
7 in text processing utilities.
8 9 $(SCRIPT inhibitQuickIndex = 1;)
10 $(DIVC quickindex,
11 $(BOOKTABLE,
12 $(TR $(TH Category) $(TH Functions))
13 $(TR $(TD Matching) $(TD
14 $(LREF bmatch)
15 $(LREF match)
16 $(LREF matchAll)
17 $(LREF matchFirst)
18 ))
19 $(TR $(TD Building) $(TD
20 $(LREF ctRegex)
21 $(LREF escaper)
22 $(LREF regex)
23 ))
24 $(TR $(TD Replace) $(TD
25 $(LREF replace)
26 $(LREF replaceAll)
27 $(LREF replaceAllInto)
28 $(LREF replaceFirst)
29 $(LREF replaceFirstInto)
30 ))
31 $(TR $(TD Split) $(TD
32 $(LREF split)
33 $(LREF splitter)
34 ))
35 $(TR $(TD Objects) $(TD
36 $(LREF Captures)
37 $(LREF Regex)
38 $(LREF RegexException)
39 $(LREF RegexMatch)
40 $(LREF Splitter)
41 $(LREF StaticRegex)
42 ))
43 ))
44 45 $(SECTION Synopsis)
46 47 Create a regex at runtime:
48 $(RUNNABLE_EXAMPLE
49 $(RUNNABLE_EXAMPLE_STDIN
50 They met on 24/01/1970.
51 7/8/99 wasn't as hot as 7/8/2022.
52 )
53 ---
54 import std.regex;
55 import std.stdio;
56 // Print out all possible dd/mm/yy(yy) dates found in user input.
57 auto r = regex(r"\b[0-9][0-9]?/[0-9][0-9]?/[0-9][0-9](?:[0-9][0-9])?\b");
58 foreach (line; stdin.byLine)
59 {
60 // matchAll() returns a range that can be iterated
61 // to get all subsequent matches.
62 foreach (c; matchAll(line, r))
63 writeln(c.hit);
64 }
65 ---
66 )
67 Create a static regex at compile-time, which contains fast native code:
68 $(RUNNABLE_EXAMPLE
69 ---
70 import std.regex;
71 auto ctr = ctRegex!(`^.*/([^/]+)/?$`);
72 73 // It works just like a normal regex:
74 auto c2 = matchFirst("foo/bar", ctr); // First match found here, if any
75 assert(!c2.empty); // Be sure to check if there is a match before examining contents!
76 assert(c2[1] == "bar"); // Captures is a range of submatches: 0 = full match.
77 ---
78 )
79 Multi-pattern regex:
80 $(RUNNABLE_EXAMPLE
81 ---
82 import std.regex;
83 auto multi = regex([`\d+,\d+`, `([a-z]+):(\d+)`]);
84 auto m = "abc:43 12,34".matchAll(multi);
85 assert(m.front.whichPattern == 2);
86 assert(m.front[1] == "abc");
87 assert(m.front[2] == "43");
88 m.popFront();
89 assert(m.front.whichPattern == 1);
90 assert(m.front[0] == "12,34");
91 ---
92 )
93 $(LREF Captures) and `opCast!bool`:
94 $(RUNNABLE_EXAMPLE
95 ---
96 import std.regex;
97 // The result of `matchAll/matchFirst` is directly testable with `if/assert/while`,
98 // e.g. test if a string consists of letters only:
99 assert(matchFirst("LettersOnly", `^\p{L}+$`));
100 101 // And we can take advantage of the ability to define a variable in the IfCondition:
102 if (const captures = matchFirst("At l34st one digit, but maybe more...", `((\d)(\d*))`))
103 {
104 assert(captures[2] == "3");
105 assert(captures[3] == "4");
106 assert(captures[1] == "34");
107 }
108 ---
109 )
110 See_Also: $(LINK2 https://dlang.org/spec/statement.html#IfCondition, `IfCondition`).
111 112 $(SECTION Syntax and general information)
113 The general usage guideline is to keep regex complexity on the side of simplicity,
114 as its capabilities reside in purely character-level manipulation.
115 As such it's ill-suited for tasks involving higher level invariants
116 like matching an integer number $(U bounded) in an [a,b] interval.
Checks of this sort are better addressed by additional post-processing.
118 119 The basic syntax shouldn't surprise experienced users of regular expressions.
120 For an introduction to `std.regex` see a
121 $(HTTP dlang.org/regular-expression.html, short tour) of the module API
122 and its abilities.
123 124 There are other web resources on regular expressions to help newcomers,
125 and a good $(HTTP www.regular-expressions.info, reference with tutorial)
126 can easily be found.
127 128 This library uses a remarkably common ECMAScript syntax flavor
129 with the following extensions:
130 $(UL
131 $(LI Named subexpressions, with Python syntax. )
$(LI Unicode properties such as Scripts, Blocks and common binary properties e.g. Alphabetic, White_Space, Hex_Digit etc.)
$(LI Arbitrary length and complexity lookbehind, including lookahead in lookbehind and vice versa.)
134 )
135 136 $(REG_START Pattern syntax )
137 $(I std.regex operates on codepoint level,
138 'character' in this table denotes a single Unicode codepoint.)
139 $(REG_TABLE
140 $(REG_TITLE Pattern element, Semantics )
141 $(REG_TITLE Atoms, Match single characters )
142 $(REG_ROW any character except [{|*+?()^$, Matches the character itself. )
143 $(REG_ROW ., In single line mode matches any character.
144 Otherwise it matches any character except '\n' and '\r'. )
145 $(REG_ROW [class], Matches a single character
146 that belongs to this character class. )
147 $(REG_ROW [^class], Matches a single character that
148 does $(U not) belong to this character class.)
149 $(REG_ROW \cC, Matches the control character corresponding to letter C)
150 $(REG_ROW \xXX, Matches a character with hexadecimal value of XX. )
151 $(REG_ROW \uXXXX, Matches a character with hexadecimal value of XXXX. )
152 $(REG_ROW \U00YYYYYY, Matches a character with hexadecimal value of YYYYYY. )
153 $(REG_ROW \f, Matches a formfeed character. )
154 $(REG_ROW \n, Matches a linefeed character. )
155 $(REG_ROW \r, Matches a carriage return character. )
156 $(REG_ROW \t, Matches a tab character. )
157 $(REG_ROW \v, Matches a vertical tab character. )
158 $(REG_ROW \d, Matches any Unicode digit. )
159 $(REG_ROW \D, Matches any character except Unicode digits. )
160 $(REG_ROW \w, Matches any word character (note: this includes numbers).)
161 $(REG_ROW \W, Matches any non-word character.)
162 $(REG_ROW \s, Matches whitespace, same as \p{White_Space}.)
163 $(REG_ROW \S, Matches any character except those recognized as $(I \s ). )
164 $(REG_ROW \\\\, Matches \ character. )
165 $(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. )
166 $(REG_ROW \p{PropertyName}, Matches a character that belongs
167 to the Unicode PropertyName set.
168 Single letter abbreviations can be used without surrounding {,}. )
169 $(REG_ROW \P{PropertyName}, Matches a character that does not belong
170 to the Unicode PropertyName set.
171 Single letter abbreviations can be used without surrounding {,}. )
172 $(REG_ROW \p{InBasicLatin}, Matches any character that is part of
173 the BasicLatin Unicode $(U block).)
174 $(REG_ROW \P{InBasicLatin}, Matches any character except ones in
175 the BasicLatin Unicode $(U block).)
176 $(REG_ROW \p{Cyrillic}, Matches any character that is part of
177 Cyrillic $(U script).)
178 $(REG_ROW \P{Cyrillic}, Matches any character except ones in
179 Cyrillic $(U script).)
180 $(REG_TITLE Quantifiers, Specify repetition of other elements)
181 $(REG_ROW *, Matches previous character/subexpression 0 or more times.
182 Greedy version - tries as many times as possible.)
183 $(REG_ROW *?, Matches previous character/subexpression 0 or more times.
184 Lazy version - stops as early as possible.)
185 $(REG_ROW +, Matches previous character/subexpression 1 or more times.
186 Greedy version - tries as many times as possible.)
187 $(REG_ROW +?, Matches previous character/subexpression 1 or more times.
188 Lazy version - stops as early as possible.)
189 $(REG_ROW ?, Matches previous character/subexpression 0 or 1 time.
190 Greedy version - tries as many times as possible.)
191 $(REG_ROW ??, Matches previous character/subexpression 0 or 1 time.
192 Lazy version - stops as early as possible.)
193 $(REG_ROW {n}, Matches previous character/subexpression exactly n times. )
194 $(REG_ROW {n$(COMMA)}, Matches previous character/subexpression n times or more.
195 Greedy version - tries as many times as possible. )
196 $(REG_ROW {n$(COMMA)}?, Matches previous character/subexpression n times or more.
197 Lazy version - stops as early as possible.)
198 $(REG_ROW {n$(COMMA)m}, Matches previous character/subexpression n to m times.
199 Greedy version - tries as many times as possible, but no more than m times. )
200 $(REG_ROW {n$(COMMA)m}?, Matches previous character/subexpression n to m times.
Lazy version - stops as early as possible, but no less than n times.)
202 $(REG_TITLE Other, Subexpressions $(AMP) alternations )
203 $(REG_ROW (regex), Matches subexpression regex,
204 saving matched portion of text for later retrieval. )
205 $(REG_ROW (?#comment), An inline comment that is ignored while matching.)
206 $(REG_ROW (?:regex), Matches subexpression regex,
207 $(U not) saving matched portion of text. Useful to speed up matching. )
208 $(REG_ROW A|B, Matches subexpression A, or failing that, matches B. )
209 $(REG_ROW (?P$(LT)name$(GT)regex), Matches named subexpression
210 regex labeling it with name 'name'.
211 When referring to a matched portion of text,
212 names work like aliases in addition to direct numbers.
213 )
214 $(REG_TITLE Assertions, Match position rather than character )
215 $(REG_ROW ^, Matches at the beginning of input or line (in multiline mode).)
216 $(REG_ROW $, Matches at the end of input or line (in multiline mode). )
217 $(REG_ROW \b, Matches at word boundary. )
218 $(REG_ROW \B, Matches when $(U not) at word boundary. )
219 $(REG_ROW (?=regex), Zero-width lookahead assertion.
220 Matches at a point where the subexpression
221 regex could be matched starting from the current position.
222 )
223 $(REG_ROW (?!regex), Zero-width negative lookahead assertion.
224 Matches at a point where the subexpression
225 regex could $(U not) be matched starting from the current position.
226 )
227 $(REG_ROW (?<=regex), Zero-width lookbehind assertion. Matches at a point
228 where the subexpression regex could be matched ending
229 at the current position (matching goes backwards).
230 )
231 $(REG_ROW (?<!regex), Zero-width negative lookbehind assertion.
232 Matches at a point where the subexpression regex could $(U not)
233 be matched ending at the current position (matching goes backwards).
234 )
235 )
236 237 $(REG_START Character classes )
238 $(REG_TABLE
239 $(REG_TITLE Pattern element, Semantics )
240 $(REG_ROW Any atom, Has the same meaning as outside of a character class,
241 except for ] which must be written as \\])
242 $(REG_ROW a-z, Includes characters a, b, c, ..., z. )
243 $(REG_ROW [a||b]$(COMMA) [a--b]$(COMMA) [a~~b]$(COMMA) [a$(AMP)$(AMP)b],
244 Where a, b are arbitrary classes, means union, set difference,
245 symmetric set difference, and intersection respectively.
246 $(I Any sequence of character class elements implicitly forms a union.) )
247 )
248 249 $(REG_START Regex flags )
250 $(REG_TABLE
251 $(REG_TITLE Flag, Semantics )
252 $(REG_ROW g, Global regex, repeat over the whole input. )
253 $(REG_ROW i, Case insensitive matching. )
254 $(REG_ROW m, Multi-line mode, match ^, $ on start and end line separators
255 as well as start and end of input.)
256 $(REG_ROW s, Single-line mode, makes . match '\n' and '\r' as well. )
257 $(REG_ROW x, Free-form syntax, ignores whitespace in pattern,
258 useful for formatting complex regular expressions. )
259 )
260 261 $(SECTION Unicode support)
262 263 This library provides full Level 1 support* according to
264 $(HTTP unicode.org/reports/tr18/, UTS 18). Specifically:
265 $(UL
266 $(LI 1.1 Hex notation via any of \uxxxx, \U00YYYYYY, \xZZ.)
267 $(LI 1.2 Unicode properties.)
268 $(LI 1.3 Character classes with set operations.)
269 $(LI 1.4 Word boundaries use the full set of "word" characters.)
270 $(LI 1.5 Using simple casefolding to match case
271 insensitively across the full range of codepoints.)
272 $(LI 1.6 Respecting line breaks as any of
273 \u000A | \u000B | \u000C | \u000D | \u0085 | \u2028 | \u2029 | \u000D\u000A.)
274 $(LI 1.7 Operating on codepoint level.)
275 )
276 *With exception of point 1.1.1, as of yet, normalization of input
277 is expected to be enforced by user.
278 279 $(SECTION Replace format string)
280 281 A set of functions in this module that do the substitution rely
282 on a simple format to guide the process. In particular the table below
283 applies to the `format` argument of
284 $(LREF replaceFirst) and $(LREF replaceAll).
285 286 The format string can reference parts of match using the following notation.
287 $(REG_TABLE
288 $(REG_TITLE Format specifier, Replaced by )
289 $(REG_ROW $(DOLLAR)$(AMP), the whole match. )
290 $(REG_ROW $(DOLLAR)$(BACKTICK), part of input $(I preceding) the match. )
291 $(REG_ROW $', part of input $(I following) the match. )
292 $(REG_ROW $$, '$' character. )
293 $(REG_ROW \c $(COMMA) where c is any character, the character c itself. )
294 $(REG_ROW \\\\, '\\' character. )
295 $(REG_ROW $(DOLLAR)1 .. $(DOLLAR)99, submatch number 1 to 99 respectively. )
296 )
297 298 $(SECTION Slicing and zero memory allocations orientation)
299 300 All matches returned by pattern matching functionality in this library
301 are slices of the original input. The notable exception is the `replace`
302 family of functions that generate a new string from the input.
303 304 In cases where producing the replacement is the ultimate goal
305 $(LREF replaceFirstInto) and $(LREF replaceAllInto) could come in handy
306 as functions that avoid allocations even for replacement.
307 308 Copyright: Copyright Dmitry Olshansky, 2011-
309 310 License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
311 312 Authors: Dmitry Olshansky,
313 314 API and utility constructs are modeled after the original `std.regex`
315 by Walter Bright and Andrei Alexandrescu.
316 317 Source: $(PHOBOSSRC std/regex/package.d)
318 319 Macros:
320 REG_ROW = $(TR $(TD $(I $1 )) $(TD $+) )
321 REG_TITLE = $(TR $(TD $(B $1)) $(TD $(B $2)) )
322 REG_TABLE = <table border="1" cellspacing="0" cellpadding="5" > $0 </table>
323 REG_START = <h3><div align="center"> $0 </div></h3>
324 SECTION = <h3><a id="$1" href="#$1" class="anchor">$0</a></h3>
325 S_LINK = <a href="#$1">$+</a>
326 +/327 modulestd.regex;
328 329 importstd.range.primitives, std.traits;
330 importstd.regex.internal.ir;
331 importstd.typecons : Flag, Yes, No;
/++
    A `Regex` object holds a regular expression pattern in compiled form.

    Instances of this object are constructed via calls to `regex`.
    This is the intended form for caching and storing frequently
    used regular expressions.

    Example:

    Test if this object doesn't contain any compiled pattern.
    ---
    Regex!char r;
    assert(r.empty);
    r = regex(""); // Note: "" is a valid regex pattern.
    assert(!r.empty);
    ---

    Getting a range of all the named captures in the regex.
    ----
    import std.range;
    import std.algorithm;

    auto re = regex(`(?P<name>\w+) = (?P<var>\d+)`);
    auto nc = re.namedCaptures;
    static assert(isRandomAccessRange!(typeof(nc)));
    assert(!nc.empty);
    assert(nc.length == 2);
    assert(nc.equal(["name", "var"]));
    assert(nc[0] == "name");
    assert(nc[1..$].equal(["var"]));
    ----
+/
public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
/++
    A `StaticRegex` is a `Regex` object that contains D code specially
    generated at compile-time to speed up matching.

    No longer used as a distinct type; kept as an alias to `Regex`
    for backwards compatibility.
+/
public alias StaticRegex = Regex;
/++
    Compile regular expression pattern for the later execution.

    When more than one pattern is supplied, the patterns are joined into a
    single alternation in which every alternative is wrapped in a
    non-capturing group and tagged with a per-pattern marker character
    (`privateUseStart + index`).

    Returns: `Regex` object that works on inputs having
    the same character width as `pattern`.

    Params:
    pattern = A single regular expression to match.
    patterns = An array of regular expression strings.
      The resulting `Regex` object will match any expression;
      use $(LREF whichPattern) to know which.
    flags = The _attributes (g, i, m, s and x accepted)

    Throws: `RegexException` if there were any errors during compilation,
    or if `patterns` is empty.
+/
@trusted public auto regex(S : C[], C)(const S[] patterns, const(char)[] flags="")
if (isSomeString!(S))
{
    import std.array : appender;
    import std.functional : memoize;
    enum cacheSize = 8; //TODO: invent nice interface to control regex caching

    // An empty pattern list used to fall through to `patterns[0]` below,
    // i.e. an out-of-bounds index inside a @trusted function. Report it as
    // a regular compilation error instead.
    if (patterns.length == 0)
        throw new RegexException("regex: at least one pattern is required");

    const(C)[] pat;
    if (patterns.length > 1)
    {
        auto app = appender!S();
        foreach (i, p; patterns)
        {
            if (i != 0)
                app.put("|");
            app.put("(?:");
            app.put(p);
            // terminator for the pattern
            // to detect if the pattern unexpectedly ends
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
            app.put(")");
            // another one to return correct whichPattern
            // for all of potential alternatives in the patterns[i]
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
        }
        pat = app.data;
    }
    else
        pat = patterns[0];

    // memoize keeps run-time state, so compile directly during CTFE.
    if (__ctfe)
        return regexImpl(pat, flags);
    return memoize!(regexImpl!S, cacheSize)(pat, flags);
}
///ditto
@trusted public auto regex(S)(S pattern, const(char)[] flags="")
if (isSomeString!(S))
{
    // Wrap the lone pattern in an array and defer to the multi-pattern overload.
    auto single = [pattern];
    return regex(single, flags);
}
///
@system unittest
{
    // Runs the multi-pattern scenario for one string type `S`.
    void test(S)()
    {
        // multi-pattern regex example
        S[] patterns = [`([a-z]+):(\d+)`, `(\d+),\d+`];
        S haystack = "abc:43 12,34";
        auto multi = regex(patterns); // multi regex
        auto hits = haystack.matchAll(multi);

        // "abc:43" is produced by the first pattern
        assert(hits.front.whichPattern == 1);
        assert(hits.front[1] == "abc");
        assert(hits.front[2] == "43");

        // "12,34" is produced by the second pattern
        hits.popFront();
        assert(hits.front.whichPattern == 2);
        assert(hits.front[1] == "12");
    }

    import std.meta : AliasSeq;
    static foreach (C; AliasSeq!(string, wstring, dstring))
    {
        // Test with const array of patterns - see https://issues.dlang.org/show_bug.cgi?id=20301
        static foreach (S; AliasSeq!(C, const C, immutable C))
        {
            test!S();
        }
    }
}
// The textual form of a compiled regex must stay compact and must mention
// both the pattern and the flags it was built with.
@system unittest
{
    import std.conv : to;
    import std.string : indexOf;

    immutable pattern = "s+";
    auto regexString = regex(pattern, "U").to!string;
    assert(regexString.length <= pattern.length + 100, "String representation shouldn't be unreasonably bloated.");
    assert(regexString.indexOf("s+") >= 0, "String representation should include pattern.");
    assert(regexString.indexOf('U') >= 0, "String representation should include flags.");
}
// Parses `pattern` with the given `flags` and returns the program
// produced by the parser.
public auto regexImpl(S)(const S pattern, const(char)[] flags="")
if (isSomeString!(typeof(pattern)))
{
    import std.regex.internal.parser : Parser, CodeGen;

    alias Str = Unqual!(typeof(pattern));
    auto parser = Parser!(Str, CodeGen)(pattern, flags);
    auto compiled = parser.program;
    return compiled;
}
// Thin handle around a pointer to an immutable `Regex!Char`.
// Through `alias this` it converts to a `Regex!Char` wherever one is expected.
private struct CTRegexWrapper(Char)
{
    // Points at the immutable compiled regex this wrapper exposes.
    private immutable(Regex!Char)* re;

    // allow code that expects mutable Regex to still work
    // we stay "logically const"
    // NOTE: the cast strips `immutable` from the pointee and hands out a
    // mutable reference to it.
    @property @trusted ref getRe() const { return *cast(Regex!Char*) re; }
    alias getRe this;
}
// Implementation backing `ctRegex`: compiles `pattern` with `flags` into a
// static immutable regex, generates matcher source from it, and exposes the
// result as `wrapper`.
template ctRegexImpl(alias pattern, string flags="")
{
    import std.regex.internal.backtracking, std.regex.internal.parser;
    // The regex itself; as a static initializer it is evaluated at compile time.
    static immutable r = cast(immutable) regex(pattern, flags);
    alias Char = BasicElementOf!(typeof(pattern));
    // Source text produced from `r`; it is mixed in as the body of `func` below.
    enum source = ctGenRegExCode(r);
    @trusted pure bool func(BacktrackingMatcher!Char matcher)
    {
        // Compile with -debug=std_regex_ctr to print the generated source.
        debug(std_regex_ctr) pragma(msg, source);
        // NOTE(review): presumably silences "unused parameter" diagnostics;
        // the mixed-in source likely refers to `matcher` by name - confirm.
        cast(void) matcher;
        mixin(source);
    }
    // Same regex as `r`, but with a factory built around `func`.
    static immutable staticRe =
        cast(immutable) r.withFactory(new CtfeFactory!(BacktrackingMatcher, Char, func));
    // The value handed out by `ctRegex`.
    enum wrapper = CTRegexWrapper!Char(&staticRe);
}
@safe pure unittest
{
    // test compat for logical const workaround:
    // a ctRegex value must be accepted where a StaticRegex!char is expected
    static void accept(StaticRegex!char)
    {
    }

    enum emptyRe = ctRegex!``;
    accept(emptyRe);
}
@safe pure unittest
{
    auto compiled = ctRegex!`foo`;
    assert(matchFirst("foo", compiled));

    // test reassignment
    compiled = ctRegex!`bar`;
    assert(matchFirst("bar", compiled));

    // a different pattern must not match
    assert(!matchFirst("bar", ctRegex!`foo`));
}
/++
    Compile regular expression using CTFE
    and generate optimized native machine code for matching it.

    Returns: StaticRegex object for faster matching.
    (The value is a thin wrapper that implicitly converts to `Regex`.)

    Params:
    pattern = Regular expression
    flags = The _attributes (g, i, m, s and x accepted)
+/
public enum ctRegex(alias pattern, string flags="") = ctRegexImpl!(pattern, flags).wrapper;
// True when `RegEx` is a regex type usable for matching input of type `R`,
// i.e. it is (or implicitly converts to) a `Regex` over `R`'s element type.
template isRegexFor(RegEx, R)
{
    alias Char = BasicElementOf!R;

    enum isRegexFor = is(immutable RegEx == immutable Regex!Char)
        || is(RegEx : const(Regex!Char))
        || is(immutable RegEx == immutable StaticRegex!Char);
}
542 543 544 /++
545 `Captures` object contains submatches captured during a call
546 to `match` or iteration over `RegexMatch` range.
547 548 First element of range is the whole match.
549 +/550 @trustedpublicstructCaptures(R)
551 if (isSomeString!R)
552 {//@trusted because of union inside553 aliasDataIndex = size_t;
554 aliasString = R;
555 aliasStore = SmallFixedArray!(Group!DataIndex, 3);
556 private:
557 importstd.conv : text;
558 Storematches;
559 const(NamedGroup)[] _names;
560 R_input;
561 int_nMatch;
562 uint_f, _b;
563 564 this(Rinput, uintn, const(NamedGroup)[] named)
565 {
566 _input = input;
567 _names = named;
568 matches = Store(n);
569 _b = n;
570 _f = 0;
571 }
572 573 this(refRegexMatch!Rrmatch)
574 {
575 _input = rmatch._input;
576 _names = rmatch._engine.pattern.dict;
577 immutablen = rmatch._engine.pattern.ngroup;
578 matches = Store(n);
579 _b = n;
580 _f = 0;
581 }
582 583 inout(R) getMatch(size_tindex) inout584 {
585 autom = &matches[index];
586 return *m ? _input[m.begin .. m.end] : null;
587 }
588 589 public:
590 ///Slice of input prior to the match.591 @propertyRpre()
592 {
593 return_nMatch == 0 ? _input[] : _input[0 .. matches[0].begin];
594 }
595 596 ///Slice of input immediately after the match.597 @propertyRpost()
598 {
599 return_nMatch == 0 ? _input[] : _input[matches[0].end .. $];
600 }
601 602 ///Slice of matched portion of input.603 @propertyRhit()
604 {
605 assert(_nMatch, "attempted to get hit of an empty match");
606 return_input[matches[0].begin .. matches[0].end];
607 }
608 609 ///Range interface.610 @propertyRfront()
611 {
612 assert(_nMatch, "attempted to get front of an empty match");
613 returngetMatch(_f);
614 }
615 616 ///ditto617 @propertyRback()
618 {
619 assert(_nMatch, "attempted to get back of an empty match");
620 returngetMatch(_b - 1);
621 }
622 623 ///ditto624 voidpopFront()
625 {
626 assert(!empty);
627 ++_f;
628 }
629 630 ///ditto631 voidpopBack()
632 {
633 assert(!empty);
634 --_b;
635 }
636 637 ///ditto638 @propertyboolempty() const { return_nMatch == 0 || _f >= _b; }
639 640 ///ditto641 inout(R) opIndex()(size_ti) inout642 {
643 assert(_f + i < _b,text("requested submatch number ", i," is out of range"));
644 returngetMatch(_f + i);
645 }
646 647 /++
648 Explicit cast to bool.
649 Useful as a shorthand for !(x.empty) in if and assert statements.
650 651 ---
652 import std.regex;
653 654 assert(!matchFirst("nothing", "something"));
655 ---
656 +/657 658 @safeboolopCast(T:bool)() constnothrow { return_nMatch != 0; }
659 660 /++
661 Number of pattern matched counting, where 1 - the first pattern.
662 Returns 0 on no match.
663 +/664 665 @safe @propertyintwhichPattern() constnothrow { return_nMatch; }
666 667 ///668 @systemunittest669 {
670 importstd.regex;
671 assert(matchFirst("abc", "[0-9]+", "[a-z]+").whichPattern == 2);
672 }
673 674 /++
675 Lookup named submatch.
676 677 ---
678 import std.regex;
679 import std.range;
680 681 auto c = matchFirst("a = 42;", regex(`(?P<var>\w+)\s*=\s*(?P<value>\d+);`));
682 assert(c["var"] == "a");
683 assert(c["value"] == "42");
684 popFrontN(c, 2);
685 //named groups are unaffected by range primitives
686 assert(c["var"] =="a");
687 assert(c.front == "42");
688 ----
689 +/690 RopIndex(String)(Stringi) /*const*///@@@BUG@@@691 if (isSomeString!String)
692 {
693 size_tindex = lookupNamedGroup(_names, i);
694 returngetMatch(index);
695 }
696 697 ///Number of matches in this object.698 @propertysize_tlength() const { return_nMatch == 0 ? 0 : _b - _f; }
699 700 ///A hook for compatibility with original std.regex.701 @propertyrefcaptures(){ returnthis; }
702 }
///
@system unittest
{
    import std.range.primitives : popFrontN;

    auto caps = matchFirst("@abc#", regex(`(\w)(\w)(\w)`));
    assert(caps.pre == "@"); // Part of input preceding match
    assert(caps.post == "#"); // Immediately after match
    assert(caps.hit == caps[0] && caps.hit == "abc"); // The whole match
    assert(caps[2] == "b");

    // Walk the captures as a bidirectional range.
    assert(caps.front == "abc");
    caps.popFront();
    assert(caps.front == "a");
    assert(caps.back == "c");
    caps.popBack();
    assert(caps.back == "b");
    caps.popFrontN(2);
    assert(caps.empty);

    assert(!matchFirst("nothing", "something"));

    // Captures that are not matched will be null.
    caps = matchFirst("ac", regex(`a(b)?c`));
    assert(caps);
    assert(!caps[1]);
}
// A Captures value can be assigned and tested inside a single expression.
@system unittest
{
    string subject = "abc";
    Captures!string result;
    assert(cast(bool)(result = matchFirst(subject, regex("d")))
        || cast(bool)(result = matchFirst(subject, regex("a"))));
}
// https://issues.dlang.org/show_bug.cgi?id=19979
@system unittest
{
    auto caps = matchFirst("bad", regex(`(^)(not )?bad($)`));

    // whole match covers the full input
    assert(caps[0] && caps[0].length == "bad".length);
    // `(^)` participated but is zero-width
    assert(caps[1] && !caps[1].length);
    // `(not )?` did not participate at all
    assert(!caps[2]);
    // `($)` participated but is zero-width
    assert(caps[3] && !caps[3].length);
}
/++
    A regex engine state, as returned by `match` family of functions.

    Effectively it's a forward range of Captures!R, produced
    by lazily searching for matches in a given input.
+/
@trusted public struct RegexMatch(R)
if (isSomeString!R)
{
    import std.typecons : Rebindable;
private:
    alias Char = BasicElementOf!R;
    // The matcher doing the actual work; reference counted through `_factory`.
    Matcher!Char _engine;
    // Factory that created `_engine` and manages its reference count.
    Rebindable!(const MatcherFactory!Char) _factory;
    R _input;
    // Captures of the current (front) match.
    Captures!R _captures;

    // Creates the engine for `prog` over `input` and runs the first match.
    this(RegEx)(R input, RegEx prog)
    {
        _input = input;
        // Use the regex's own factory if it has one, the default otherwise.
        if (prog.factory is null) _factory = defaultFactory!Char(prog);
        else _factory = prog.factory;
        _engine = _factory.create(prog, input);
        assert(_engine.refCount == 1);
        _captures = Captures!R(this);
        _captures.matches.mutate((slice) pure { _captures._nMatch = _engine.match(slice); });
    }

public:
    // Copies share the engine; account for the extra reference.
    this(this)
    {
        if (_engine) _factory.incRef(_engine);
    }

    // Drop this object's reference to the engine.
    ~this()
    {
        if (_engine) _factory.decRef(_engine);
    }

    ///Shorthands for front.pre, front.post, front.hit.
    @property R pre()
    {
        return _captures.pre;
    }

    ///ditto
    @property R post()
    {
        return _captures.post;
    }

    ///ditto
    @property R hit()
    {
        return _captures.hit;
    }

    /++
        Functionality for processing subsequent matches of global regexes via range interface:
        ---
        import std.regex;
        auto m = matchAll("Hello, world!", regex(`\w+`));
        assert(m.front.hit == "Hello");
        m.popFront();
        assert(m.front.hit == "world");
        m.popFront();
        assert(m.empty);
        ---
    +/
    @property inout(Captures!R) front() inout
    {
        return _captures;
    }

    ///ditto
    void popFront()
    {
        // CoW - if refCount is not 1, we are aliased by somebody else
        if (_engine.refCount != 1)
        {
            // we create a new engine & abandon this reference
            auto old = _engine;
            _engine = _factory.dup(old, _input);
            _factory.decRef(old);
        }
        _captures.matches.mutate((slice) { _captures._nMatch = _engine.match(slice); });
    }

    ///ditto
    auto save(){ return this; }

    ///Test if this match object is empty.
    @property bool empty() const { return _captures._nMatch == 0; }

    ///Same as !(x.empty), provided for its convenience in conditional statements.
    T opCast(T:bool)(){ return !empty; }

    /// Same as .front, provided for compatibility with original std.regex.
    @property inout(Captures!R) captures() inout { return _captures; }
}
// Runs a single match of `prog` against `input` and returns the resulting
// Captures. The most recently used matcher is kept in static storage and
// reused when the next call has the same pattern text and flags.
private auto matchOnceImpl(RegEx, R)(R input, const auto ref RegEx prog) @trusted
{
    alias Char = BasicElementOf!R;
    // Identity of a cached matcher: pattern text plus flags.
    static struct Key
    {
        immutable(Char)[] pattern;
        uint flags;
    }
    // Starts out as ("", all flag bits set).
    // NOTE(review): presumably chosen so it never equals a real key - confirm.
    static Key cacheKey = Key("", -1);
    static Matcher!Char cache;
    auto factory = prog.factory is null ? defaultFactory!Char(prog) : prog.factory;
    auto key = Key(prog.pattern, prog.flags);
    Matcher!Char engine;
    if (cacheKey == key)
    {
        // Same regex as last time: reuse the cached matcher on the new input.
        engine = cache;
        engine.rearm(input);
    }
    else
    {
        engine = factory.create(prog, input);
        if (cache) factory.decRef(cache); // destroy cached engine *after* building a new one
        cache = engine;
        cacheKey = key;
    }
    auto captures = Captures!R(input, prog.ngroup, prog.dict);
    // Record the engine's match result as the captures' match number.
    captures.matches.mutate((slice) pure { captures._nMatch = engine.match(slice); });
    return captures;
}
// matchOnce is a @safe, pure facade over matchOnceImpl. Presenting it as pure
// is acceptable because the static mutable variables in the implementation are
// only used to cache the key and character matcher. Going through static
// nested functions avoids delegates and GC.
private @safe auto matchOnce(RegEx, R)(R input, const auto ref RegEx prog) pure
{
    // Plain forwarding function whose address can be taken.
    static auto forward(R input, const ref RegEx prog)
    {
        return matchOnceImpl(input, prog);
    }

    // Re-types `forward` as pure and invokes it.
    static @trusted auto forwardAsPure(R input, const ref RegEx prog)
    {
        return assumePureFunction(&forward)(input, prog);
    }

    return forwardAsPure(input, prog);
}
// Builds a RegexMatch over `input` using `re` with the global option forced on.
private auto matchMany(RegEx, R)(R input, auto ref RegEx re) @safe
{
    immutable globalFlags = re.flags | RegexOption.global;
    return RegexMatch!R(input, re.withFlags(globalFlags));
}
@system unittest
{
    //sanity checks for new API
    auto compiled = regex("abc");
    auto caps = "abc".matchOnce(compiled);
    assert(!caps.empty);
    assert("abc".matchOnce(compiled)[0] == "abc");
}
// https://issues.dlang.org/show_bug.cgi?id=18135
@system unittest
{
    // A struct holding a RegexMatch must be assignable and comparable.
    static struct MapResult { RegexMatch!string m; }
    MapResult holder;
    holder = MapResult();
    assert(holder == holder);
}
// True when `fun` can be called with a single `Captures!R` argument.
private template isReplaceFunctor(alias fun, R)
{
    enum isReplaceFunctor = __traits(compiles, (Captures!R c) { fun(c); });
}
// the lowest level - just stuff replacements into the sink:
// writes pre-match text, the replacement for the match, then post-match text;
// with empty captures the input is copied through unchanged
private @trusted void replaceCapturesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T captures)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    if (!captures.empty)
    {
        sink.put(captures.pre);
        // a hack to get around bogus errors, should be simply output(captures, sink)
        // "is a nested function and cannot be accessed from"
        static if (isReplaceFunctor!(output, R))
        {
            //"mutator" type of function
            sink.put(output(captures));
        }
        else
        {
            //"output" type of function
            output(captures, sink);
        }
        sink.put(captures.post);
    }
    else
    {
        sink.put(input);
    }
}
// ditto for a range of captures: copies the text between consecutive matches
// verbatim and emits a replacement for every match
private void replaceMatchesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T matches)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    // index in `input` just past the previously handled match
    size_t consumed = 0;
    foreach (cap; matches)
    {
        // text between the previous match and this one
        sink.put(cap.pre[consumed .. $]);
        // same hack, see replaceCapturesInto
        static if (isReplaceFunctor!(output, R))
        {
            //"mutator" type of function
            sink.put(output(cap));
        }
        else
        {
            //"output" type of function
            output(cap, sink);
        }
        consumed = cap.pre.length + cap.hit.length;
    }
    // whatever follows the last match
    sink.put(input[consumed .. $]);
}
// a general skeleton of replaceFirst:
// returns `input` itself when nothing matches, a newly built string otherwise
private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;

    auto found = matchFirst(input, re);
    if (!found.empty)
    {
        auto buffer = appender!(R)();
        replaceCapturesInto!output(buffer, input, found);
        return buffer.data;
    }
    return input;
}
// ditto for replaceAll
// the method parameter allows old API to ride on the back of the new one
private R replaceAllWith(alias output,
        alias method=matchAll, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;

    auto found = method(input, re); //inout(C)[] fails
    if (!found.empty)
    {
        auto buffer = appender!(R)();
        replaceMatchesInto!output(buffer, input, found);
        return buffer.data;
    }
    // nothing matched: hand back the input untouched
    return input;
}
/++
    Start matching `input` to regex pattern `re`,
    using Thompson NFA matching scheme.

    The use of this function is $(RED discouraged) - use either of
    $(LREF matchAll) or $(LREF matchFirst).

    Delegating the kind of operation
    to "g" flag is soon to be phased out along with the
    ability to choose the exact matching scheme. The choice of
    matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case by case basis.

    Params:
    input = string to search
    re = compiled regular expression, or a pattern string that is
         compiled with $(LREF regex) before matching

    Returns: a `RegexMatch` object holding engine state after first match.
+/
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx,R))
{
    alias Matcher = RegexMatch!(Unqual!R);
    return Matcher(input, re);
}

///ditto
public auto match(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    alias Matcher = RegexMatch!(Unqual!R);
    return Matcher(input, regex(re));
}
/++
    Find the first (leftmost) slice of the `input` that
    matches the pattern `re`. This function picks the most suitable
    regular expression engine depending on the pattern properties.

    `re` parameter can be one of three types:
    $(UL
      $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
      $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
        compiled bytecode. )
      $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
        compiled native machine code. )
    )

    Params:
    input = string to search
    re = pattern to look for, in one of the forms listed above

    Returns:
    $(LREF Captures) containing the extent of a match together with all submatches
    if there was a match, otherwise an empty $(LREF Captures) object.
+/
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    return matchOnce(input, re);
}

///ditto
public auto matchFirst(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // a plain pattern string is compiled first
    return input.matchOnce(regex(re));
}

///ditto
public auto matchFirst(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // several pattern strings are handed to regex() as one array
    return input.matchOnce(regex(re));
}
/++
    Initiate a search for all non-overlapping matches to the pattern `re`
    in the given `input`. The result is a lazy range of matches generated
    as they are encountered in the input going left to right.

    This function picks the most suitable regular expression engine
    depending on the pattern properties.

    `re` parameter can be one of three types:
    $(UL
      $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
      $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
        compiled bytecode. )
      $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
        compiled native machine code. )
    )

    Params:
    input = string to search
    re = pattern to look for, in one of the forms listed above

    Returns:
    $(LREF RegexMatch) object that represents matcher state
    after the first match was found or an empty one if not present.
+/
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    return matchMany(input, re);
}

///ditto
public auto matchAll(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // a plain pattern string is compiled first
    return input.matchMany(regex(re));
}

///ditto
public auto matchAll(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // several pattern strings are handed to regex() as one array
    return input.matchMany(regex(re));
}
// another set of tests just to cover the new API
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    import std.conv : to;

    static foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
    {{
        // pattern given as a plain string
        auto text1 = "blah-bleh".to!String();
        auto pattern1 = "bl[ae]h".to!String();
        auto first1 = matchFirst(text1, pattern1);
        assert(first1.equal(["blah".to!String()]));
        auto all1 = matchAll(text1, pattern1);
        assert(all1.equal!((a,b) => a.equal(b))
            ([["blah".to!String()], ["bleh".to!String()]]));

        // pattern compiled from several alternatives
        auto text2 = "1/03/12 - 3/03/12".to!String();
        auto pattern2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
        auto first2 = matchFirst(text2, pattern2);
        assert(first2.equal(["1/03/12", "1", "03", "12"].map!(to!String)()));
        auto all2 = matchAll(text2, pattern2);
        assert(all2.front.equal(first2));
        all2.popFront();
        assert(all2.front.equal(["3/03/12", "3", "03", "12"].map!(to!String)()));
        first2.popFrontN(3);
        assert(first2.equal(["12".to!String()]));

        // compile-time regex with named groups
        auto staticPattern = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
        auto text3 = "2 + 34/56 - 6/1".to!String();
        auto first3 = matchFirst(text3, staticPattern);
        assert(first3.equal(["34/56", "34", "56"].map!(to!String)()));
        assert(first3["Quot"] == "34".to!String());
        assert(first3["Denom"] == "56".to!String());

        auto all3 = matchAll(text3, staticPattern);
        assert(all3.front.equal(first3));
        all3.popFront();
        assert(all3.front.equal(["6/1", "6", "1"].map!(to!String)()));
    }}
}
/++
    Start matching of `input` to regex pattern `re`,
    using traditional $(LINK2 https://en.wikipedia.org/wiki/Backtracking,
    backtracking) matching scheme.

    The use of this function is $(RED discouraged) - use either of
    $(LREF matchAll) or $(LREF matchFirst).

    Delegating the kind of operation
    to "g" flag is soon to be phased out along with the
    ability to choose the exact matching scheme. The choice of
    matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case by case basis.

    Params:
    input = string to search
    re = compiled regular expression, or a pattern string that is
         compiled with $(LREF regex) before matching

    Returns: a `RegexMatch` object holding engine
    state after first match.

+/
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    alias Matcher = RegexMatch!(Unqual!R);
    return Matcher(input, re);
}

///ditto
public auto bmatch(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    alias Matcher = RegexMatch!(Unqual!R);
    return Matcher(input, regex(re));
}
// Produces a replacement string from `format`, using `captures` for
// substitution, and writes it piece by piece into `sink`.
//
// Sequences recognized after a '$':
//   $N       - submatch number N (decimal)
//   ${name}  - named submatch; the name may consist of ASCII letters,
//              digits and '_'
//   $&       - the whole match
//   $`       - input preceding the match
//   $'       - input following the match
//   $$       - a literal '$'
// A '$' followed by anything else is dropped and the following text is
// copied as is.
//
// Params:
//   format = the format string
//   captures = captures of the match being replaced
//   sink = output range receiving the result
//   ignoreBadSubs = when true, an out-of-range $N expands to nothing
//                   instead of failing
//
// Throws (through `enforce`): on an out-of-range submatch number (unless
// `ignoreBadSubs`), on a malformed ${...}, and on a format ending in '$'.
package void replaceFmt(R, Capt, OutR)
    (R format, Capt captures, OutR sink, bool ignoreBadSubs = false)
if (isOutputRange!(OutR, ElementEncodingType!R[]) &&
    isOutputRange!(OutR, ElementEncodingType!(Capt.String)[]))
{
    import std.algorithm.searching : find;
    import std.ascii : isAlphaNum, isDigit;
    import std.conv : text, parse;
    import std.exception : enforce;
    enum State { Normal, Dollar }
    auto state = State.Normal;
    size_t offset;
L_Replace_Loop:
    while (!format.empty)
        final switch (state)
        {
        case State.Normal:
            // copy literal text up to the next '$'
            for (offset = 0; offset < format.length; offset++)//no decoding
            {
                if (format[offset] == '$')
                {
                    state = State.Dollar;
                    sink.put(format[0 .. offset]);
                    format = format[offset+1 .. $];//ditto
                    continue L_Replace_Loop;
                }
            }
            // no '$' left: flush the rest
            sink.put(format[0 .. offset]);
            format = format[offset .. $];
            break;
        case State.Dollar:
            if (isDigit(format[0]))
            {
                // parse() consumes the digits from the front of `format`
                uint digit = parse!uint(format);
                enforce(ignoreBadSubs || digit < captures.length, text("invalid submatch number ", digit));
                if (digit < captures.length)
                    sink.put(captures[digit]);
            }
            else if (format[0] == '{')
            {
                // Group names may contain digits and underscores in
                // addition to letters, so all of them have to be accepted
                // here for every named group to be addressable.
                auto x = find!(a => !isAlphaNum(a) && a != '_')(format[1..$]);
                enforce(!x.empty && x[0] == '}', "no matching '}' in replacement format");
                auto name = format[1 .. $ - x.length];
                format = x[1..$];
                enforce(!name.empty, "invalid name in ${...} replacement format");
                sink.put(captures[name]);
            }
            else if (format[0] == '&')
            {
                sink.put(captures[0]);
                format = format[1 .. $];
            }
            else if (format[0] == '`')
            {
                sink.put(captures.pre);
                format = format[1 .. $];
            }
            else if (format[0] == '\'')
            {
                sink.put(captures.post);
                format = format[1 .. $];
            }
            else if (format[0] == '$')
            {
                sink.put(format[0 .. 1]);
                format = format[1 .. $];
            }
            // anything else: the '$' is dropped, the character itself is
            // picked up as ordinary text on the next iteration
            state = State.Normal;
            break;
        }
    // a format ending in a lone '$' leaves the state machine in Dollar
    enforce(state == State.Normal, "invalid format string in regex replace");
}
/++
    Construct a new string from `input` by replacing the first match with
    a string generated from it according to the `format` specifier.

    To replace all matches use $(LREF replaceAll).

    Params:
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from,
    see $(S_LINK Replace _format string, the _format string).

    Returns:
    A string of the same type with the first match (if any) replaced.
    If no match is found returns the input string itself.
+/
public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // the format string is expanded once, for the first match only
    return replaceFirstWith!((caps, dst) => replaceFmt(format, caps, dst))(input, re);
}
///
@system unittest
{
    // only the leading "n" is wrapped in brackets
    auto bracketed = replaceFirst("noon", regex("n"), "[$&]");
    assert(bracketed == "[n]oon");
}
/++
    This is a general replacement tool that constructs a new string by replacing
    matches of pattern `re` in the `input`. Unlike the other overload
    there is no format string; instead captures are passed
    to a user-defined functor `fun` that returns a new string
    to use as replacement.

    This version replaces the first match in `input`,
    see $(LREF replaceAll) to replace all of the matches.

    Returns:
    A new string of the same type as `input` with the first match
    replaced by the return value of `fun`. If no match is found
    returns the `input` itself.
+/
public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // adapt the "returns a string" functor to the sink-writing form
    return replaceFirstWith!((caps, dst) => dst.put(fun(caps)))(input, re);
}
///
@system unittest
{
    import std.conv : to;
    // increment the first number found in the text
    string source = "#21 out of 46";
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (source, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");
}
/++
    A variation on $(LREF replaceFirst) that instead of allocating a new string
    on each call outputs the result piece-wise to the `sink`. In particular
    this enables efficient construction of a final output incrementally.

    Like in $(LREF replaceFirst) family of functions there is an overload
    for the substitution guided by the `format` string
    and the one with the user defined callback.
+/
public @trusted void replaceFirstInto(Sink, R, C, RegEx)
        (ref Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    auto firstMatch = matchFirst(input, re);
    replaceCapturesInto!((caps, dst) => replaceFmt(format, caps, dst))
        (sink, input, firstMatch);
}

///ditto
public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx)
    (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // note: unlike the format overload, `sink` is taken by value here
    auto firstMatch = matchFirst(input, re);
    replaceCapturesInto!fun(sink, input, firstMatch);
}
///
@system unittest
{
    import std.array;
    string firstLine = "first message\n";
    string secondLine = "second message\n";
    auto output = appender!string();
    // substitution driven by a format string
    replaceFirstInto(output, firstLine, regex(`([a-z]+) message`), "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, secondLine, regex(`([a-z]+) message`));
    assert(output.data == "first\nsecond\n");
}
//examples for replaceFirst
@system unittest
{
    import std.conv;
    // functor-based replacement of the first number
    string source = "#21 out of 46";
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (source, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");

    import std.array;
    string firstLine = "first message\n";
    string secondLine = "second message\n";
    auto output = appender!string();
    // substitution driven by a format string
    replaceFirstInto(output, firstLine, regex(`([a-z]+) message`), "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, secondLine, regex(`([a-z]+) message`));
    assert(output.data == "first\nsecond\n");
}
/++
    Construct a new string from `input` by replacing all of the
    fragments that match a pattern `re` with a string generated
    from the match according to the `format` specifier.

    To replace only the first match use $(LREF replaceFirst).

    Params:
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from,
    see $(S_LINK Replace _format string, the _format string).

    Returns:
    A string of the same type as `input` with all
    of the matches (if any) replaced.
    If no match is found returns the input string itself.
+/
public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // the format string is expanded anew for every match
    return replaceAllWith!((caps, dst) => replaceFmt(format, caps, dst))(input, re);
}
///
@system unittest
{
    // insert comma as thousands delimiter
    auto thousands = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
    auto grouped = replaceAll("12000 + 42100 = 54100", thousands, ",");
    assert(grouped == "12,000 + 42,100 = 54,100");
}
/++
    This is a general replacement tool that constructs a new string by replacing
    matches of pattern `re` in the `input`. Unlike the other overload
    there is no format string; instead captures are passed
    to a user-defined functor `fun` that returns a new string
    to use as replacement.

    This version replaces all of the matches found in `input`,
    see $(LREF replaceFirst) to replace the first match only.

    Returns:
    A new string of the same type as `input` with all matches
    replaced by return values of `fun`. If no matches found
    returns the `input` itself.

    Params:
    input = string to search
    re = compiled regular expression
    fun = delegate to use
+/
public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // adapt the "returns a string" functor to the sink-writing form
    return replaceAllWith!((caps, dst) => dst.put(fun(caps)))(input, re);
}
///
@system unittest
{
    string upperCased(Captures!(string) m)
    {
        import std.string : toUpper;
        return m.hit.toUpper();
    }
    // Capitalize the letters 'a' and 'r':
    auto shouted = replaceAll!(upperCased)("Strap a rocket engine on a chicken.",
            regex("[ar]"));
    assert(shouted == "StRAp A Rocket engine on A chicken.");
}
/++
    A variation on $(LREF replaceAll) that instead of allocating a new string
    on each call outputs the result piece-wise to the `sink`. In particular
    this enables efficient construction of a final output incrementally.

    As with $(LREF replaceAll) there are 2 overloads - one with a format string,
    the other one with a user defined functor.
+/
public @trusted void replaceAllInto(Sink, R, C, RegEx)
        (Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    auto found = matchAll(input, re);
    replaceMatchesInto!((caps, dst) => replaceFmt(format, caps, dst))
        (sink, input, found);
}

///ditto
public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
        (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    auto found = matchAll(input, re);
    replaceMatchesInto!fun(sink, input, found);
}
///
@system unittest
{
    // insert comma as thousands delimiter in fifty randomly produced big numbers
    import std.array, std.conv, std.random, std.range;
    static re = regex(`(?<=\d)(?=(\d\d\d)+\b)`, "g");
    auto sink = appender!(char [])();
    enum ulong min = 10UL ^^ 10, max = 10UL ^^ 19;
    foreach (round; 0 .. 50)
    {
        // the same buffer is reused for every number
        sink.clear();
        auto number = text(uniform(min, max));
        replaceAllInto(sink, number, re, ",");
        // walking back from the end, every 4th character must be a comma
        foreach (commaAt; iota(sink.data.length - 4, 0, -4))
            assert(sink.data[commaAt] == ',');
    }
}
// exercise all of the replace APIs
@system unittest
{
    import std.array : appender;
    import std.conv;
    // try and check first/all simple substitution
    static foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
    {{
        // inputs
        S src1 = "curt trial".to!S();
        S src2 = "round dome".to!S();
        // expected results of replaceFirst* ...
        S firstOnly1 = "court trial".to!S();
        S firstOnly2 = "hound dome".to!S();
        // ... and of replaceAll*
        S every1 = "court trial".to!S();
        S every2 = "hound home".to!S();
        auto re1 = regex("curt".to!S());
        auto re2 = regex("[dr]o".to!S());

        // format string overloads
        assert(replaceFirst(src1, re1, "court") == firstOnly1);
        assert(replaceFirst(src2, re2, "ho") == firstOnly2);
        assert(replaceAll(src1, re1, "court") == every1);
        assert(replaceAll(src2, re2, "ho") == every2);

        // functor overloads
        auto viaFunFirst = replaceFirst!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(src1, re1);
        assert(viaFunFirst == firstOnly1);
        assert(replaceFirst!(cap => "ho".to!S())(src2, re2) == firstOnly2);
        auto viaFunAll = replaceAll!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(src1, re1);
        assert(viaFunAll == every1);
        assert(replaceAll!(cap => "ho".to!S())(src2, re2) == every2);

        // sink overloads keep appending to the same buffer
        auto buffer = appender!S();
        replaceFirstInto(buffer, src1, re1, "court");
        assert(buffer.data == firstOnly1);
        replaceFirstInto(buffer, src2, re2, "ho");
        assert(buffer.data == firstOnly1~firstOnly2);
        replaceAllInto(buffer, src1, re1, "court");
        assert(buffer.data == firstOnly1~firstOnly2~every1);
        replaceAllInto(buffer, src2, re2, "ho");
        assert(buffer.data == firstOnly1~firstOnly2~every1~every2);
    }}
}
/++
    Old API for replacement, operation depends on flags of pattern `re`.
    With "g" flag it performs the equivalent of $(LREF replaceAll) otherwise it
    works the same as $(LREF replaceFirst).

    The use of this function is $(RED discouraged), please use $(LREF replaceAll)
    or $(LREF replaceFirst) explicitly.

    Params:
    scheme = matching function used to produce the matches,
             $(LREF match) by default
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from
    fun = functor that produces the replacement for a match

    Returns:
    A string of the same type as `input` with the matches replaced;
    the `input` itself if nothing matched.
+/
public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Forward the caller's `scheme`: it used to be silently ignored in
    // favour of a hard-coded `match`.
    return replaceAllWith!((m, sink) => replaceFmt(format, m, sink), scheme)(input, re);
}

///ditto
public R replace(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    return replaceAllWith!(fun, match)(input, re);
}
/**
Splits a string `r` using a regular expression `pat` as a separator.

Params:
    keepSeparators = flag to specify if the matches should be in the resulting range
    r = the string to split
    pat = the pattern to split on
Returns:
    A lazy range of strings
*/
public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, alias RegEx = Regex)
if (isSomeString!Range && isRegexFor!(RegEx, Range))
{
private:
    Range _input;
    // start of the current front within _input; a value past the end
    // of _input marks the range as exhausted (see `empty`)
    size_t _offset;
    alias Rx = typeof(match(Range.init,RegEx.init));
    // the separator match that terminates the current piece
    Rx _match;

    // true while the current front is a separator rather than text
    static if (keepSeparators) bool onMatch = false;

    @trusted this(Range input, RegEx separator)
    {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
        _input = input;
        // splitting needs every separator, so force the global flag
        const re = separator.withFlags(separator.flags | RegexOption.global);
        if (_input.empty)
        {
            //there is nothing to match at all, make _offset > 0
            _offset = 1;
            return;
        }

        _match = Rx(_input, re);

        static if (keepSeparators)
        {
            // input starts with a separator: step straight onto it
            if (_match.pre.empty)
                popFront();
        }
    }

public:
    auto ref opSlice()
    {
        return this.save;
    }

    ///Forward range primitives.
    @property Range front()
    {
        import std.algorithm.comparison : min;

        assert(!empty && _offset <= _match.pre.length
                && _match.pre.length <= _input.length);

        static if (keepSeparators)
        {
            // currently positioned on a separator
            if (onMatch)
                return _match.hit();
        }
        // text between the current offset and the next separator
        return _input[_offset .. min($, _match.pre.length)];
    }

    ///ditto
    @property bool empty()
    {
        return keepSeparators
            ? _offset >= _input.length
            : _offset > _input.length;
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        if (_match.empty)
        {
            //No more separators, work is done here
            _offset = _input.length + 1;
            return;
        }

        static if (keepSeparators)
        {
            if (onMatch)
            {
                // leaving a separator: move past it and look for the next one
                _offset += _match.hit.length;
                _match.popFront();
            }
            else
            {
                // leaving a text piece: the separator itself comes next
                _offset = _match.pre.length;
            }

            onMatch = !onMatch;
        }
        else
        {
            //skip past the separator
            _offset = _match.pre.length + _match.hit.length;
            _match.popFront();
        }
    }

    ///ditto
    @property auto save()
    {
        return this;
    }
}
/// ditto
public Splitter!(keepSeparators, Range, RegEx) splitter(
    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat)
if (
    is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
{
    // the return type is spelled out in the signature
    return typeof(return)(r, pat);
}
///
@system unittest
{
    import std.algorithm.comparison : equal;
    // leading and trailing separators produce empty pieces
    auto csv = ", abc, de, fg, hi, ";
    auto pieces = splitter(csv, regex(", *"));
    assert(equal(pieces,
        ["", "abc", "de", "fg", "hi", ""]));
}
/// Split on a pattern, but keep the matches in the resulting range
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.typecons : Yes;

    auto separators = regex(`([\.,])`);

    // separators show up between the pieces
    assert("2003.04.05"
        .splitter!(Yes.keepSeparators)(separators)
        .equal(["2003", ".", "04", ".", "05"]));

    // a leading separator becomes the first element
    assert(",1,2,3"
        .splitter!(Yes.keepSeparators)(separators)
        .equal([",", "1", ",", "2", ",", "3"]));
}
///An eager version of `splitter` that creates an array with split slices of `input`.
public @trusted String[] split(String, RegEx)(String input, RegEx rx)
if (isSomeString!String && isRegexFor!(RegEx, String))
{
    import std.array : appender;
    auto pieces = appender!(String[])();
    // drain the lazy splitter into the array
    for (auto parts = splitter(input, rx); !parts.empty; parts.popFront())
        pieces.put(parts.front);
    return pieces.data;
}
/// Exception object thrown in case of errors during regex compilation.
// Public alias for the exception type declared in std.regex.internal.ir.
public alias RegexException = std.regex.internal.ir.RegexException;
/++
  A range that lazily produces a string output escaped
  to be used inside of a regular expression.
+/
auto escaper(Range)(Range r)
{
    import std.algorithm.searching : find;
    static immutable escapables = [Escapables];
    static struct Escaper // template to deduce attributes
    {
        Range r;
        // true when a backslash has to be emitted before r.front
        bool escaped;

        @property ElementType!Range front()
        {
            if (!escaped)
                return r.front;
            else
                return '\\';
        }

        @property bool empty()
        {
            return r.empty;
        }

        void popFront()
        {
            if (escaped)
            {
                // the backslash is out; the escaped character comes next
                escaped = false;
                return;
            }
            r.popFront();
            // decide whether the new front needs a backslash in front of it
            escaped = !r.empty && !escapables.find(r.front).empty;
        }

        @property auto save()
        {
            return Escaper(r.save, escaped);
        }
    }

    // the very first element may already need escaping
    immutable startsEscaped = !r.empty && !escapables.find(r.front).empty;
    return Escaper(r, startsEscaped);
}
///
@system unittest
{
    import std.algorithm.comparison;
    import std.regex;
    // braces and asterisks get a backslash in front of them
    string raw = `This is {unfriendly} to *regex*`;
    auto quoted = raw.escaper;
    assert(quoted.equal(`This is \{unfriendly\} to \*regex\*`));
}
@system unittest
{
    import std.algorithm.comparison;
    import std.conv;
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        // a single escapable character
        auto caret = "^".to!S;
        assert(caret.escaper.equal(`\^`));
        // empty input stays empty
        auto nothing = "";
        assert(nothing.escaper.equal(""));
    }}
}
@system unittest
{
    // greedy optional takes the "a" ...
    auto greedy = "ab".matchFirst(regex(`a?b?`));
    assert(greedy.hit == "ab");
    // ... the lazy one matches nothing at all
    auto lazily = "ab".matchFirst(regex(`a??b?`));
    assert(lazily.hit == "");
}