The OpenD Programming Language

1 /++
2   $(LINK2 https://en.wikipedia.org/wiki/Regular_expression, Regular expressions)
3   are a commonly used method of pattern matching
4   on strings, with $(I regex) being a catchy word for a pattern in this domain
5   specific language. Typical problems usually solved by regular expressions
6   include validation of user input and the ubiquitous find $(AMP) replace
7   in text processing utilities.
8 
9 $(SCRIPT inhibitQuickIndex = 1;)
10 $(DIVC quickindex,
11 $(BOOKTABLE,
12 $(TR $(TH Category) $(TH Functions))
13 $(TR $(TD Matching) $(TD
14         $(LREF bmatch)
15         $(LREF match)
16         $(LREF matchAll)
17         $(LREF matchFirst)
18 ))
19 $(TR $(TD Building) $(TD
20         $(LREF ctRegex)
21         $(LREF escaper)
22         $(LREF regex)
23 ))
24 $(TR $(TD Replace) $(TD
25         $(LREF replace)
26         $(LREF replaceAll)
27         $(LREF replaceAllInto)
28         $(LREF replaceFirst)
29         $(LREF replaceFirstInto)
30 ))
31 $(TR $(TD Split) $(TD
32         $(LREF split)
33         $(LREF splitter)
34 ))
35 $(TR $(TD Objects) $(TD
36         $(LREF Captures)
37         $(LREF Regex)
38         $(LREF RegexException)
39         $(LREF RegexMatch)
40         $(LREF Splitter)
41         $(LREF StaticRegex)
42 ))
43 ))
44 
45   $(SECTION Synopsis)
46 
47   Create a regex at runtime:
48   $(RUNNABLE_EXAMPLE
49   $(RUNNABLE_EXAMPLE_STDIN
50 They met on 24/01/1970.
51 7/8/99 wasn't as hot as 7/8/2022.
52 )
53       ---
54       import std.regex;
55       import std.stdio;
56       // Print out all possible dd/mm/yy(yy) dates found in user input.
57       auto r = regex(r"\b[0-9][0-9]?/[0-9][0-9]?/[0-9][0-9](?:[0-9][0-9])?\b");
58       foreach (line; stdin.byLine)
59       {
60         // matchAll() returns a range that can be iterated
61         // to get all subsequent matches.
62         foreach (c; matchAll(line, r))
63             writeln(c.hit);
64       }
65       ---
66   )
67   Create a static regex at compile-time, which contains fast native code:
68   $(RUNNABLE_EXAMPLE
69   ---
70   import std.regex;
71   auto ctr = ctRegex!(`^.*/([^/]+)/?$`);
72 
73   // It works just like a normal regex:
74   auto c2 = matchFirst("foo/bar", ctr);   // First match found here, if any
75   assert(!c2.empty);   // Be sure to check if there is a match before examining contents!
76   assert(c2[1] == "bar");   // Captures is a range of submatches: 0 = full match.
77   ---
78   )
79   Multi-pattern regex:
80   $(RUNNABLE_EXAMPLE
81   ---
82   import std.regex;
83   auto multi = regex([`\d+,\d+`, `([a-z]+):(\d+)`]);
84   auto m = "abc:43 12,34".matchAll(multi);
85   assert(m.front.whichPattern == 2);
86   assert(m.front[1] == "abc");
87   assert(m.front[2] == "43");
88   m.popFront();
89   assert(m.front.whichPattern == 1);
90   assert(m.front[0] == "12,34");
91   ---
92   )
93   $(LREF Captures) and `opCast!bool`:
94   $(RUNNABLE_EXAMPLE
95   ---
96   import std.regex;
97   // The result of `matchAll/matchFirst` is directly testable with `if/assert/while`,
98   // e.g. test if a string consists of letters only:
99   assert(matchFirst("LettersOnly", `^\p{L}+$`));
100 
101   // And we can take advantage of the ability to define a variable in the IfCondition:
102   if (const captures = matchFirst("At l34st one digit, but maybe more...", `((\d)(\d*))`))
103   {
104       assert(captures[2] == "3");
105       assert(captures[3] == "4");
106       assert(captures[1] == "34");
107   }
108   ---
109   )
110   See_Also: $(LINK2 https://dlang.org/spec/statement.html#IfCondition, `IfCondition`).
111 
112   $(SECTION Syntax and general information)
113   The general usage guideline is to keep regex complexity on the side of simplicity,
114   as its capabilities reside in purely character-level manipulation.
115   As such it's ill-suited for tasks involving higher level invariants
116   like matching an integer number $(U bounded) in an [a,b] interval.
117   Checks of this sort are better addressed by additional post-processing.
118 
119   The basic syntax shouldn't surprise experienced users of regular expressions.
120   For an introduction to `std.regex` see a
121   $(HTTP dlang.org/regular-expression.html, short tour) of the module API
122   and its abilities.
123 
124   There are other web resources on regular expressions to help newcomers,
125   and a good $(HTTP www.regular-expressions.info, reference with tutorial)
126   can easily be found.
127 
128   This library uses a remarkably common ECMAScript syntax flavor
129   with the following extensions:
130   $(UL
131     $(LI Named subexpressions, with Python syntax. )
132     $(LI Unicode properties such as Scripts, Blocks and common binary properties e.g. Alphabetic, White_Space, Hex_Digit etc.)
133     $(LI Arbitrary length and complexity lookbehind, including lookahead in lookbehind and vice versa.)
134   )
135 
136   $(REG_START Pattern syntax )
137   $(I std.regex operates on codepoint level,
138     'character' in this table denotes a single Unicode codepoint.)
139   $(REG_TABLE
140     $(REG_TITLE Pattern element, Semantics )
141     $(REG_TITLE Atoms, Match single characters )
142     $(REG_ROW any character except [{|*+?()^$, Matches the character itself. )
143     $(REG_ROW ., In single line mode matches any character.
144       Otherwise it matches any character except '\n' and '\r'. )
145     $(REG_ROW [class], Matches a single character
146       that belongs to this character class. )
147     $(REG_ROW [^class], Matches a single character that
148       does $(U not) belong to this character class.)
149     $(REG_ROW \cC, Matches the control character corresponding to letter C)
150     $(REG_ROW \xXX, Matches a character with hexadecimal value of XX. )
151     $(REG_ROW \uXXXX, Matches a character  with hexadecimal value of XXXX. )
152     $(REG_ROW \U00YYYYYY, Matches a character with hexadecimal value of YYYYYY. )
153     $(REG_ROW \f, Matches a formfeed character. )
154     $(REG_ROW \n, Matches a linefeed character. )
155     $(REG_ROW \r, Matches a carriage return character. )
156     $(REG_ROW \t, Matches a tab character. )
157     $(REG_ROW \v, Matches a vertical tab character. )
158     $(REG_ROW \d, Matches any Unicode digit. )
159     $(REG_ROW \D, Matches any character except Unicode digits. )
160     $(REG_ROW \w, Matches any word character (note: this includes numbers).)
161     $(REG_ROW \W, Matches any non-word character.)
162     $(REG_ROW \s, Matches whitespace, same as \p{White_Space}.)
163     $(REG_ROW \S, Matches any character except those recognized as $(I \s ). )
164     $(REG_ROW \\\\, Matches \ character. )
165     $(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. )
166     $(REG_ROW \p{PropertyName}, Matches a character that belongs
167         to the Unicode PropertyName set.
168       Single letter abbreviations can be used without surrounding {,}. )
169     $(REG_ROW  \P{PropertyName}, Matches a character that does not belong
170         to the Unicode PropertyName set.
171       Single letter abbreviations can be used without surrounding {,}. )
172     $(REG_ROW \p{InBasicLatin}, Matches any character that is part of
173           the BasicLatin Unicode $(U block).)
174     $(REG_ROW \P{InBasicLatin}, Matches any character except ones in
175           the BasicLatin Unicode $(U block).)
176     $(REG_ROW \p{Cyrillic}, Matches any character that is part of
177         Cyrillic $(U script).)
178     $(REG_ROW \P{Cyrillic}, Matches any character except ones in
179         Cyrillic $(U script).)
180     $(REG_TITLE Quantifiers, Specify repetition of other elements)
181     $(REG_ROW *, Matches previous character/subexpression 0 or more times.
182       Greedy version - tries as many times as possible.)
183     $(REG_ROW *?, Matches previous character/subexpression 0 or more times.
184       Lazy version  - stops as early as possible.)
185     $(REG_ROW +, Matches previous character/subexpression 1 or more times.
186       Greedy version - tries as many times as possible.)
187     $(REG_ROW +?, Matches previous character/subexpression 1 or more times.
188       Lazy version  - stops as early as possible.)
189     $(REG_ROW ?, Matches previous character/subexpression 0 or 1 time.
190       Greedy version - tries as many times as possible.)
191     $(REG_ROW ??, Matches previous character/subexpression 0 or 1 time.
192       Lazy version  - stops as early as possible.)
193     $(REG_ROW {n}, Matches previous character/subexpression exactly n times. )
194     $(REG_ROW {n$(COMMA)}, Matches previous character/subexpression n times or more.
195       Greedy version - tries as many times as possible. )
196     $(REG_ROW {n$(COMMA)}?, Matches previous character/subexpression n times or more.
197       Lazy version - stops as early as possible.)
198     $(REG_ROW {n$(COMMA)m}, Matches previous character/subexpression n to m times.
199       Greedy version - tries as many times as possible, but no more than m times. )
200     $(REG_ROW {n$(COMMA)m}?, Matches previous character/subexpression n to m times.
201       Lazy version - stops as early as possible, but no less than n times.)
202     $(REG_TITLE Other, Subexpressions $(AMP) alternations )
203     $(REG_ROW (regex),  Matches subexpression regex,
204       saving matched portion of text for later retrieval. )
205     $(REG_ROW (?#comment), An inline comment that is ignored while matching.)
206     $(REG_ROW (?:regex), Matches subexpression regex,
207       $(U not) saving matched portion of text. Useful to speed up matching. )
208     $(REG_ROW A|B, Matches subexpression A, or failing that, matches B. )
209     $(REG_ROW (?P$(LT)name$(GT)regex), Matches named subexpression
210         regex labeling it with name 'name'.
211         When referring to a matched portion of text,
212         names work like aliases in addition to direct numbers.
213      )
214     $(REG_TITLE Assertions, Match position rather than character )
215     $(REG_ROW ^, Matches at the beginning of input or line (in multiline mode).)
216     $(REG_ROW $, Matches at the end of input or line (in multiline mode). )
217     $(REG_ROW \b, Matches at word boundary. )
218     $(REG_ROW \B, Matches when $(U not) at word boundary. )
219     $(REG_ROW (?=regex), Zero-width lookahead assertion.
220         Matches at a point where the subexpression
221         regex could be matched starting from the current position.
222       )
223     $(REG_ROW (?!regex), Zero-width negative lookahead assertion.
224         Matches at a point where the subexpression
225         regex could $(U not) be matched starting from the current position.
226       )
227     $(REG_ROW (?<=regex), Zero-width lookbehind assertion. Matches at a point
228         where the subexpression regex could be matched ending
229         at the current position (matching goes backwards).
230       )
231     $(REG_ROW  (?<!regex), Zero-width negative lookbehind assertion.
232       Matches at a point where the subexpression regex could $(U not)
233       be matched ending at the current position (matching goes backwards).
234      )
235   )
236 
237   $(REG_START Character classes )
238   $(REG_TABLE
239     $(REG_TITLE Pattern element, Semantics )
240     $(REG_ROW Any atom, Has the same meaning as outside of a character class,
241       except for ] which must be written as \\])
242     $(REG_ROW a-z, Includes characters a, b, c, ..., z. )
243     $(REG_ROW [a||b]$(COMMA) [a--b]$(COMMA) [a~~b]$(COMMA) [a$(AMP)$(AMP)b],
244      Where a, b are arbitrary classes, means union, set difference,
245      symmetric set difference, and intersection respectively.
246      $(I Any sequence of character class elements implicitly forms a union.) )
247   )
248 
249   $(REG_START Regex flags )
250   $(REG_TABLE
251     $(REG_TITLE Flag, Semantics )
252     $(REG_ROW g, Global regex, repeat over the whole input. )
253     $(REG_ROW i, Case insensitive matching. )
254     $(REG_ROW m, Multi-line mode, match ^, $ on start and end line separators
255        as well as start and end of input.)
256     $(REG_ROW s, Single-line mode, makes . match '\n' and '\r' as well. )
257     $(REG_ROW x, Free-form syntax, ignores whitespace in pattern,
258       useful for formatting complex regular expressions. )
259   )
260 
261   $(SECTION Unicode support)
262 
263   This library provides full Level 1 support* according to
264     $(HTTP unicode.org/reports/tr18/, UTS 18). Specifically:
265   $(UL
266     $(LI 1.1 Hex notation via any of \uxxxx, \U00YYYYYY, \xZZ.)
267     $(LI 1.2 Unicode properties.)
268     $(LI 1.3 Character classes with set operations.)
269     $(LI 1.4 Word boundaries use the full set of "word" characters.)
270     $(LI 1.5 Using simple casefolding to match case
271         insensitively across the full range of codepoints.)
272     $(LI 1.6 Respecting line breaks as any of
273         \u000A | \u000B | \u000C | \u000D | \u0085 | \u2028 | \u2029 | \u000D\u000A.)
274     $(LI 1.7 Operating on codepoint level.)
275   )
276   *With the exception of point 1.1.1: as of yet, normalization of input
277     is expected to be enforced by the user.
278 
279     $(SECTION Replace format string)
280 
281     A set of functions in this module that do the substitution rely
282     on a simple format to guide the process. In particular the table below
283     applies to the `format` argument of
284     $(LREF replaceFirst) and $(LREF replaceAll).
285 
286     The format string can reference parts of match using the following notation.
287     $(REG_TABLE
288         $(REG_TITLE Format specifier, Replaced by )
289         $(REG_ROW $(DOLLAR)$(AMP), the whole match. )
290         $(REG_ROW $(DOLLAR)$(BACKTICK), part of input $(I preceding) the match. )
291         $(REG_ROW $', part of input $(I following) the match. )
292         $(REG_ROW $$, '$' character. )
293         $(REG_ROW \c $(COMMA) where c is any character, the character c itself. )
294         $(REG_ROW \\\\, '\\' character. )
295         $(REG_ROW $(DOLLAR)1 .. $(DOLLAR)99, submatch number 1 to 99 respectively. )
296     )
297 
298   $(SECTION Slicing and zero memory allocations orientation)
299 
300   All matches returned by pattern matching functionality in this library
301     are slices of the original input. The notable exception is the `replace`
302     family of functions  that generate a new string from the input.
303 
304     In cases where producing the replacement is the ultimate goal
305     $(LREF replaceFirstInto) and $(LREF replaceAllInto) could come in handy
306     as functions that  avoid allocations even for replacement.
307 
308     Copyright: Copyright Dmitry Olshansky, 2011-
309 
310   License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
311 
312   Authors: Dmitry Olshansky,
313 
314     API and utility constructs are modeled after the original `std.regex`
315   by Walter Bright and Andrei Alexandrescu.
316 
317   Source: $(PHOBOSSRC std/regex/package.d)
318 
319 Macros:
320     REG_ROW = $(TR $(TD $(I $1 )) $(TD $+) )
321     REG_TITLE = $(TR $(TD $(B $1)) $(TD $(B $2)) )
322     REG_TABLE = <table border="1" cellspacing="0" cellpadding="5" > $0 </table>
323     REG_START = <h3><div align="center"> $0 </div></h3>
324     SECTION = <h3><a id="$1" href="#$1" class="anchor">$0</a></h3>
325     S_LINK = <a href="#$1">$+</a>
326  +/
327 module std.regex;
328 
329 import std.range.primitives, std.traits;
330 import std.regex.internal.ir;
331 import std.typecons : Flag, Yes, No;
332 
/++
    A `Regex` object stores a regular expression pattern in its compiled form.

    Obtain instances by calling `regex`. Keeping the resulting object around
    is the intended way to cache and store regular expressions
    that are used frequently.

    Example:

    Checking whether the object holds a compiled pattern at all.
    ---
    Regex!char r;
    assert(r.empty);
    r = regex(""); // Note: "" is a valid regex pattern.
    assert(!r.empty);
    ---

    Obtaining a range over all named captures of the regex.
    ---
    import std.range;
    import std.algorithm;

    auto re = regex(`(?P<name>\w+) = (?P<var>\d+)`);
    auto nc = re.namedCaptures;
    static assert(isRandomAccessRange!(typeof(nc)));
    assert(!nc.empty);
    assert(nc.length == 2);
    assert(nc.equal(["name", "var"]));
    assert(nc[0] == "name");
    assert(nc[1..$].equal(["var"]));
    ---
+/
public alias Regex(Char) = std.regex.internal.ir.Regex!Char;

/++
    Historical name for a `Regex` that carries D code generated
    at compile-time to speed up matching.

    It is no longer a separate type: the name is kept only as an alias
    to `Regex` for backwards compatibility.
+/
public alias StaticRegex = Regex;
374 
/++
    Compile regular expression pattern for the later execution.
    Returns: `Regex` object that works on inputs having
    the same character width as `pattern`.

    Params:
    pattern = A single regular expression to match.
    patterns = An array of regular expression strings; it must contain
        at least one pattern.
        The resulting `Regex` object will match any expression;
        use $(LREF whichPattern) to know which.
    flags = The _attributes (g, i, m, s and x accepted)

    Throws: `RegexException` if `patterns` is empty or if there were
    any errors during compilation.
+/
@trusted public auto regex(S : C[], C)(const S[] patterns, const(char)[] flags="")
if (isSomeString!(S))
{
    import std.array : appender;
    import std.functional : memoize;
    enum cacheSize = 8; //TODO: invent nice interface to control regex caching

    // The single-pattern path below reads patterns[0]. This function is @trusted,
    // so in builds that only bounds-check @safe code an empty array would be
    // read out of bounds; report it through the documented exception instead.
    if (patterns.length == 0)
        throw new RegexException("regex: at least one pattern is required");

    const(C)[] pat;
    if (patterns.length > 1)
    {
        // Join the alternatives as (?:p0\X0)\X0|(?:p1\X1)\X1|... where Xi is
        // the code point privateUseStart + i.
        auto app = appender!S();
        foreach (i, p; patterns)
        {
            if (i != 0)
                app.put("|");
            app.put("(?:");
            app.put(p);
            // terminator for the pattern
            // to detect if the pattern unexpectedly ends
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
            app.put(")");
            // another one to return correct whichPattern
            // for all of potential alternatives in the patterns[i]
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
        }
        pat = app.data;
    }
    else
        pat = patterns[0];

    // During CTFE the pattern is compiled directly; at run time the compiled
    // regex is obtained through memoize with a cache of `cacheSize` entries.
    if (__ctfe)
        return regexImpl(pat, flags);
    return memoize!(regexImpl!S, cacheSize)(pat, flags);
}

///ditto
@trusted public auto regex(S)(S pattern, const(char)[] flags="")
if (isSomeString!(S))
{
    // A single pattern is handled as a one-element pattern array.
    return regex([pattern], flags);
}
431 
///
@system unittest
{
    // Runs the multi-pattern example for one string type.
    void check(Str)()
    {
        // multi-pattern regex example
        Str[] sources = [`([a-z]+):(\d+)`, `(\d+),\d+`];
        auto combined = regex(sources); // multi regex
        Str subject = "abc:43 12,34";
        auto found = subject.matchAll(combined);

        // the first hit is produced by pattern number 1
        assert(found.front.whichPattern == 1);
        assert(found.front[1] == "abc");
        assert(found.front[2] == "43");

        // the next hit is produced by pattern number 2
        found.popFront();
        assert(found.front.whichPattern == 2);
        assert(found.front[1] == "12");
    }

    import std.meta : AliasSeq;
    static foreach (Base; AliasSeq!(string, wstring, dstring))
    {
        // Test with const array of patterns - see https://issues.dlang.org/show_bug.cgi?id=20301
        static foreach (Qualified; AliasSeq!(Base, const Base, immutable Base))
        {
            check!Qualified();
        }
    }
}
456 
// The string form of a compiled regex stays compact and shows both the
// pattern and the flags it was built with.
@system unittest
{
    import std.conv : to;
    import std.string : indexOf;

    immutable source = "s+";
    const rendered = regex(source, "U").to!string;

    assert(rendered.length <= source.length + 100, "String representation shouldn't be unreasonably bloated.");
    assert(rendered.indexOf("s+") >= 0, "String representation should include pattern.");
    assert(rendered.indexOf('U') >= 0, "String representation should include flags.");
}
468 
// Compiles `pattern` with the given `flags`: instantiates `Parser` with the
// `CodeGen` generator for the unqualified pattern type and returns the
// program the parser produced.
public auto regexImpl(S)(const S pattern, const(char)[] flags="")
if (isSomeString!(typeof(pattern)))
{
    import std.regex.internal.parser : CodeGen, Parser;

    alias Pattern = Unqual!(typeof(pattern));
    auto compiler = Parser!(Pattern, CodeGen)(pattern, flags);
    return compiler.program;
}
477 
478 
// Value produced by `ctRegex`: stores a pointer to an immutable compiled
// regex and, through `alias this`, hands it out as a `Regex!Char` reference.
private struct CTRegexWrapper(Char)
{
    private immutable(Regex!Char)* re;

    // allow code that expects mutable Regex to still work
    // we stay "logically const"
    @property @trusted ref getRe() const
    {
        auto unqualified = cast(Regex!Char*) re;
        return *unqualified;
    }

    alias getRe this;
}
488 
// Implementation behind `ctRegex`: compiles `pattern` during compilation,
// generates matcher source code from it and exposes the result as `wrapper`.
template ctRegexImpl(alias pattern, string flags="")
{
    import std.regex.internal.backtracking;
    import std.regex.internal.parser;

    alias Char = BasicElementOf!(typeof(pattern));

    // The compiled pattern, stored as immutable static data.
    static immutable r = cast(immutable) regex(pattern, flags);

    // D source text produced from `r`; it becomes the body of `func` below.
    enum source = ctGenRegExCode(r);

    @trusted pure bool func(BacktrackingMatcher!Char matcher)
    {
        debug(std_regex_ctr) pragma(msg, source);
        // NOTE(review): presumably keeps `matcher` referenced even when the
        // generated code does not touch it - confirm against ctGenRegExCode.
        cast(void) matcher;
        mixin(source);
    }

    // Same regex as `r`, but bound to a factory built around `func`.
    static immutable staticRe =
        cast(immutable) r.withFactory(new CtfeFactory!(BacktrackingMatcher, Char, func));

    enum wrapper = CTRegexWrapper!Char(&staticRe);
}
505 
@safe pure unittest
{
    // test compat for logical const workaround
    static void takesRegex(StaticRegex!char)
    {
    }

    enum emptyPattern = ctRegex!``;
    takesRegex(emptyPattern);
}

@safe pure unittest
{
    auto current = ctRegex!`foo`;
    assert("foo".matchFirst(current));

    // test reassignment
    current = ctRegex!`bar`;
    assert("bar".matchFirst(current));
    assert(!"bar".matchFirst(ctRegex!`foo`));
}
526 
/++
    Compile a regular expression during compilation (via CTFE)
    and generate optimized native machine code for matching it.

    Returns: An object usable as a `StaticRegex`, for faster matching.

    Params:
    pattern = Regular expression
    flags = The _attributes (g, i, m, s and x accepted)
+/
public template ctRegex(alias pattern, string flags="")
{
    enum ctRegex = ctRegexImpl!(pattern, flags).wrapper;
}
538 
// True when `RegEx` can act as the regex for input of type `R`: ignoring
// qualifiers it is the `Regex` or `StaticRegex` for R's basic element type,
// or it implicitly converts to a const `Regex` of that element type.
template isRegexFor(RegEx, R)
{
    alias Char = BasicElementOf!R;

    enum isRegexFor = is(immutable RegEx == immutable Regex!Char)
        || is(RegEx : const(Regex!Char))
        || is(immutable RegEx == immutable StaticRegex!Char);
}
542 
543 
/++
    `Captures` holds the submatches captured by a single match: the result
    of a call to `match` or of iterating over a `RegexMatch` range.

    The first element of the range is the whole match.
+/
@trusted public struct Captures(R)
if (isSomeString!R)
{//@trusted because of union inside
    alias DataIndex = size_t;
    alias String = R;
    alias Store = SmallFixedArray!(Group!DataIndex, 3);
private:
    import std.conv : text;
    Store matches;              // one Group (begin/end offsets into _input) per submatch
    const(NamedGroup)[] _names; // handed to lookupNamedGroup for lookup by name
    R _input;                   // the input every returned slice is taken from
    int _nMatch;                // 0 means "no match"; reported by whichPattern
    uint _f, _b;                // front / back cursors of the range view

    // Sets up storage for `n` groups over `input`; the range initially
    // spans all `n` groups.
    this(R input, uint n, const(NamedGroup)[] named)
    {
        matches = Store(n);
        _input = input;
        _names = named;
        _f = 0;
        _b = n;
    }

    // Same set-up, taking the input, the group count and the named groups
    // from an existing RegexMatch.
    this(ref RegexMatch!R rmatch)
    {
        this(rmatch._input, rmatch._engine.pattern.ngroup, rmatch._engine.pattern.dict);
    }

    // Slice of the input covered by group `index`, or null when that group
    // tests false (i.e. did not take part in the match).
    inout(R) getMatch(size_t index) inout
    {
        auto group = &matches[index];
        if (*group)
            return _input[group.begin .. group.end];
        return null;
    }

public:
    ///Slice of input prior to the match.
    @property R pre()
    {
        if (_nMatch == 0)
            return _input[];
        return _input[0 .. matches[0].begin];
    }

    ///Slice of input immediately after the match.
    @property R post()
    {
        if (_nMatch == 0)
            return _input[];
        return _input[matches[0].end .. $];
    }

    ///Slice of matched portion of input.
    @property R hit()
    {
        assert(_nMatch, "attempted to get hit of an empty match");
        auto whole = matches[0];
        return _input[whole.begin .. whole.end];
    }

    ///Range interface.
    @property R front()
    {
        assert(_nMatch, "attempted to get front of an empty match");
        return getMatch(_f);
    }

    ///ditto
    @property R back()
    {
        assert(_nMatch, "attempted to get back of an empty match");
        return getMatch(_b - 1);
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        ++_f;
    }

    ///ditto
    void popBack()
    {
        assert(!empty);
        --_b;
    }

    ///ditto
    @property bool empty() const
    {
        return !(_nMatch != 0 && _f < _b);
    }

    ///ditto
    inout(R) opIndex()(size_t i) inout
    {
        // indices are relative to the current front of the range
        immutable slot = _f + i;
        assert(slot < _b, text("requested submatch number ", i," is out of range"));
        return getMatch(slot);
    }

    /++
        Explicit cast to bool.
        A shorthand for !(x.empty), handy in if and assert statements.

        ---
        import std.regex;

        assert(!matchFirst("nothing", "something"));
        ---
    +/
    @safe bool opCast(T:bool)() const nothrow
    {
        return _nMatch != 0;
    }

    /++
        Number of the pattern that matched, counting from 1 for the first pattern.
        Returns 0 on no match.
    +/
    @safe @property int whichPattern() const nothrow
    {
        return _nMatch;
    }

    ///
    @system unittest
    {
        import std.regex;
        assert(matchFirst("abc", "[0-9]+", "[a-z]+").whichPattern == 2);
    }

    /++
        Lookup named submatch.

        ---
        import std.regex;
        import std.range;

        auto c = matchFirst("a = 42;", regex(`(?P<var>\w+)\s*=\s*(?P<value>\d+);`));
        assert(c["var"] == "a");
        assert(c["value"] == "42");
        popFrontN(c, 2);
        //named groups are unaffected by range primitives
        assert(c["var"] =="a");
        assert(c.front == "42");
        ---
    +/
    R opIndex(String)(String i) /*const*/ //@@@BUG@@@
        if (isSomeString!String)
    {
        // the name resolves to an absolute group index, independent of _f
        return getMatch(lookupNamedGroup(_names, i));
    }

    ///Number of matches in this object.
    @property size_t length() const
    {
        if (_nMatch == 0)
            return 0;
        return _b - _f;
    }

    ///A hook for compatibility with original std.regex.
    @property ref captures(){ return this; }
}
703 
///
@system unittest
{
    import std.range.primitives : popFrontN;

    auto caps = matchFirst("@abc#", regex(`(\w)(\w)(\w)`));
    assert(caps.pre == "@"); // Part of input preceding match
    assert(caps.post == "#"); // Immediately after match
    // The whole match
    assert(caps.hit == caps[0]);
    assert(caps.hit == "abc");
    assert(caps[2] == "b");

    // Walking the captures as a range
    assert(caps.front == "abc");
    caps.popFront();
    assert(caps.front == "a");
    assert(caps.back == "c");
    caps.popBack();
    assert(caps.back == "b");
    caps.popFrontN(2);
    assert(caps.empty);

    assert(!matchFirst("nothing", "something"));

    // Captures that are not matched will be null.
    caps = matchFirst("ac", regex(`a(b)?c`));
    assert(caps);
    assert(!caps[1]);
}

// Captures can be assigned inside an expression and cast to bool.
@system unittest
{
    Captures!string caps;
    string subject = "abc";
    immutable matched = cast(bool)(caps = matchFirst(subject, regex("d")))
        || cast(bool)(caps = matchFirst(subject, regex("a")));
    assert(matched);
}

// https://issues.dlang.org/show_bug.cgi?id=19979
@system unittest
{
    auto caps = matchFirst("bad", regex(`(^)(not )?bad($)`));

    auto whole = caps[0];
    assert(whole && whole.length == "bad".length);

    // empty-but-matched groups are non-null slices of length 0
    auto atStart = caps[1];
    assert(atStart && !atStart.length);

    // the optional group did not take part in the match
    assert(!caps[2]);

    auto atEnd = caps[3];
    assert(atEnd && !atEnd.length);
}
748 
/++
    A regex engine state, as returned by `match` family of functions.

    Effectively it's a forward range of Captures!R, produced
    by lazily searching for matches in a given input.
+/
@trusted public struct RegexMatch(R)
if (isSomeString!R)
{
    import std.typecons : Rebindable;
private:
    alias Char = BasicElementOf!R;
    Matcher!Char _engine;
    Rebindable!(const MatcherFactory!Char) _factory;
    R _input;
    Captures!R _captures;

    // Picks the factory (the regex's own one, or the default when it has
    // none), creates an engine for `input` and immediately searches for the
    // first match so that `front`/`empty` are valid right after construction.
    this(RegEx)(R input, RegEx prog)
    {
        _input = input;
        if (prog.factory is null) _factory = defaultFactory!Char(prog);
        else _factory = prog.factory;
        _engine = _factory.create(prog, input);
        assert(_engine.refCount == 1);
        _captures = Captures!R(this);
        _captures.matches.mutate((slice) pure { _captures._nMatch = _engine.match(slice); });
    }

public:
    // A copy refers to the same engine, so its reference count is bumped.
    this(this)
    {
        if (_engine) _factory.incRef(_engine);
    }

    // Gives up this object's reference to the engine.
    ~this()
    {
        if (_engine) _factory.decRef(_engine);
    }

    ///Shorthands for front.pre, front.post, front.hit.
    @property R pre()
    {
        return _captures.pre;
    }

    ///ditto
    @property R post()
    {
        return _captures.post;
    }

    ///ditto
    @property R hit()
    {
        return _captures.hit;
    }

    /++
        Functionality for processing subsequent matches of global regexes via range interface:
        ---
        import std.regex;
        auto m = matchAll("Hello, world!", regex(`\w+`));
        assert(m.front.hit == "Hello");
        m.popFront();
        assert(m.front.hit == "world");
        m.popFront();
        assert(m.empty);
        ---
    +/
    @property inout(Captures!R) front() inout
    {
        return _captures;
    }

    ///ditto
    void popFront()
    {
        // CoW - if refCount is not 1, we are aliased by somebody else
        if (_engine.refCount != 1)
        {
            // we create a new engine & abandon this reference
            auto old = _engine;
            _engine = _factory.dup(old, _input);
            _factory.decRef(old);
        }
        // search for the next match, overwriting the stored captures
        _captures.matches.mutate((slice) { _captures._nMatch = _engine.match(slice); });
    }

    ///ditto
    auto save(){ return this; }

    ///Test if this match object is empty.
    @property bool empty() const { return _captures._nMatch == 0; }

    ///Same as !(x.empty), provided for its convenience in conditional statements.
    T opCast(T:bool)() const { return !empty; } // const, like `empty` and Captures.opCast

    /// Same as .front, provided for compatibility with original std.regex.
    @property inout(Captures!R) captures() inout { return _captures; }
}
851 
// Performs a single match of `prog` against `input` and returns the captures.
// The engine used by the previous call is remembered in function-level statics
// together with its (pattern, flags) key; a call with an equal key re-arms
// that engine for the new input instead of creating another one.
private auto matchOnceImpl(RegEx, R)(R input, const auto ref RegEx prog) @trusted
{
    alias Char = BasicElementOf!R;
    static struct Key
    {
        immutable(Char)[] pattern;
        uint flags;
    }
    static Key lastKey = Key("", -1);
    static Matcher!Char lastEngine;

    auto factory = prog.factory is null ? defaultFactory!Char(prog) : prog.factory;
    auto wanted = Key(prog.pattern, prog.flags);

    Matcher!Char engine;
    if (lastKey != wanted)
    {
        // different regex than last time: build an engine and remember it
        engine = factory.create(prog, input);
        if (lastEngine) factory.decRef(lastEngine); // destroy cached engine *after* building a new one
        lastEngine = engine;
        lastKey = wanted;
    }
    else
    {
        // same regex as last time: reuse the remembered engine
        engine = lastEngine;
        engine.rearm(input);
    }

    auto result = Captures!R(input, prog.ngroup, prog.dict);
    result.matches.mutate((slice) pure { result._nMatch = engine.match(slice); });
    return result;
}
881 
882 // matchOnce is constructed as a safe, pure wrapper over matchOnceImpl. It can be
883 // faked as pure because the static mutable variables are used to cache the key and
884 // character matcher. The technique used avoids delegates and GC.
private @safe auto matchOnce(RegEx, R)(R input, const auto ref RegEx prog) pure
{
    // Plain static forwarder so that its address can be taken below.
    static auto forward(R input, const ref RegEx prog)
    {
        return matchOnceImpl(input, prog);
    }

    // Casts the forwarder to a pure function and invokes it.
    static @trusted auto callAsPure(R input, const ref RegEx prog)
    {
        return assumePureFunction(&forward)(input, prog);
    }

    return callAsPure(input, prog);
}
900 
private auto matchMany(RegEx, R)(R input, auto ref RegEx re) @safe
{
    // Always add the global flag before constructing the match range.
    const flagsWithGlobal = re.flags | RegexOption.global;
    return RegexMatch!R(input, re.withFlags(flagsWithGlobal));
}
905 
@system unittest
{
    // sanity checks for new API
    auto pattern = regex("abc");
    const found = !"abc".matchOnce(pattern).empty;
    assert(found);
    // second call with the same pattern
    auto caps = "abc".matchOnce(pattern);
    assert(caps[0] == "abc");
}
913 
914 // https://issues.dlang.org/show_bug.cgi?id=18135
@system unittest
{
    // A struct that merely wraps a RegexMatch must be assignable and comparable.
    static struct MapResult { RegexMatch!string m; }
    MapResult wrapped;
    wrapped = MapResult();
    assert(wrapped == wrapped);
}
922 
// True when `fun` can be called with a single `Captures!R` argument.
private enum isReplaceFunctor(alias fun, R) =
    __traits(compiles, (Captures!R c) { fun(c); });
925 
926 // the lowest level - just stuff replacements into the sink
private @trusted void replaceCapturesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T captures)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    if (!captures.empty)
    {
        // text before the match, the replacement, then text after the match
        sink.put(captures.pre);
        // a hack to get around bogus errors, should be simply output(captures, sink)
        // "is a nested function and cannot be accessed from"
        static if (isReplaceFunctor!(output, R))
            sink.put(output(captures)); //"mutator" type of function
        else
            output(captures, sink); //"output" type of function
        sink.put(captures.post);
    }
    else
    {
        // nothing matched: the input is written out as is
        sink.put(input);
    }
}
945 
946 // ditto for a range of captures
private void replaceMatchesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T matches)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    // index in `input` just past the previously replaced match
    size_t consumed = 0;
    foreach (m; matches)
    {
        // copy the untouched text between the previous match and this one
        auto before = m.pre;
        sink.put(before[consumed .. $]);
        // same hack, see replaceCapturesInto
        static if (isReplaceFunctor!(output, R))
            sink.put(output(m)); //"mutator" type of function
        else
            output(m, sink); //"output" type of function
        consumed = before.length + m.hit.length;
    }
    // whatever follows the last match
    sink.put(input[consumed .. $]);
}
964 
965 //  a general skeleton of replaceFirst
private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto found = matchFirst(input, re);
    if (!found.empty)
    {
        auto buffer = appender!(R)();
        replaceCapturesInto!output(buffer, input, found);
        return buffer.data;
    }
    // no match: hand back the input without allocating
    return input;
}
977 
978 // ditto for replaceAll
979 // the method parameter allows old API to ride on the back of the new one
private R replaceAllWith(alias output,
        alias method=matchAll, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto found = method(input, re); //inout(C)[] fails
    if (!found.empty)
    {
        auto buffer = appender!(R)();
        replaceMatchesInto!output(buffer, input, found);
        return buffer.data;
    }
    // no match: hand back the input without allocating
    return input;
}
992 
993 
994 /++
995     Start matching `input` to regex pattern `re`,
996     using Thompson NFA matching scheme.
997 
998     The use of this function is $(RED discouraged) - use either of
999     $(LREF matchAll) or $(LREF matchFirst).
1000 
1001     Delegating  the kind of operation
1002     to "g" flag is soon to be phased out along with the
1003     ability to choose the exact matching scheme. The choice of
1004     matching scheme to use depends highly on the pattern kind and
1005     can be done automatically on a case-by-case basis.
1006 
1007     Returns: a `RegexMatch` object holding engine state after first match.
1008 +/
1009 
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx,R))
{
    // The match range is parameterized on the unqualified input type.
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, re);
}
1015 
1016 ///ditto
public auto match(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // Compile the pattern text first, then proceed as the Regex overload does.
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, regex(re));
}
1022 
1023 /++
1024     Find the first (leftmost) slice of the `input` that
1025     matches the pattern `re`. This function picks the most suitable
1026     regular expression engine depending on the pattern properties.
1027 
1028     `re` parameter can be one of three types:
1029     $(UL
1030       $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
1031       $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
1032         compiled  bytecode. )
1033       $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
1034         compiled native machine code. )
1035     )
1036 
1037     Returns:
1038     $(LREF Captures) containing the extent of a match together with all submatches
1039     if there was a match, otherwise an empty $(LREF Captures) object.
1040 +/
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Thin public wrapper over the private single-match helper.
    return matchOnce(input, re);
}
1046 
1047 ///ditto
public auto matchFirst(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // Compile the pattern text, then search for a single match.
    auto compiled = regex(re);
    return matchOnce(input, compiled);
}
1053 
1054 ///ditto
public auto matchFirst(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // Compile the list of patterns into one regex, then search for a single match.
    auto compiled = regex(re);
    return matchOnce(input, compiled);
}
1060 
1061 /++
1062     Initiate a search for all non-overlapping matches to the pattern `re`
1063     in the given `input`. The result is a lazy range of matches generated
1064     as they are encountered in the input going left to right.
1065 
1066     This function picks the most suitable regular expression engine
1067     depending on the pattern properties.
1068 
1069     `re` parameter can be one of three types:
1070     $(UL
1071       $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
1072       $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
1073         compiled  bytecode. )
1074       $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
1075         compiled native machine code. )
1076     )
1077 
1078     Returns:
1079     $(LREF RegexMatch) object that represents matcher state
1080     after the first match was found or an empty one if not present.
1081 +/
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Thin public wrapper over the private helper, which adds the global flag.
    return matchMany(input, re);
}
1087 
1088 ///ditto
public auto matchAll(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // Compile the pattern text, then iterate over its matches.
    auto compiled = regex(re);
    return matchMany(input, compiled);
}
1094 
1095 ///ditto
public auto matchAll(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // Compile the list of patterns into one regex, then iterate over its matches.
    auto compiled = regex(re);
    return matchMany(input, compiled);
}
1101 
1102 // another set of tests just to cover the new API
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    import std.conv : to;

    // Run the same checks for narrow, wide and const UTF-32 strings.
    static foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
    {{
        // Patterns given as plain strings.
        auto str1 = "blah-bleh".to!String();
        auto pat1 = "bl[ae]h".to!String();
        auto mf = matchFirst(str1, pat1);
        assert(mf.equal(["blah".to!String()]));
        auto mAll = matchAll(str1, pat1);
        assert(mAll.equal!((a,b) => a.equal(b))
            ([["blah".to!String()], ["bleh".to!String()]]));

        // A regex compiled at run time from a list of patterns.
        auto str2 = "1/03/12 - 3/03/12".to!String();
        auto pat2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
        auto mf2 = matchFirst(str2, pat2);
        assert(mf2.equal(["1/03/12", "1", "03", "12"].map!(to!String)()));
        auto mAll2 = matchAll(str2, pat2);
        assert(mAll2.front.equal(mf2));
        mAll2.popFront();
        assert(mAll2.front.equal(["3/03/12", "3", "03", "12"].map!(to!String)()));
        // Captures is itself a range: dropping three elements leaves the last group.
        mf2.popFrontN(3);
        assert(mf2.equal(["12".to!String()]));

        // A regex compiled at compile time, with named groups.
        auto ctPat = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
        auto str = "2 + 34/56 - 6/1".to!String();
        auto cmf = matchFirst(str, ctPat);
        assert(cmf.equal(["34/56", "34", "56"].map!(to!String)()));
        assert(cmf["Quot"] == "34".to!String());
        assert(cmf["Denom"] == "56".to!String());

        auto cmAll = matchAll(str, ctPat);
        assert(cmAll.front.equal(cmf));
        cmAll.popFront();
        assert(cmAll.front.equal(["6/1", "6", "1"].map!(to!String)()));
    }}
}
1143 
1144 /++
1145     Start matching of `input` to regex pattern `re`,
1146     using traditional $(LINK2 https://en.wikipedia.org/wiki/Backtracking,
1147     backtracking) matching scheme.
1148 
1149     The use of this function is $(RED discouraged) - use either of
1150     $(LREF matchAll) or $(LREF matchFirst).
1151 
1152     Delegating  the kind of operation
1153     to "g" flag is soon to be phased out along with the
1154     ability to choose the exact matching scheme. The choice of
1155     matching scheme to use depends highly on the pattern kind and
1156     can be done automatically on a case-by-case basis.
1157 
1158     Returns: a `RegexMatch` object holding engine
1159     state after first match.
1160 
1161 +/
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // The match range is parameterized on the unqualified input type.
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, re);
}
1167 
1168 ///ditto
public auto bmatch(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // Compile the pattern text first, then proceed as the Regex overload does.
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, regex(re));
}
1174 
// Produces a replacement string from `format`, using `captures` for substitution,
// and writes it to `sink`.
//
// Recognized sequences in `format`:
//   $<digits>  - the submatch with that number
//   ${name}    - the named submatch `name` (letters, digits and underscores)
//   $&         - the whole match
//   $`         - the input preceding the match
//   $'         - the input following the match
//   $$         - a literal '$'
// A '$' followed by any other character is dropped and the character is
// then treated as ordinary text.
//
// Params:
//   format = the replacement format string
//   captures = captures of the match being replaced
//   sink = output range receiving the result
//   ignoreBadSubs = when true, an out-of-range submatch number produces no
//                   output instead of an exception
//
// Throws: on an out-of-range submatch number (unless `ignoreBadSubs`), on a
// malformed `${...}` sequence, and when `format` ends with a lone '$'.
package void replaceFmt(R, Capt, OutR)
    (R format, Capt captures, OutR sink, bool ignoreBadSubs = false)
if (isOutputRange!(OutR, ElementEncodingType!R[]) &&
    isOutputRange!(OutR, ElementEncodingType!(Capt.String)[]))
{
    import std.algorithm.searching : find;
    import std.ascii : isAlphaNum, isDigit;
    import std.conv : text, parse;
    import std.exception : enforce;
    enum State { Normal, Dollar }
    auto state = State.Normal;
    size_t offset;
L_Replace_Loop:
    while (!format.empty)
        final switch (state)
        {
        case State.Normal:
            // Copy literal text up to the next '$' (or to the end of format).
            for (offset = 0; offset < format.length; offset++)//no decoding
            {
                if (format[offset] == '$')
                {
                    state = State.Dollar;
                    sink.put(format[0 .. offset]);
                    format = format[offset+1 .. $];//ditto
                    continue L_Replace_Loop;
                }
            }
            sink.put(format[0 .. offset]);
            format = format[offset .. $];
            break;
        case State.Dollar:
            if (isDigit(format[0]))
            {
                // parse() consumes the digits from the front of format
                uint digit = parse!uint(format);
                enforce(ignoreBadSubs || digit < captures.length, text("invalid submatch number ", digit));
                if (digit < captures.length)
                    sink.put(captures[digit]);
            }
            else if (format[0] == '{')
            {
                // Group names may contain digits and underscores as well as
                // letters, so scan past all of them to find the closing brace.
                auto x = find!(a => !(isAlphaNum(a) || a == '_'))(format[1..$]);
                enforce(!x.empty && x[0] == '}', "no matching '}' in replacement format");
                auto name = format[1 .. $ - x.length];
                format = x[1..$];
                enforce(!name.empty, "invalid name in ${...} replacement format");
                sink.put(captures[name]);
            }
            else if (format[0] == '&')
            {
                sink.put(captures[0]);
                format = format[1 .. $];
            }
            else if (format[0] == '`')
            {
                sink.put(captures.pre);
                format = format[1 .. $];
            }
            else if (format[0] == '\'')
            {
                sink.put(captures.post);
                format = format[1 .. $];
            }
            else if (format[0] == '$')
            {
                sink.put(format[0 .. 1]);
                format = format[1 .. $];
            }
            state = State.Normal;
            break;
        }
    // Still being in the Dollar state means format ended right after a '$'.
    enforce(state == State.Normal, "invalid format string in regex replace");
}
1248 
1249 /++
1250     Construct a new string from `input` by replacing the first match with
1251     a string generated from it according to the `format` specifier.
1252 
1253     To replace all matches use $(LREF replaceAll).
1254 
1255     Params:
1256     input = string to search
1257     re = compiled regular expression to use
1258     format = _format string to generate replacements from,
1259     see $(S_LINK Replace _format string, the _format string).
1260 
1261     Returns:
1262     A string of the same type with the first match (if any) replaced.
1263     If no match is found returns the input string itself.
1264 +/
public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // The lambda is an "output" style callback: it writes the expanded format
    // for the match `m` directly into `sink`.
    return replaceFirstWith!((m, sink) => replaceFmt(format, m, sink))(input, re);
}
1270 
1271 ///
@system unittest
{
    // Only the leading "n" is wrapped; the trailing one is left alone.
    auto bracketed = replaceFirst("noon", regex("n"), "[$&]");
    assert(bracketed == "[n]oon");
}
1276 
1277 /++
1278     This is a general replacement tool that constructs a new string by replacing
1279     matches of pattern `re` in the `input`. Unlike the other overload
1280     there is no format string; instead captures are passed
1281     to a user-defined functor `fun` that returns a new string
1282     to use as replacement.
1283 
1284     This version replaces the first match in `input`,
1285     see $(LREF replaceAll) to replace all of the matches.
1286 
1287     Returns:
1288     A new string of the same type as `input` with the first match
1289     replaced by the return value of `fun`. If no match is found
1290     returns the `input` itself.
1291 +/
public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Wrap `fun` as an "output" style callback that writes its result into `sink`.
    return replaceFirstWith!((m, sink) => sink.put(fun(m)))(input, re);
}
1297 
1298 ///
@system unittest
{
    import std.conv : to;
    // Increment only the first number found in the text.
    string original = "#21 out of 46";
    auto digits = regex(`[0-9]+`);
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))(original, digits);
    assert(bumped == "#22 out of 46");
}
1307 
1308 /++
1309     A variation on $(LREF replaceFirst) that instead of allocating a new string
1310     on each call outputs the result piece-wise to the `sink`. In particular
1311     this enables efficient construction of a final output incrementally.
1312 
1313     Like in $(LREF replaceFirst) family of functions there is an overload
1314     for the substitution guided by the `format` string
1315     and the one with the user defined callback.
1316 +/
public @trusted void replaceFirstInto(Sink, R, C, RegEx)
        (ref Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // Find the first match, then write prefix, expanded format and suffix to the sink.
    auto first = matchFirst(input, re);
    replaceCapturesInto!((m, sink) => replaceFmt(format, m, sink))
        (sink, input, first);
}
1325 
1326 ///ditto
public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx)
    (auto ref Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // `sink` is `auto ref` so that an lvalue sink is written to in place, as
    // with the format-string overload; taking it by value would discard the
    // output for sinks with value semantics. Rvalue sinks are still accepted.
    replaceCapturesInto!fun(sink, input, matchFirst(input, re));
}
1333 
1334 ///
@system unittest
{
    import std.array;
    string firstLine = "first message\n";
    string secondLine = "second message\n";
    auto collected = appender!string();
    replaceFirstInto(collected, firstLine, regex(`([a-z]+) message`), "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(collected, secondLine, regex(`([a-z]+) message`));
    assert(collected.data == "first\nsecond\n");
}
1346 
1347 //examples for replaceFirst
@system unittest
{
    import std.conv;
    import std.array;

    // functor-based replacement of the first match
    string original = "#21 out of 46";
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (original, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");

    // sink-based replacement, first with a format string...
    string firstLine = "first message\n";
    string secondLine = "second message\n";
    auto collected = appender!string();
    replaceFirstInto(collected, firstLine, regex(`([a-z]+) message`), "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(collected, secondLine, regex(`([a-z]+) message`));
    assert(collected.data == "first\nsecond\n");
}
1364 
1365 /++
1366     Construct a new string from `input` by replacing all of the
1367     fragments that match a pattern `re` with a string generated
1368     from the match according to the `format` specifier.
1369 
1370     To replace only the first match use $(LREF replaceFirst).
1371 
1372     Params:
1373     input = string to search
1374     re = compiled regular expression to use
1375     format = _format string to generate replacements from,
1376     see $(S_LINK Replace _format string, the _format string).
1377 
1378     Returns:
1379     A string of the same type as `input` with all
1380     of the matches (if any) replaced.
1381     If no match is found returns the input string itself.
1382 +/
public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // The lambda is an "output" style callback: it writes the expanded format
    // for each match `m` directly into `sink`.
    return replaceAllWith!((m, sink) => replaceFmt(format, m, sink))(input, re);
}
1388 
1389 ///
@system unittest
{
    // insert comma as thousands delimiter
    auto thousands = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
    auto grouped = replaceAll("12000 + 42100 = 54100", thousands, ",");
    assert(grouped == "12,000 + 42,100 = 54,100");
}
1396 
1397 /++
1398     This is a general replacement tool that constructs a new string by replacing
1399     matches of pattern `re` in the `input`. Unlike the other overload
1400     there is no format string; instead captures are passed
1401     to a user-defined functor `fun` that returns a new string
1402     to use as replacement.
1403 
1404     This version replaces all of the matches found in `input`,
1405     see $(LREF replaceFirst) to replace the first match only.
1406 
1407     Returns:
1408     A new string of the same type as `input` with all matches
1409     replaced by return values of `fun`. If no matches found
1410     returns the `input` itself.
1411 
1412     Params:
1413     input = string to search
1414     re = compiled regular expression
1415     fun = delegate to use
1416 +/
public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Wrap `fun` as an "output" style callback that writes its result into `sink`.
    return replaceAllWith!((m, sink) => sink.put(fun(m)))(input, re);
}
1422 
1423 ///
@system unittest
{
    // Replacement callback: upper-case whatever was matched.
    string shout(Captures!(string) m)
    {
        import std.string : toUpper;
        return toUpper(m.hit);
    }
    // Capitalize the letters 'a' and 'r':
    auto lettersAR = regex("[ar]");
    auto s = replaceAll!(shout)("Strap a rocket engine on a chicken.", lettersAR);
    assert(s == "StRAp A Rocket engine on A chicken.");
}
1436 
1437 /++
1438     A variation on $(LREF replaceAll) that instead of allocating a new string
1439     on each call outputs the result piece-wise to the `sink`. In particular
1440     this enables efficient construction of a final output incrementally.
1441 
1442     As with $(LREF replaceAll) there are 2 overloads - one with a format string,
1443     the other one with a user defined functor.
1444 +/
public @trusted void replaceAllInto(Sink, R, C, RegEx)
        (auto ref Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // `sink` is `auto ref` so that an lvalue sink is written to in place;
    // taking it by value would discard the output for sinks with value
    // semantics. Rvalue sinks are still accepted.
    replaceMatchesInto!((m, sink) => replaceFmt(format, m, sink))
        (sink, input, matchAll(input, re));
}
1453 
1454 ///ditto
public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
        (auto ref Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // `sink` is `auto ref` so that an lvalue sink is written to in place;
    // taking it by value would discard the output for sinks with value
    // semantics. Rvalue sinks are still accepted.
    replaceMatchesInto!fun(sink, input, matchAll(input, re));
}
1461 
1462 ///
@system unittest
{
    // insert comma as thousands delimiter in fifty randomly produced big numbers
    import std.array, std.conv, std.random, std.range;
    static re = regex(`(?<=\d)(?=(\d\d\d)+\b)`, "g");
    auto sink = appender!(char [])();
    enum ulong min = 10UL ^^ 10, max = 10UL ^^ 19;
    foreach (i; 0 .. 50)
    {
        // reuse the same buffer for every number
        sink.clear();
        replaceAllInto(sink, text(uniform(min, max)), re, ",");
        // stepping back four characters at a time from the end must land on a comma
        foreach (pos; iota(sink.data.length - 4, 0, -4))
            assert(sink.data[pos] == ',');
    }
}
1478 
1479 // exercise all of the replace APIs
@system unittest
{
    import std.array : appender;
    import std.conv;
    // try and check first/all simple substitution
    static foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
    {{
        // inputs
        S s1 = "curt trial".to!S();
        S s2 = "round dome".to!S();
        // expected results: *F for replaceFirst, *A for replaceAll
        S t1F = "court trial".to!S();
        S t2F = "hound dome".to!S();
        S t1A = "court trial".to!S();
        S t2A = "hound home".to!S();
        auto re1 = regex("curt".to!S());
        auto re2 = regex("[dr]o".to!S());

        // format-string overloads
        assert(replaceFirst(s1, re1, "court") == t1F);
        assert(replaceFirst(s2, re2, "ho") == t2F);
        assert(replaceAll(s1, re1, "court") == t1A);
        assert(replaceAll(s2, re2, "ho") == t2A);

        // functor overloads
        auto rep1 = replaceFirst!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(s1, re1);
        assert(rep1 == t1F);
        assert(replaceFirst!(cap => "ho".to!S())(s2, re2) == t2F);
        auto rep1A = replaceAll!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(s1, re1);
        assert(rep1A == t1A);
        assert(replaceAll!(cap => "ho".to!S())(s2, re2) == t2A);

        // sink overloads: each call appends to what is already in the sink
        auto sink = appender!S();
        replaceFirstInto(sink, s1, re1, "court");
        assert(sink.data == t1F);
        replaceFirstInto(sink, s2, re2, "ho");
        assert(sink.data == t1F~t2F);
        replaceAllInto(sink, s1, re1, "court");
        assert(sink.data == t1F~t2F~t1A);
        replaceAllInto(sink, s2, re2, "ho");
        assert(sink.data == t1F~t2F~t1A~t2A);
    }}
}
1519 
1520 /++
1521     Old API for replacement, operation depends on flags of pattern `re`.
1522     With "g" flag it performs the equivalent of $(LREF replaceAll) otherwise it
1523     works the same as $(LREF replaceFirst).
1524 
1525     The use of this function is $(RED discouraged), please use $(LREF replaceAll)
1526     or $(LREF replaceFirst) explicitly.
1527 +/
public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Forward the caller-selected matching function; it defaults to `match`,
    // so whether one or all matches are replaced depends on the flags of `re`.
    return replaceAllWith!((m, sink) => replaceFmt(format, m, sink), scheme)(input, re);
}
1533 
1534 ///ditto
public R replace(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // `fun` is handed over as the replacement callback and `match` as the
    // matching function.
    return replaceAllWith!(fun, match)(input, re);
}
1540 
1541 /**
1542 Splits a string `r` using a regular expression `pat` as a separator.
1543 
1544 Params:
1545     keepSeparators = flag to specify if the matches should be in the resulting range
1546     r = the string to split
1547     pat = the pattern to split on
1548 Returns:
1549     A lazy range of strings
1550 */
public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, alias RegEx = Regex)
if (isSomeString!Range && isRegexFor!(RegEx, Range))
{
private:
    // The string being split.
    Range _input;
    // Index in _input at which the current piece starts.
    size_t _offset;
    alias Rx = typeof(match(Range.init,RegEx.init));
    // Range over the separator matches found in _input.
    Rx _match;

    // Only with keepSeparators: true while `front` yields a separator,
    // false while it yields the text between separators.
    static if (keepSeparators) bool onMatch = false;

    @trusted this(Range input, RegEx separator)
    {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
        _input = input;
        // Always match with the global flag added to the separator's flags.
        const re = separator.withFlags(separator.flags | RegexOption.global);
        if (_input.empty)
        {
            //there is nothing to match at all, make _offset > 0
            _offset = 1;
        }
        else
        {
            _match = Rx(_input, re);

            // Input begins with a separator: advance once so that the
            // separator, not an empty piece, is the first element.
            static if (keepSeparators)
                if (_match.pre.empty)
                    popFront();
        }
    }

public:
    // Slicing without arguments yields a copy of the range.
    auto ref opSlice()
    {
        return this.save;
    }

    ///Forward range primitives.
    @property Range front()
    {
        import std.algorithm.comparison : min;

        assert(!empty && _offset <= _match.pre.length
                && _match.pre.length <= _input.length);

        static if (keepSeparators)
        {
            if (!onMatch)
                // text from the current offset up to the next separator
                return _input[_offset .. min($, _match.pre.length)];
            else
                // the separator itself
                return _match.hit();
        }
        else
        {
            // text from the current offset up to the next separator
            return _input[_offset .. min($, _match.pre.length)];
        }
    }

    ///ditto
    @property bool empty()
    {
        // The two modes use different end conditions: >= with separators
        // kept, > without.
        static if (keepSeparators)
            return _offset >= _input.length;
        else
            return _offset > _input.length;
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        if (_match.empty)
        {
            //No more separators, work is done here
            _offset = _input.length + 1;
        }
        else
        {
            static if (keepSeparators)
            {
                if (!onMatch)
                {
                    //skip past the separator
                    _offset = _match.pre.length;
                }
                else
                {
                    // step over the separator just yielded and look for the next one
                    _offset += _match.hit.length;
                    _match.popFront();
                }

                // alternate between text pieces and separators
                onMatch = !onMatch;
            }
            else
            {
                //skip past the separator
                _offset = _match.pre.length + _match.hit.length;
                _match.popFront();
            }
        }
    }

    ///ditto
    @property auto save()
    {
        return this;
    }
}
1658 
1659 /// ditto
public Splitter!(keepSeparators, Range, RegEx) splitter(
    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat)
if (
    is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
{
    // Convenience constructor: lets the template arguments be inferred.
    alias Result = Splitter!(keepSeparators, Range, RegEx);
    return Result(r, pat);
}
1667 
1668 ///
@system unittest
{
    import std.algorithm.comparison : equal;
    // Leading and trailing separators produce empty pieces at both ends.
    auto text = ", abc, de,  fg, hi, ";
    auto pieces = splitter(text, regex(", *"));
    assert(equal(pieces, ["", "abc", "de", "fg", "hi", ""]));
}
1676 
1677 /// Split on a pattern, but keep the matches in the resulting range
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.typecons : Yes;

    auto separators = regex(`([\.,])`);

    // separators appear between the pieces they delimit
    auto dateParts = "2003.04.05".splitter!(Yes.keepSeparators)(separators);
    assert(dateParts.equal(["2003", ".", "04", ".", "05"]));

    // a leading separator comes first, with no empty piece before it
    auto listParts = ",1,2,3".splitter!(Yes.keepSeparators)(separators);
    assert(listParts.equal([",", "1", ",", "2", ",", "3"]));
}
1693 
1694 ///An eager version of `splitter` that creates an array of the split slices of `input`.
public @trusted String[] split(String, RegEx)(String input, RegEx rx)
if (isSomeString!String  && isRegexFor!(RegEx, String))
{
    import std.array : appender;
    // Drain the lazy splitter into an array.
    auto pieces = appender!(String[])();
    foreach (piece; splitter(input, rx))
    {
        pieces.put(piece);
    }
    return pieces.data;
}
1704 
///Exception object thrown in case of errors during regex compilation
///(public alias of the type declared in `std.regex.internal.ir`).
public alias RegexException = std.regex.internal.ir.RegexException;
1707 
1708 /++
1709   A range that lazily produces a string output escaped
1710   to be used inside of a regular expression.
1711 +/
auto escaper(Range)(Range r)
{
    import std.algorithm.searching : find;
    static immutable escapables = [Escapables];
    static struct Escaper // template to deduce attributes
    {
        Range r;
        // true when a backslash is still to be emitted before r.front
        bool escaped;

        @property ElementType!Range front()
        {
            if (!escaped)
                return r.front;
            else
                return '\\';
        }

        @property bool empty()
        {
            return r.empty;
        }

        void popFront()
        {
            if (escaped)
            {
                // the backslash has been consumed; r.front comes next
                escaped = false;
                return;
            }
            r.popFront();
            if (!r.empty && !escapables.find(r.front).empty)
                escaped = true;
        }

        @property auto save()
        {
            return Escaper(r.save, escaped);
        }
    }

    // The very first element may need a backslash too.
    const startsEscaped = !r.empty && !escapables.find(r.front).empty;
    return Escaper(r, startsEscaped);
}
1746 
1747 ///
@system unittest
{
    import std.algorithm.comparison;
    import std.regex;
    string raw = `This is {unfriendly} to *regex*`;
    auto expected = `This is \{unfriendly\} to \*regex\*`;
    assert(raw.escaper.equal(expected));
}
1755 
@system unittest
{
    import std.algorithm.comparison;
    import std.conv;
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
      // a single escapable character
      auto s = "^".to!S;
      assert(s.escaper.equal(`\^`));
      // empty input, converted so that every string type is exercised
      auto s2 = "".to!S;
      assert(s2.escaper.equal(""));
    }}
}
1768 
@system unittest
{
    // `a?b?` consumes both characters, while with `a??` the first match is empty.
    assert("ab".matchFirst(regex(`a?b?`)).hit == "ab");
    assert("ab".matchFirst(regex(`a??b?`)).hit == "");
}