The OpenD Programming Language

1 /++
2 $(H1 String routines)
3 
4 The module contains SIMD-accelerated string routines.
5 
6 Copyright: 2022 Ilia Ki, Symmetry Investments
7 
8 Authors: Ilia Ki
9 +/
module mir.string;
11 
12 import std.traits: isSomeChar;
13 
// Maps each character type to the signed integer type of the same size.
// The vector types used by the routines below are built from these integers.
private alias Representation(T :  char) = byte;
private alias Representation(T : wchar) = short;
private alias Representation(T : dchar) = int;

// Size in bytes of a full vector; the routines below divide it by `C.sizeof`
// to get the number of characters handled per full-width step.
private enum size_t ScanVecSize = 16;
19 
/++
Checks whether `str` contains at least one of the given characters.

On little-endian LDC builds (unless the `MirNoSIMD` version is set), with at most
8 characters to look for and outside of CTFE, the input is first compared one
vector at a time. Whatever remains is compared element by element.

Params:
    str = text to search
    chars = one or more characters to look for
Returns:
    `true` if some element of `str` equals some element of `chars`, `false` otherwise.
+/
bool containsAny(C, size_t L)
    (scope const(C)[] str, const C[L] chars...)
    @trusted pure nothrow @nogc
    if (isSomeChar!C && L)
{
    // Number of characters of type C in one full-width vector.
    enum size_t NF = ScanVecSize / C.sizeof;

    alias U = Representation!C;

    version (MirNoSIMD) {}
    else
    version (LittleEndian)
    version (LDC)
    static if (L <= 8)
    static if (is(__vector(U[NF])))
    if (!__ctfe)
    {
        import mir.internal.ldc_simd: equalMask;

        // F == 1: loop over full-width vectors.
        // F == 2 (one-byte characters only): at most one extra half-width step.
        static foreach (F; 1 .. 2 + (C.sizeof == 1))
        {{
            enum N = NF / F;
            alias V = __vector(U[N]);
            // Number of machine words the vector is reinterpreted as.
            enum size_t W = U[N].sizeof / size_t.sizeof;

            // One vector per needle; the scalar assignment fills every lane.
            V[L] needles;
            static foreach (i; 0 .. L)
                needles[i] = chars[i];

            while (str.length >= N)
            {
                auto chunk = cast(V) *cast(const U[N]*) str.ptr;

                // OR together the per-needle comparison masks.
                V hits = equalMask!V(chunk, needles[0]);
                static foreach (i; 1 .. L)
                    hits |= equalMask!V(chunk, needles[i]);

                // View the mask as machine words so it can be tested for non-zero.
                static if (U[N].sizeof == size_t.sizeof)
                    size_t[W] words = [(cast(__vector(size_t[W])) hits).array[0]];
                else
                    auto words = (cast(__vector(size_t[W])) hits).array;

                foreach (word; words)
                    if (word)
                        return true;

                str = str[N .. $];

                static if (F != 1)
                    break;
            }
        }}
    }

    // Scalar path: the whole input when the vector path is unavailable,
    // otherwise only the tail the vector steps did not consume.
    foreach (pos; 0 .. str.length)
        static foreach (i; 0 .. L)
            if (str[pos] == chars[i])
                return true;
    return false;
}
94 
///
version(mir_test)
@safe pure nothrow @nogc
unittest
{
    // A single character that occurs in the string.
    assert("     hello world     ".containsAny('w'));
    // The comparison is case-sensitive: the string has no upper-case 'W'.
    assert(!"     hello world     ".containsAny('W'));
    // One hit among several candidates is enough.
    assert("     hello world     ".containsAny('W', 'e'));
    // The candidates may also be passed as a string literal.
    assert("     hello world     ".containsAny("We"));
}
107 
/++
Scans a string from the left and drops everything before the first element
that satisfies the chosen condition.

Params:
    op = `"=="` stops at the first element equal to one of the given characters;
         `"!="` stops at the first element equal to none of them.
+/
template scanLeftAny(string op = "==")
    if (op == "==" || op == "!=")
{
    /++
    Params:
        str = text to scan
        chars = one or more characters to compare against
    Returns:
        The tail of `str` that starts at the first element satisfying `op`,
        or an empty slice if no element does.
    +/
    inout(C)[]
        scanLeftAny(C, size_t L)
        (return scope inout(C)[] str, const C[L] chars...)
        @trusted pure nothrow @nogc
        if (isSomeChar!C && L)
    {
        // Number of characters of type C in one full-width vector.
        enum size_t NF = ScanVecSize / C.sizeof;

        alias U = Representation!C;

        version (MirNoSIMD) {}
        else
        version (LittleEndian)
        version (LDC)
        static if (L <= 8)
        static if (is(__vector(U[NF])))
        if (!__ctfe)
        {
            import mir.bitop: cttzp;
            import mir.internal.ldc_simd: equalMask;

            // F == 1: loop over full-width vectors.
            // F == 2 (one-byte characters only): at most one extra half-width step.
            static foreach (F; 1 .. 2 + (C.sizeof == 1))
            {{
                enum N = NF / F;
                alias V = __vector(U[N]);
                // Number of machine words the vector is reinterpreted as.
                enum size_t W = U[N].sizeof / size_t.sizeof;

                // One vector per needle; the scalar assignment fills every lane.
                V[L] needles;
                static foreach (i; 0 .. L)
                    needles[i] = chars[i];

                while (str.length >= N)
                {
                    auto chunk = cast(V) *cast(const U[N]*) str.ptr;

                    // OR together the per-needle comparison masks.
                    V hits = equalMask!V(chunk, needles[0]);
                    static foreach (i; 1 .. L)
                        hits |= equalMask!V(chunk, needles[i]);

                    // For "!=" the interesting lanes are the ones that matched nothing.
                    static if (op == "!=")
                        hits = ~hits;

                    static if (U[N].sizeof == size_t.sizeof)
                        size_t[W] words = [(cast(__vector(size_t[W])) hits).array[0]];
                    else
                        auto words = (cast(__vector(size_t[W])) hits).array;

                    // Accumulate the trailing-zero counts word by word, lowest
                    // word first; on the first non-zero word the sum is divided
                    // by the bit width of one element to get the slice index.
                    size_t bitOffset;
                    static foreach (i; 0 .. words.length)
                    {
                        bitOffset += cttzp(words[i]);
                        if (words[i])
                            return str[bitOffset / (U.sizeof * 8) .. $];
                    }

                    // No lane of interest in this chunk: skip it entirely.
                    str = str[N .. $];

                    static if (F != 1)
                        break;
                }
            }}
        }

        // Scalar path: the whole input when the vector path is unavailable,
        // otherwise only the tail the vector steps did not consume.
        enum bool stopOnListed = op == "==";
        while (str.length)
        {
            auto head = str[0];
            bool listed = false;
            static foreach (i; 0 .. L)
                if (head == chars[i])
                    listed = true;
            if (listed == stopOnListed)
                break;
            str = str[1 .. $];
        }
        return str;
    }
}
219 
/// Drops leading characters: `scanLeftAny` instantiated with `"!="`, so the result
/// starts at the first element of `str` that is not one of the given `chars`.
alias stripLeft = scanLeftAny!"!=";
222 
///
version(mir_test)
@safe pure nothrow @nogc
unittest
{
    import mir.test: should;

    // stripLeft removes the leading run of the listed character; the trailing run stays.
    "     hello world     ".stripLeft(' ').should == "hello world     ";
    // scanLeftAny (default "==") returns the tail starting at the first listed character...
    "     hello world     ".scanLeftAny('w').should == "world     ";
    // ...and an empty slice when the character does not occur.
    "     hello world     ".scanLeftAny('!').should == "";
    // Several characters to strip can be passed at once.
    "\t\n\thello world\n\t___".stripLeft('\n', '\t').should == "hello world\n\t___";
    // Nothing to strip on the left: the input comes back unchanged.
    "hello world".stripLeft(' ').should == "hello world";
    "hello world           ".stripLeft(' ').should == "hello world           ";

    // A leading run longer than 16 characters, so more than one chunk is skipped
    // where the vectorized path is active.
    "        _____________              hello world     "
        .stripLeft(' ', '_').should == "hello world     ";
}
240 
/++
Scans a string from the right and drops everything after the last element
that satisfies the chosen condition.

Params:
    op = `"=="` stops at the last element equal to one of the given characters;
         `"!="` stops at the last element equal to none of them.
+/
template scanRightAny(string op = "==")
    if (op == "==" || op == "!=")
{
    /++
    Params:
        str = text to scan
        chars = one or more characters to compare against
    Returns:
        The head of `str` that ends with the last element satisfying `op`
        (that element included), or an empty slice if no element does.
    +/
    inout(C)[]
        scanRightAny(C, size_t L)
        (return scope inout(C)[] str, const C[L] chars...)
        @trusted pure nothrow @nogc
        if (isSomeChar!C && L)
    {
        // Number of characters of type C in one full-width vector.
        enum size_t NF = ScanVecSize / C.sizeof;

        alias U = Representation!C;

        version (MirNoSIMD) {}
        else
        version (LittleEndian)
        version (LDC)
        static if (L <= 8)
        static if (is(__vector(U[NF])))
        if (!__ctfe)
        {
            import mir.bitop: ctlzp;
            import mir.internal.ldc_simd: equalMask;

            // F == 1: loop over full-width vectors.
            // F == 2 (one-byte characters only): at most one extra half-width step.
            static foreach (F; 1 .. 2 + (C.sizeof == 1))
            {{
                enum N = NF / F;
                alias V = __vector(U[N]);
                // Number of machine words the vector is reinterpreted as.
                enum size_t W = U[N].sizeof / size_t.sizeof;

                // One vector per needle; the scalar assignment fills every lane.
                V[L] needles;
                static foreach (i; 0 .. L)
                    needles[i] = chars[i];

                while (str.length >= N)
                {
                    // Load the last N elements of the remaining input.
                    auto chunk = cast(V) *cast(const U[N]*) (str.ptr + str.length - N);

                    // OR together the per-needle comparison masks.
                    V hits = equalMask!V(chunk, needles[0]);
                    static foreach (i; 1 .. L)
                        hits |= equalMask!V(chunk, needles[i]);

                    // For "!=" the interesting lanes are the ones that matched nothing.
                    static if (op == "!=")
                        hits = ~hits;

                    static if (U[N].sizeof == size_t.sizeof)
                        size_t[W] words = [(cast(__vector(size_t[W])) hits).array[0]];
                    else
                        auto words = (cast(__vector(size_t[W])) hits).array;

                    // Accumulate the leading-zero counts word by word, highest
                    // word first; on the first non-zero word the sum is divided
                    // by the bit width of one element to get the number of
                    // trailing elements to drop.
                    size_t bitOffset;
                    static foreach (i; 0 .. words.length)
                    {
                        bitOffset += ctlzp(words[$ - 1 - i]);
                        if (words[$ - 1 - i])
                            return str[0 .. $ - bitOffset / (U.sizeof * 8)];
                    }

                    // No lane of interest in this chunk: cut it off entirely.
                    str = str[0 .. $ - N];

                    static if (F != 1)
                        break;
                }
            }}
        }

        // Scalar path: the whole input when the vector path is unavailable,
        // otherwise only the head the vector steps did not consume.
        enum bool stopOnListed = op == "==";
        while (str.length)
        {
            auto last = str[$ - 1];
            bool listed = false;
            static foreach (i; 0 .. L)
                if (last == chars[i])
                    listed = true;
            if (listed == stopOnListed)
                break;
            str = str[0 .. $ - 1];
        }
        return str;
    }
}
352 
/// Drops trailing characters: `scanRightAny` instantiated with `"!="`, so the result
/// ends with the last element of `str` that is not one of the given `chars`.
alias stripRight = scanRightAny!"!=";
355 
///
version(mir_test)
@safe pure nothrow @nogc
unittest
{
    import mir.test: should;

    // stripRight removes the trailing run of the listed character; the leading run stays.
    "     hello world     ".stripRight(' ').should == "     hello world";
    // scanRightAny (default "==") returns the head ending with the last listed character...
    "     hello world     ".scanRightAny('w').should == "     hello w";
    // ...and an empty slice when the character does not occur.
    "     hello world     ".scanRightAny('!').should == "";
    // Several characters to strip can be passed at once.
    "___\t\n\thello world\n\t".stripRight('\n', '\t').should == "___\t\n\thello world";
    // Nothing to strip on the right: the input comes back unchanged.
    "hello world".stripRight(' ').should == "hello world";
    "           hello world".stripRight(' ').should == "           hello world";

    // A trailing run longer than 16 characters, so more than one chunk is cut off
    // where the vectorized path is active.
    "     hello world        _____________              "
        .stripRight(' ', '_').should == "     hello world";
}
373 
/++
Removes the given characters from both ends of a string.

Params:
    str = text to trim
    chars = one or more characters to remove from the start and the end
Returns:
    The slice of `str` left after `stripLeft` and then `stripRight` have been
    applied with the same `chars`.
+/
inout(C)[]
    strip(C, size_t L)
    (return scope inout(C)[] str, const C[L] chars...)
    @safe pure nothrow @nogc
    if (isSomeChar!C && L)
{
    // Trim the front first, then the back of what is left.
    return stripRight(stripLeft(str, chars), chars);
}
383 
///
version(mir_test)
@safe pure nothrow @nogc
unittest
{
    import mir.test: should;

    // A single character is removed from both ends; the inner space is kept.
    "     hello world!     ".strip(' ')     .should == "hello world!";
    // Characters passed as a string literal: both ' ' and '!' are stripped.
    "     hello world!!!   ".strip(" !").should == "hello world";
}