/++
$(H1 String routines)

The module contains SIMD-accelerated string routines.

Copyright: 2022 Ilia Ki, Symmetry Investments

Authors: Ilia Ki
+/
module mir.string;

import std.traits: isSomeChar;

// Signed integer type of the same width as the character type; it is used as
// the lane type of the SIMD vectors below.
private alias Representation(T : char) = byte;
private alias Representation(T : wchar) = short;
private alias Representation(T : dchar) = int;

// Width in bytes of the full-size vector used by the accelerated paths.
private enum size_t ScanVecSize = 16;

/++
Checks whether `str` contains at least one of the given characters.

When compiled with LDC for a little-endian target (and `MirNoSIMD` is not set,
`L <= 8`, and the required vector type exists) the bulk of the string is
compared a whole vector at a time; the remaining tail, and every other
configuration including CTFE, uses a plain element-by-element loop.

Params:
    str = string to search in
    chars = one or more characters to look for
Returns:
    `true` if any element of `str` equals any element of `chars`,
    `false` otherwise (including for an empty `str`).
+/
bool containsAny(C, size_t L)
    (scope const(C)[] str, const C[L] chars...)
    @trusted pure nothrow @nogc
    if (isSomeChar!C && L)
{
    // Number of characters that fit into one full-size vector.
    enum size_t NF = ScanVecSize / C.sizeof;

    alias U = Representation!C;

    // version(none)
    version (MirNoSIMD) {}
    else
    version (LittleEndian)
    version (LDC)
    static if (L <= 8)
    static if (is(__vector(U[NF])))
    if (!__ctfe)
    {
        // F == 1: full-size vectors, repeated while enough input remains.
        // F == 2: (one-byte characters only) a single half-size step.
        static foreach (F; 1 .. 2 + (C.sizeof == 1))
        {{
            enum N = NF / F;
            alias V = __vector(U[N]);

            // Broadcast every searched character into its own vector.
            V[L] charsv;
            static foreach (i; 0 .. L)
                charsv[i] = chars[i];

            while (str.length >= N)
            {
                // Load the next N characters; the length check above keeps the read in bounds.
                auto a = cast(V) *cast(const U[N]*) str.ptr;

                import mir.internal.ldc_simd: mask = equalMask;

                // NOTE(review): equalMask presumably yields non-zero lanes where
                // the operands are equal - confirm against mir.internal.ldc_simd.
                V[L] masked;
                static foreach (i; 0 .. L)
                    masked[i] = mask!(__vector(U[N]))(a, charsv[i]);

                // OR the per-character masks together into `m`.
                static foreach (i; 0 .. L)
                    static if (i == 0)
                        V m = masked[i];
                    else
                        m |= masked[i];

                // Reinterpret the combined mask as machine words.
                static if (U[N].sizeof == size_t.sizeof)
                {
                    size_t[U[N].sizeof / size_t.sizeof] words = [(cast(__vector(size_t[U[N].sizeof / size_t.sizeof])) m).array[0]];
                }
                else
                {
                    auto words = (cast(__vector(size_t[U[N].sizeof / size_t.sizeof])) m).array;
                }

                // Any non-zero word means at least one lane matched.
                foreach (word; words)
                    if (word)
                        return true;

                str = str[N .. $];

                // The reduced-width pass is executed at most once.
                static if (F != 1)
                    break;
            }

        }}
    }

    // Scalar path: whole input when SIMD is unavailable, otherwise the tail.
    foreach (C c; str)
        static foreach (i; 0 .. L)
            if (c == chars[i])
                return true;
    return false;
}

///
version(mir_test)
@safe pure nothrow @nogc
unittest
{
    assert(" hello world ".containsAny('w'));
    assert(!" hello world ".containsAny('W'));
    assert(" hello world ".containsAny('W', 'e'));
    assert(" hello world ".containsAny("We"));
}

/++
Scans a string from the left.

With `op == "=="` the scan stops at the first character that equals one of
`chars`; with `op == "!="` it stops at the first character that equals none of
them.

Params:
    op = `"=="` to search for the characters, `"!="` to skip over them
+/
template scanLeftAny(string op = "==")
    if (op == "==" || op == "!=")
{
    /++
    Params:
        str = string to scan
        chars = one or more characters to compare against
    Returns:
        The slice of `str` that starts at the character where the scan
        stopped and runs to the end of `str`; an empty slice if the scan
        reached the end of `str` without stopping.
    +/
    inout(C)[]
        scanLeftAny(C, size_t L)
        (return scope inout(C)[] str, const C[L] chars...)
        @trusted pure nothrow @nogc
        if (isSomeChar!C && L)
    {
        // Number of characters that fit into one full-size vector.
        enum size_t NF = ScanVecSize / C.sizeof;

        alias U = Representation!C;

        // version(none)
        version (MirNoSIMD) {}
        else
        version (LittleEndian)
        version (LDC)
        static if (L <= 8)
        static if (is(__vector(U[NF])))
        if (!__ctfe)
        {
            import mir.bitop: cttzp;

            // F == 1: full-size vectors, repeated while enough input remains.
            // F == 2: (one-byte characters only) a single half-size step.
            static foreach (F; 1 .. 2 + (C.sizeof == 1))
            {{
                enum N = NF / F;
                alias V = __vector(U[N]);
                // Broadcast every compared character into its own vector.
                V[L] charsv;
                static foreach (i; 0 .. L)
                    charsv[i] = chars[i];

                while (str.length >= N)
                {
                    // Load the first N characters; the length check above keeps the read in bounds.
                    auto a = cast(V) *cast(const U[N]*) str.ptr;

                    import mir.internal.ldc_simd: mask = equalMask;

                    // NOTE(review): equalMask presumably sets every bit of a lane
                    // whose operands are equal - confirm against mir.internal.ldc_simd.
                    V[L] masked;
                    static foreach (i; 0 .. L)
                        masked[i] = mask!(__vector(U[N]))(a, charsv[i]);

                    // OR the per-character masks together into `m`.
                    static foreach (i; 0 .. L)
                        static if (i == 0)
                            V m = masked[i];
                        else
                            m |= masked[i];

                    // For "!=" the lanes of interest are the ones that matched nothing.
                    static if (op == "!=")
                        m = ~m;

                    // Reinterpret the mask as machine words.
                    static if (U[N].sizeof == size_t.sizeof)
                    {
                        size_t[U[N].sizeof / size_t.sizeof] words = [(cast(__vector(size_t[U[N].sizeof / size_t.sizeof])) m).array[0]];
                    }
                    else
                    {
                        auto words = (cast(__vector(size_t[U[N].sizeof / size_t.sizeof])) m).array;
                    }

                    // Bit offset of the first set mask bit, accumulated word by word.
                    // NOTE(review): this relies on cttzp returning the full bit width
                    // for a zero word - confirm against mir.bitop.
                    size_t p;

                    static foreach (i; 0 .. words.length)
                    {
                        p += cttzp(words[i]);
                        if (words[i])
                        {
                            // Distinct label per pass: both passes live in the same function body.
                            static if (F == 1)
                                goto L;
                            else
                                goto M;
                        }
                    }
                    // Nothing of interest in this chunk: drop it.
                    str = str[N .. $];
                    static if (F == 1)
                        continue;
                    else
                        break;

                    static if (F == 1)
                        {L:}
                    else
                        {M:}
                    // Convert the bit offset into a character index.
                    return str[p / (U.sizeof * 8) .. $];
                }
            }}
        }

        // Scalar path: whole input when SIMD is unavailable, otherwise the tail.
        Loop: for (; str.length; str = str[1 .. $])
        {
            auto c = str[0];
            static foreach (i; 0 .. L)
            {
                if (c == chars[i])
                {
                    static if (op == "==")
                        break Loop;
                    else
                        continue Loop;
                }
            }
            // `c` matched none of `chars`.
            static if (op == "==")
                continue Loop;
            else
                break Loop;
        }
        return str;
    }
}

/// Removes leading characters that are equal to any of the given characters.
alias stripLeft = scanLeftAny!"!=";

///
version(mir_test)
@safe pure nothrow @nogc
unittest
{
    import mir.test: should;

    " hello world ".stripLeft(' ').should == "hello world ";
    " hello world ".scanLeftAny('w').should == "world ";
    " hello world ".scanLeftAny('!').should == "";
    "\t\n\thello world\n\t___".stripLeft('\n', '\t').should == "hello world\n\t___";
    "hello world".stripLeft(' ').should == "hello world";
    "hello world ".stripLeft(' ').should == "hello world ";

    " _____________ hello world "
        .stripLeft(' ', '_').should == "hello world ";
}

/++
Scans a string from the right.

With `op == "=="` the scan stops at the last character that equals one of
`chars`; with `op == "!="` it stops at the last character that equals none of
them.

Params:
    op = `"=="` to search for the characters, `"!="` to skip over them
+/
template scanRightAny(string op = "==")
    if (op == "==" || op == "!=")
{
    /++
    Params:
        str = string to scan
        chars = one or more characters to compare against
    Returns:
        The slice of `str` that starts at the beginning of `str` and ends with
        the character where the scan stopped (inclusive); an empty slice if
        the scan reached the beginning of `str` without stopping.
    +/
    inout(C)[]
        scanRightAny(C, size_t L)
        (return scope inout(C)[] str, const C[L] chars...)
        @trusted pure nothrow @nogc
        if (isSomeChar!C && L)
    {
        // Number of characters that fit into one full-size vector.
        enum size_t NF = ScanVecSize / C.sizeof;

        alias U = Representation!C;

        // version(none)
        version (MirNoSIMD) {}
        else
        version (LittleEndian)
        version (LDC)
        static if (L <= 8)
        static if (is(__vector(U[NF])))
        if (!__ctfe)
        {
            import mir.bitop: ctlzp;

            // F == 1: full-size vectors, repeated while enough input remains.
            // F == 2: (one-byte characters only) a single half-size step.
            static foreach (F; 1 .. 2 + (C.sizeof == 1))
            {{
                enum N = NF / F;

                alias V = __vector(U[N]);
                // Broadcast every compared character into its own vector.
                V[L] charsv;
                static foreach (i; 0 .. L)
                    charsv[i] = chars[i];

                while (str.length >= N)
                {
                    // Load the last N characters; the length check above keeps the read in bounds.
                    auto a = cast(V) *cast(const U[N]*) (str.ptr + str.length - N);

                    import mir.internal.ldc_simd: mask = equalMask;

                    // NOTE(review): equalMask presumably sets every bit of a lane
                    // whose operands are equal - confirm against mir.internal.ldc_simd.
                    V[L] masked;
                    static foreach (i; 0 .. L)
                        masked[i] = mask!(__vector(U[N]))(a, charsv[i]);

                    // OR the per-character masks together into `m`.
                    static foreach (i; 0 .. L)
                        static if (i == 0)
                            V m = masked[i];
                        else
                            m |= masked[i];

                    // For "!=" the lanes of interest are the ones that matched nothing.
                    static if (op == "!=")
                        m = ~m;

                    // Reinterpret the mask as machine words.
                    static if (U[N].sizeof == size_t.sizeof)
                    {
                        size_t[U[N].sizeof / size_t.sizeof] words = [(cast(__vector(size_t[U[N].sizeof / size_t.sizeof])) m).array[0]];
                    }
                    else
                    {
                        auto words = (cast(__vector(size_t[U[N].sizeof / size_t.sizeof])) m).array;
                    }
                    // Number of bits after the last set mask bit, accumulated from the
                    // highest word downwards.
                    // NOTE(review): this relies on ctlzp returning the full bit width
                    // for a zero word - confirm against mir.bitop.
                    size_t p;

                    static foreach (i; 0 .. words.length)
                    {
                        p += ctlzp(words[$ - 1 - i]);
                        if (words[$ - 1 - i])
                        {
                            // Distinct label per pass: both passes live in the same function body.
                            static if (F == 1)
                                goto L;
                            else
                                goto M;
                        }
                    }
                    // Nothing of interest in this chunk: drop it.
                    str = str[0 .. $ - N];
                    static if (F == 1)
                        continue;
                    else
                        break;

                    static if (F == 1)
                        {L:}
                    else
                        {M:}
                    // Convert the bit count into a number of trailing characters to drop.
                    return str[0 .. $ - p / (U.sizeof * 8)];
                }
            }}
        }

        // Scalar path: whole input when SIMD is unavailable, otherwise the head.
        Loop: for (; str.length; str = str[0 .. $ - 1])
        {
            auto c = str[$ - 1];
            static foreach (i; 0 .. L)
            {
                if (c == chars[i])
                {
                    static if (op == "==")
                        break Loop;
                    else
                        continue Loop;
                }
            }
            // `c` matched none of `chars`.
            static if (op == "==")
                continue Loop;
            else
                break Loop;
        }
        return str;
    }
}

/// Removes trailing characters that are equal to any of the given characters.
alias stripRight = scanRightAny!"!=";

///
version(mir_test)
@safe pure nothrow @nogc
unittest
{
    import mir.test: should;

    " hello world ".stripRight(' ').should == " hello world";
    " hello world ".scanRightAny('w').should == " hello w";
    " hello world ".scanRightAny('!').should == "";
    "___\t\n\thello world\n\t".stripRight('\n', '\t').should == "___\t\n\thello world";
    "hello world".stripRight(' ').should == "hello world";
    " hello world".stripRight(' ').should == " hello world";

    " hello world _____________ "
        .stripRight(' ', '_').should == " hello world";
}

/++
Removes both leading and trailing characters that are equal to any of the
given characters.

Params:
    str = string to strip
    chars = one or more characters to remove from both ends
Returns:
    The slice of `str` left after applying `stripLeft` and then `stripRight`
    with the same `chars`.
+/
inout(C)[]
    strip(C, size_t L)
    (return scope inout(C)[] str, const C[L] chars...)
    @safe pure nothrow @nogc
    if (isSomeChar!C && L)
{
    return str.stripLeft(chars).stripRight(chars);
}

///
version(mir_test)
@safe pure nothrow @nogc
unittest
{
    import mir.test: should;

    " hello world! ".strip(' ') .should == "hello world!";
    " hello world!!! ".strip(" !").should == "hello world";
}