/**
* `core.simd` emulation layer.
*
* Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.types;


pure:
nothrow:
@nogc:

version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    // Future: just detect vectors, do not base this upon the architecture.

    version(X86_64)
    {
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        // Does GDC support AVX-sized vectors?
        static if (__VERSION__ >= 2100) // Starting at GDC 12.1 only.
        {
            enum AVXSizedVectorsAreEmulated = !(is(__vector(double[4])));
        }
        else
        {
            enum AVXSizedVectorsAreEmulated = true;
        }

        import gcc.builtins;
    }
    else
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to make clear that a shuffle is LDC-only,
    // for example when an emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;

    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
    enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    static if (__VERSION__ >= 2100)
    {
        // Note: turning this to true is very desirable for DMD performance,
        // but it also leads to many bugs being discovered upstream.
        // The fact that it works at all relies on many workarounds.
        // In particular, intel-intrinsics with this flag "on" is a honeypot for DMD backend bugs,
        // and a very strong DMD codegen test suite.
        // What typically happens is that contributors end up hitting a DMD bug in their PR.
        // Finally, in 2022, D_SIMD was activated, at least for SSE and some instructions.
        enum bool tryToEnableCoreSimdWithDMD = true;
    }
    else
    {
        enum bool tryToEnableCoreSimdWithDMD = false;
    }

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;

        // Note: with DMD, AVX-sized vectors can't be enabled yet.
        // On linux + x86_64, this fails since a few operands seem to be missing.
        // FUTURE: enable AVX-sized vectors in DMD. :)
        //
        // Blockers: https://issues.dlang.org/show_bug.cgi?id=24283 and 24284.
        // Probably other, unreported issues as well.
        enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD.
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}

enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated || AVXSizedVectorsAreEmulated;
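
// Illustrative check (added; `scaleBy2` is a hypothetical local helper, not part
// of this module's API): code built on top of these flags typically branches on
// them at compile time to pick a native vector path or a scalar fallback.
unittest
{
    static float4 scaleBy2(float4 a) @trusted
    {
        static if (SSESizedVectorsAreEmulated)
        {
            // Scalar fallback over the emulated struct's storage.
            foreach (i; 0 .. 4)
                a.ptr[i] = a.array[i] + a.array[i];
            return a;
        }
        else
        {
            // Native vector arithmetic.
            return a + a;
        }
    }

    float4 x = [1.0f, 2.0f, 3.0f, 4.0f];
    static immutable float[4] correct = [2.0f, 4.0f, 6.0f, 8.0f];
    assert(scaleBy2(x).array == correct);
}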

static if (CoreSimdIsEmulated)
{
    // core.simd is emulated in some capacity: introduce `VectorOps`.

    mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
    {
        enum Count = N;
        alias Base = BaseType;

        BaseType* ptr() return pure nothrow @nogc
        {
            return array.ptr;
        }

        // Unary operators
        VectorType opUnary(string op)() pure nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = " ~ op ~ "array[];");
            return res;
        }

        // Binary operators
        VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
        {
            VectorType res = void;
            mixin("res.array[] = array[] " ~ op ~ " other.array[];");
            return res;
        }

        // Assigning a BaseType value broadcasts it to all lanes.
        void opAssign(BaseType e) pure nothrow @safe @nogc
        {
            array[] = e;
        }

        // Assigning a static array
        void opAssign(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
        {
            mixin("array[] " ~ op ~ "= other.array[];");
        }

        // Constructing from a static array
        this(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        // Broadcast constructor
        this(BaseType x) pure nothrow @safe @nogc
        {
            array[] = x;
        }

        /// We can't support implicit conversion but do support explicit casting.
        /// "Vector types of the same size can be implicitly converted among each other."
        /// Casting to another vector type is always just a raw copy.
        VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
            if (VecDest.sizeof == VectorType.sizeof)
        {
            VecDest dest = void;
            // Raw copy of the bytes, reinterpreted as the destination element type.
            dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
            return dest;
        }

        ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
        {
            return array[i];
        }
    }
}
else
{
    public import core.simd;

    // GDC cannot implicitly convert a __vector from signed to unsigned, but LDC can,
    // and GDC sometimes needs those unsigned vector types for some intrinsics.
    // For internal use only.
    package alias ushort8 = Vector!(ushort[8]);
    package alias ubyte8  = Vector!(ubyte[8]);
    package alias ubyte16 = Vector!(ubyte[16]);

    static if (!AVXSizedVectorsAreEmulated)
    {
        package alias ushort16 = Vector!(ushort[16]);
        package alias ubyte32  = Vector!(ubyte[32]);
    }
}
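
// Illustrative usage (added): whether the types defined below are emulated structs
// or native __vector types, construction, element-wise arithmetic, and same-size
// casts behave the same way.
unittest
{
    int4 a = [1, 2, 3, 4];
    int4 b = 10;                 // broadcast construction
    int4 c = a + b;              // element-wise add (opBinary or native +)
    static immutable int[4] correct = [11, 12, 13, 14];
    assert(c.array == correct);

    // A cast between vector types of the same size is a raw copy of the bits.
    float4 f = cast(float4) c;
    assert((cast(int4) f).array == correct);
}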

// Emulate ldc.simd's cmpMask and the other mask helpers.
// Note: these should be deprecated on non-LDC compilers,
// since producing masks this way generates slower code.
version(LDC)
{}
else
{
    // TODO: deprecate these and write plain versions instead.

    private template BaseType(V)
    {
        alias typeof( ( { V v; return v; }()).array[0]) BaseType;
    }

    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == float))
        {
            immutable uint m1 = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&m1);
        }
        else static if (is(Elem == double))
        {
            immutable ulong m1 = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&m1);
        }
        else // integer case
        {
            enum Elem TrueMask = -1;
        }
    }

    Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to the "oeq" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] == b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to the "ogt" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] > b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }
}

unittest
{
    float4 a = [1, 3, 5, 7];
    float4 b = [2, 3, 4, 5];
    int4 c = cast(int4)(greaterMask!float4(a, b));
    static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
    assert(c.array == correct);
}
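
// Added companion check for equalMask (for floats this is the "oeq" comparison);
// on LDC, the function of the same name from ldc.simd is exercised instead.
unittest
{
    float4 a = [1, 3, 5, 7];
    float4 b = [2, 3, 5, 6];
    int4 c = cast(int4)(equalMask!float4(a, b));
    static immutable int[4] correct = [0, 0xffff_ffff, 0xffff_ffff, 0];
    assert(c.array == correct);
}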

static if (MMXSizedVectorsAreEmulated)
{
    /// MMX-like SIMD types
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);
    }

    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias Vector!(long [1]) long1;
    alias Vector!(float[2]) float2;
    alias Vector!(int  [2]) int2;
    alias Vector!(short[4]) short4;
    alias Vector!(byte [8]) byte8;
}

static assert(float2.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(long1.sizeof == 8);


static if (SSESizedVectorsAreEmulated)
{
    /// SSE-like SIMD types

    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

static assert(float4.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(long2.sizeof == 16);
static assert(double2.sizeof == 16);


static if (AVXSizedVectorsAreEmulated)
{
    /// AVX-like SIMD types

    struct float8
    {
        float[8] array;
        mixin VectorOps!(float8, float[8]);
    }

    struct byte32
    {
        byte[32] array;
        mixin VectorOps!(byte32, byte[32]);
    }

    struct short16
    {
        short[16] array;
        mixin VectorOps!(short16, short[16]);
    }

    struct int8
    {
        int[8] array;
        mixin VectorOps!(int8, int[8]);
    }

    struct long4
    {
        long[4] array;
        mixin VectorOps!(long4, long[4]);
    }

    struct double4
    {
        double[4] array;
        mixin VectorOps!(double4, double[4]);
    }
}
else
{
    public import core.simd;
}

static assert(float8.sizeof == 32);
static assert(byte32.sizeof == 32);
static assert(short16.sizeof == 32);
static assert(int8.sizeof == 32);
static assert(long4.sizeof == 32);
static assert(double4.sizeof == 32);


alias __m256  = float8;
alias __m256i = long4; // long long __vector with ICC, GCC, and clang
alias __m256d = double4;
alias __m128  = float4;
alias __m128i = int4;
alias __m128d = double2;
alias __m64   = long1; // like in Clang, __m64 is a vector of 1 long

int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(x >= 0 && x <= 1);
    assert(y >= 0 && y <= 1);
    return (x << 1) | y;
}

int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(x >= 0 && x <= 3);
    assert(y >= 0 && y <= 3);
    assert(z >= 0 && z <= 3);
    assert(w >= 0 && w <= 3);
    return (z << 6) | (y << 4) | (x << 2) | w;
}

// Test assignment from a scalar to a vector type.
unittest
{
    float4 A = 3.0f;
    float[4] correctA = [3.0f, 3.0f, 3.0f, 3.0f];
    assert(A.array == correctA);

    int2 B = 42;
    int[2] correctB = [42, 42];
    assert(B.array == correctB);
}
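
// Added check for the shuffle-constant helpers; the expected values follow
// directly from the bit packing in the functions above.
unittest
{
    assert(_MM_SHUFFLE2(1, 0) == 2);
    assert(_MM_SHUFFLE(3, 2, 1, 0) == 0b11_10_01_00); // 0xE4, the identity shuffle
}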