The OpenD Programming Language

1 /**
2 * `core.simd` emulation layer.
3 *
4 * Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module inteli.types;
8 
9 
10 pure:
11 nothrow:
12 @nogc:
13 
// Compile-time detection of the vector widths the active compiler provides natively.
// Each `*SizedVectorsAreEmulated` flag is `true` when the vector types of that width
// have to be replaced by the struct-based emulation defined later in this module.
version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    // Future: just detect vectors, do not base upon arch.

    version(X86_64)
    {
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        // Does GDC support AVX-sized vectors?
        static if (__VERSION__ >= 2100) // Starting at GDC 12.1 only.
        {
            // Probe the type itself: emulate only if `__vector(double[4])` is not a valid type.
            enum AVXSizedVectorsAreEmulated = !(is(__vector(double[4])));
        }
        else
        {
            enum AVXSizedVectorsAreEmulated = true;
        }

        import gcc.builtins;
    }
    else
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;

    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
    enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    static if (__VERSION__ >= 2100)
    {
        // Note: turning this true is very desirable for DMD performance,
        // but also leads to many bugs being discovered upstream.
        // The fact that it works at all relies on many workarounds.
        // In particular intel-intrinsics with this "on" is a honeypot for DMD backend bugs,
        // and a very strong DMD codegen test suite.
        // What happens typically is that contributors end up on a DMD bug in their PR.
        // But finally, in 2022 D_SIMD has been activated, at least for SSE and some instructions.
        enum bool tryToEnableCoreSimdWithDMD = true;
    }
    else
    {
        enum bool tryToEnableCoreSimdWithDMD = false;
    }

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;

        // Note: with DMD, AVX-sized vectors can't be enabled yet.
        // On linux + x86_64, this will fail since a few operands seem to be missing.
        // FUTURE: enable AVX-sized vectors in DMD. :)
        //
        // Blockers: https://issues.dlang.org/show_bug.cgi?id=24283 and 24284
        //           Probably other, unreported issues.
        //
        // Both branches are deliberately `true` for now; the D_AVX branch is the one
        // to flip once the blockers above are resolved.
        version(D_AVX)
            enum AVXSizedVectorsAreEmulated = true;
        else
            enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else
{
    // Unrecognized compiler: no native vector support can be assumed, so emulate
    // every vector width. Without this branch the three flags would be undefined
    // and the rest of the module would fail with "undefined identifier" errors.
    enum MMXSizedVectorsAreEmulated = true;
    enum SSESizedVectorsAreEmulated = true;
    enum AVXSizedVectorsAreEmulated = true;
}
100 
/// `true` as soon as at least one vector width (MMX-, SSE- or AVX-sized) is emulated.
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated
                       || SSESizedVectorsAreEmulated
                       || AVXSizedVectorsAreEmulated;
102 
103 static if (CoreSimdIsEmulated)
104 {
105     // core.simd is emulated in some capacity: introduce `VectorOps`
106 
107     mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
108     {
109         enum Count = N;
110         alias Base = BaseType;
111 
112         BaseType* ptr() return pure nothrow @nogc
113         {
114             return array.ptr;
115         }
116 
117         // Unary operators
118         VectorType opUnary(string op)() pure nothrow @safe @nogc
119         {
120             VectorType res = void;
121             mixin("res.array[] = " ~ op ~ "array[];");
122             return res;
123         }
124 
125         // Binary operators
126         VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
127         {
128             VectorType res = void;
129             mixin("res.array[] = array[] " ~ op ~ " other.array[];");
130             return res;
131         }
132 
133         // Assigning a BaseType value
134         void opAssign(BaseType e) pure nothrow @safe @nogc
135         {
136             array[] = e;
137         }
138 
139         // Assigning a static array
140         void opAssign(ArrayType v) pure nothrow @safe @nogc
141         {
142             array[] = v[];
143         }
144 
145         void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
146         {
147             mixin("array[] "  ~ op ~ "= other.array[];");
148         }
149 
150         // Assigning a dyn array
151         this(ArrayType v) pure nothrow @safe @nogc
152         {
153             array[] = v[];
154         }
155 
156         // Broadcast constructor
157         this(BaseType x) pure nothrow @safe @nogc
158         {
159             array[] = x;
160         }
161 
162         /// We can't support implicit conversion but do support explicit casting.
163         /// "Vector types of the same size can be implicitly converted among each other."
164         /// Casting to another vector type is always just a raw copy.
165         VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
166             if (VecDest.sizeof == VectorType.sizeof)
167             {
168                 VecDest dest = void;
169                 // Copy
170                 dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
171                 return dest;
172             }
173 
174         ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
175         {
176             return array[i];
177         }
178 
179     }
180 }
else
{
    public import core.simd;

    // Unsigned vector aliases, for internal use only.
    // LDC can implicitly convert a __vector from signed to unsigned but GDC cannot,
    // and GDC sometimes needs those unsigned vector types for some intrinsics.
    package
    {
        alias ubyte8  = Vector!(ubyte[8]);
        alias ubyte16 = Vector!(ubyte[16]);
        alias ushort8 = Vector!(ushort[8]);
    }

    // The 32-byte variants only exist when AVX-sized vectors are native.
    static if (!AVXSizedVectorsAreEmulated)
    {
        package alias ubyte32  = Vector!(ubyte[32]);
        package alias ushort16 = Vector!(ushort[16]);
    }
}
198 
199 // Emulate ldc.simd cmpMask and other masks.
200 // Note: these should be deprecated on non-LDC, 
201 // since it's slower to generate that code.
version(LDC)
{
    // LDC: the mask functions come from ldc.simd, nothing to emulate.
}
else
{
    // TODO: deprecated and write plain versions instead

    /// Element type of the vector type `V`, i.e. the type of `V.array[0]`.
    private template BaseType(V)
    {
        alias BaseType = typeof( ( { V v; return v; }()).array[0]);
    }

    /// The "true" lane value for vector type `V`: all bits set.
    /// For `float` and `double` the all-ones bit pattern is reinterpreted as the
    /// floating-point type; for every other element type it is `-1`.
    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == double))
        {
            immutable ulong allBits = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&allBits);
        }
        else static if (is(Elem == float))
        {
            immutable uint allBits = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&allBits);
        }
        else // integer case
        {
            enum Elem TrueMask = -1;
        }
    }

    /// Lane-wise `a == b`. Each lane of the result is `TrueMask!Vec` where the
    /// comparison holds and `0` elsewhere.
    /// For floats, equivalent to "oeq" comparison.
    Vec equalMask(Vec)(Vec a, Vec b) @trusted
    {
        enum size_t N = Vec.array.length;
        Vec mask;
        foreach(int lane; 0..N)
        {
            mask.ptr[lane] = (a.array[lane] == b.array[lane]) ? TrueMask!Vec : 0;
        }
        return mask;
    }

    /// Lane-wise `a > b`. Each lane of the result is `TrueMask!Vec` where the
    /// comparison holds and `0` elsewhere.
    /// For floats, equivalent to "ogt" comparison.
    Vec greaterMask(Vec)(Vec a, Vec b) @trusted
    {
        enum size_t N = Vec.array.length;
        Vec mask;
        foreach(int lane; 0..N)
        {
            mask.ptr[lane] = (a.array[lane] > b.array[lane]) ? TrueMask!Vec : 0;
        }
        return mask;
    }
}
257 
unittest
{
    // Lane-wise "greater than": only the last two lanes of `lhs` exceed `rhs`,
    // so only those lanes must come back with all bits set.
    static immutable int[4] expected = [0, 0, 0xffff_ffff, 0xffff_ffff];

    float4 lhs = [1, 3, 5, 7];
    float4 rhs = [2, 3, 4, 5];

    int4 mask = cast(int4)(greaterMask!float4(lhs, rhs));
    assert(mask.array == expected);
}
266 
static if (MMXSizedVectorsAreEmulated)
{
    // MMX-like SIMD types, emulated: each one wraps an 8-byte static array
    // and gets its operations from `VectorOps`.

    /// Emulated vector of 2 `float`.
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);
    }

    /// Emulated vector of 8 `byte`.
    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    /// Emulated vector of 4 `short`.
    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    /// Emulated vector of 2 `int`.
    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    /// Emulated vector of 1 `long`.
    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;

    alias float2 = Vector!(float[2]);
    alias byte8  = Vector!(byte [8]);
    alias short4 = Vector!(short[4]);
    alias int2   = Vector!(int  [2]);
    alias long1  = Vector!(long [1]);
}

// Emulated or native, every MMX-sized vector must occupy exactly 8 bytes.
static assert(float2.sizeof == 8);
static assert(byte8.sizeof  == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof   == 8);
static assert(long1.sizeof  == 8);
316 
317 
static if (SSESizedVectorsAreEmulated)
{
    // SSE-like SIMD types, emulated: each one wraps a 16-byte static array
    // and gets its operations from `VectorOps`.

    /// Emulated vector of 4 `float`.
    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    /// Emulated vector of 16 `byte`.
    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    /// Emulated vector of 8 `short`.
    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    /// Emulated vector of 4 `int`.
    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    /// Emulated vector of 2 `long`.
    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    /// Emulated vector of 2 `double`.
    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

// Emulated or native, every SSE-sized vector must occupy exactly 16 bytes.
static assert(float4.sizeof  == 16);
static assert(byte16.sizeof  == 16);
static assert(short8.sizeof  == 16);
static assert(int4.sizeof    == 16);
static assert(long2.sizeof   == 16);
static assert(double2.sizeof == 16);
365 
366 
static if (AVXSizedVectorsAreEmulated)
{
    // AVX-like SIMD types, emulated: each one wraps a 32-byte static array
    // and gets its operations from `VectorOps`.

    /// Emulated vector of 8 `float`.
    struct float8
    {
        float[8] array;
        mixin VectorOps!(float8, float[8]);
    }

    /// Emulated vector of 32 `byte`.
    struct byte32
    {
        byte[32] array;
        mixin VectorOps!(byte32, byte[32]);
    }

    /// Emulated vector of 16 `short`.
    struct short16
    {
        short[16] array;
        mixin VectorOps!(short16, short[16]);
    }

    /// Emulated vector of 8 `int`.
    struct int8
    {
        int[8] array;
        mixin VectorOps!(int8, int[8]);
    }

    /// Emulated vector of 4 `long`.
    struct long4
    {
        long[4] array;
        mixin VectorOps!(long4, long[4]);
    }

    /// Emulated vector of 4 `double`.
    struct double4
    {
        double[4] array;
        mixin VectorOps!(double4, double[4]);
    }
}
else
{
    // AVX-sized vectors are not emulated: take the types from core.simd.
    public import core.simd;
}

// Emulated or native, every AVX-sized vector must occupy exactly 32 bytes.
static assert(float8.sizeof  == 32);
static assert(byte32.sizeof  == 32);
static assert(short16.sizeof == 32);
static assert(int8.sizeof    == 32);
static assert(long4.sizeof   == 32);
static assert(double4.sizeof == 32);
417 
418 
419 
420 
// Intel-style names for the vector types, from narrowest to widest.
alias __m64   = long1;   // like in Clang, __m64 is a vector of 1 long

alias __m128  = float4;
alias __m128i = int4;
alias __m128d = double2;

alias __m256  = float8;
alias __m256i = long4;   // long long __vector with ICC, GCC, and clang
alias __m256d = double4;
428 
/// Packs two 1-bit selectors into a single value: `x` goes to bit 1, `y` to bit 0.
/// Both `x` and `y` are asserted to be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(0 <= x && x <= 1);
    assert(0 <= y && y <= 1);

    immutable int highBit = x << 1;
    return highBit | y;
}
435 
/// Packs four 2-bit selectors into a single value:
/// `z` goes to bits 7..6, `y` to bits 5..4, `x` to bits 3..2 and `w` to bits 1..0.
/// Every selector is asserted to be in the range 0..3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(0 <= x && x <= 3);
    assert(0 <= y && y <= 3);
    assert(0 <= z && z <= 3);
    assert(0 <= w && w <= 3);

    immutable int upperPair = (z << 6) | (y << 4);
    immutable int lowerPair = (x << 2) | w;
    return upperPair | lowerPair;
}
444 
// Initializing a vector from a single scalar must set every lane to that scalar.
unittest
{
    // 16-byte float vector
    float[4] expectedFloats = [3.0f, 3.0f, 3.0f, 3.0f];
    float4 floats = 3.0f;
    assert(floats.array == expectedFloats);

    // 8-byte int vector
    int[2] expectedInts = [42, 42];
    int2 ints = 42;
    assert(ints.array == expectedInts);
}