/**
* SHA intrinsics.
* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#othertechs=SHA
*
* Copyright: Guillaume Piolat 2021.
*            Johan Engelen 2021.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.shaintrin;

// SHA instructions
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=SHA
// Note: this header will work whether you have SHA enabled or not.
// With LDC, use "dflags-ldc": ["-mattr=+sha"] or equivalent to actively
// generate SHA instructions.
// With GDC, use "dflags-gdc": ["-msha"] or equivalent to generate SHA instructions.

public import inteli.types;
import inteli.internals;



nothrow @nogc:

/+
/// Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable a, add that value to the scheduled values (unsigned 32-bit integers) in b, and store the result in dst.
__m128i _mm_sha1nexte_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1nexte(cast(int4) a, cast(int4) b);
    }
    else
    {
        assert(0);
    }
}
unittest
{
}
+/

/+
/// Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from a and b, and store the result in dst.
__m128i _mm_sha1msg1_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1msg1(cast(int4) a, cast(int4) b);
    }
    else
    {
        assert(0);
    }
}
unittest
{
}
+/

/+
/// Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in a and the previous message values in b, and store the result in dst.
__m128i _mm_sha1msg2_epu32(__m128i a, __m128i b) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1msg2(cast(int4) a, cast(int4) b);
    }
    else
    {
        assert(0);
    }
}
unittest
{
}
+/

/+
/// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) from a and some pre-computed sum of the next 4 round message values (unsigned 32-bit integers), and state variable E from b, and store the updated SHA1 state (A,B,C,D) in dst. func contains the logic functions and round constants.
__m128i _mm_sha1rnds4_epu32(__m128i a, __m128i b, const int func) @trusted
{
    static if (SHA_builtins)
    {
        return __builtin_ia32_sha1rnds4(cast(int4) a, cast(int4) b, func);
    }
    else
    {
        assert(0);
    }

}
+/

/// Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit
/// integers) using previous message values from `a` and `b`, and return the result.
///
/// With `W0..W3` taken from lanes 0..3 of `a` and `W4` taken from lane 0 of `b`, the software
/// fallback returns `[W0 + sigma0(W1), W1 + sigma0(W2), W2 + sigma0(W3), W3 + sigma0(W4)]`,
/// where `sigma0(x) = ror(x, 7) ^ ror(x, 18) ^ (x >> 3)`. Additions wrap modulo 2^32.
///
/// Params:
///     a = four 32-bit message words `W0..W3`.
///     b = only lane 0 (`W4`) is read by the software fallback.
/// Returns: the four updated 32-bit words, lane `i` derived from `W[i]` and `W[i+1]`.
__m128i _mm_sha256msg1_epu32(__m128i a, __m128i b) @trusted
{
    static if (GDC_or_LDC_with_SHA)
    {
        // Hardware path: forward directly to the compiler builtin.
        return __builtin_ia32_sha256msg1(cast(int4) a, cast(int4) b);
    }
    else
    {
        // Software path: explicit per-lane computation.
        static uint sigma0(uint x) nothrow @nogc @safe
        {
            return bitwiseRotateRight_uint(x, 7) ^ bitwiseRotateRight_uint(x, 18) ^ (x >> 3);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;
        uint W4 = b4.array[0];
        uint W3 = a4.array[3];
        uint W2 = a4.array[2];
        uint W1 = a4.array[1];
        uint W0 = a4.array[0];
        dst.ptr[3] = W3 + sigma0(W4);
        dst.ptr[2] = W2 + sigma0(W3);
        dst.ptr[1] = W1 + sigma0(W2);
        dst.ptr[0] = W0 + sigma0(W1);
        return cast(__m128i) dst;
    }
}
unittest
{
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i result = _mm_sha256msg1_epu32(a, b);
    assert(result.array == [671416337, 69238821, 2114864873, 503574586]);
}

/// Perform the final calculation for the next four SHA256 message values (unsigned 32-bit
/// integers) using previous message values from `a` and `b`, and return the result.
///
/// With `W14`, `W15` taken from lanes 2, 3 of `b`, the software fallback returns
/// `[W16, W17, W18, W19]` where `W16 = a[0] + sigma1(W14)`, `W17 = a[1] + sigma1(W15)`,
/// `W18 = a[2] + sigma1(W16)`, `W19 = a[3] + sigma1(W17)` and
/// `sigma1(x) = ror(x, 17) ^ ror(x, 19) ^ (x >> 10)`. Additions wrap modulo 2^32.
///
/// Params:
///     a = four 32-bit partial sums, one per output lane.
///     b = only lanes 2 and 3 (`W14`, `W15`) are read by the software fallback.
/// Returns: the four 32-bit words `W16..W19` in lanes 0..3.
__m128i _mm_sha256msg2_epu32(__m128i a, __m128i b) @trusted
{
    static if (GDC_or_LDC_with_SHA)
    {
        // Hardware path: forward directly to the compiler builtin.
        return __builtin_ia32_sha256msg2(cast(int4) a, cast(int4) b);
    }
    else
    {
        // Software path: explicit per-lane computation.
        static uint sigma1(uint x) nothrow @nogc @safe
        {
            return bitwiseRotateRight_uint(x, 17) ^ bitwiseRotateRight_uint(x, 19) ^ (x >> 10);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;
        uint W14 = b4.array[2];
        uint W15 = b4.array[3];
        uint W16 = a4.array[0] + sigma1(W14);
        uint W17 = a4.array[1] + sigma1(W15);
        // W18 and W19 depend on the freshly computed W16 and W17.
        uint W18 = a4.array[2] + sigma1(W16);
        uint W19 = a4.array[3] + sigma1(W17);
        dst.ptr[3] = W19;
        dst.ptr[2] = W18;
        dst.ptr[1] = W17;
        dst.ptr[0] = W16;
        return cast(__m128i) dst;
    }
}
unittest
{
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i result = _mm_sha256msg2_epu32(a, b);
    assert(result.array == [5324815, 505126944, -2012842764, -1542210977]);
}

/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum of the next 2 round message values (unsigned 32-bit integers) and the corresponding round constants from `k`, and return the updated SHA256 state (A,B,E,F).
__m128i _mm_sha256rnds2_epu32(__m128i a, __m128i b, __m128i k) @trusted
{
    // Two SHA-256 rounds.
    //   a = state words (C, D, G, H) in lanes 3, 2, 1, 0
    //   b = state words (A, B, E, F) in lanes 3, 2, 1, 0
    //   k = lanes 0 and 1 hold the message-word + round-constant sums for
    //       round 1 and round 2; lanes 2 and 3 are not read here.
    // Returns the updated (A, B, E, F) in lanes 3, 2, 1, 0.

    // TODO: the pragma(inline, false) prevents a DMD 1.100
    //       regression in Linux + x86_64 + -b release-unittest, report that

    version(DigitalMars)
    {
        enum bool workaround = true;
    }
    else
    {
        enum bool workaround = false;
    }

    static if (GDC_or_LDC_with_SHA)
    {
        // Hardware path: forward directly to the compiler builtin.
        return __builtin_ia32_sha256rnds2(cast(int4) a, cast(int4) b, cast(int4) k);
    }
    else
    {
        // Software path. The helpers below are kept out-of-line on DMD only
        // (see the TODO above).
        static uint Ch(uint x, uint y, uint z) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return z ^ (x & (y ^ z));
        }

        static uint Maj(uint x, uint y, uint z) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return (x & y) | (z & (x ^ y));
        }

        static uint sum0(uint x) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return bitwiseRotateRight_uint(x, 2) ^ bitwiseRotateRight_uint(x, 13) ^ bitwiseRotateRight_uint(x, 22);
        }

        static uint sum1(uint x) nothrow @nogc @safe
        {
            static if (workaround) pragma (inline, false);
            return bitwiseRotateRight_uint(x, 6) ^ bitwiseRotateRight_uint(x, 11) ^ bitwiseRotateRight_uint(x, 25);
        }

        int4 dst;
        int4 a4 = cast(int4) a;
        int4 b4 = cast(int4) b;
        int4 k4 = cast(int4) k;

        // Unpack the eight working variables and the two round inputs.
        const A0 = b4.array[3];
        const B0 = b4.array[2];
        const C0 = a4.array[3];
        const D0 = a4.array[2];
        const E0 = b4.array[1];
        const F0 = b4.array[0];
        const G0 = a4.array[1];
        const H0 = a4.array[0];
        const W_K0 = k4.array[0];
        const W_K1 = k4.array[1];

        // Round 1. T1 is shared by the new A and the new E; all sums wrap
        // modulo 2^32, so computing it once does not change the result.
        const uint T1_0 = Ch(E0, F0, G0) + sum1(E0) + W_K0 + H0;
        const A1 = T1_0 + Maj(A0, B0, C0) + sum0(A0);
        const B1 = A0;
        const C1 = B0;
        const D1 = C0;
        const E1 = T1_0 + D0;
        const F1 = E0;
        const G1 = F0;
        const H1 = G0;

        // Round 2. Only A, B, E and F are returned, so the second-round
        // C, D, G and H (which would be B1, C1, F1, G1) are not materialised.
        const uint T1_1 = Ch(E1, F1, G1) + sum1(E1) + W_K1 + H1;
        const A2 = T1_1 + Maj(A1, B1, C1) + sum0(A1);
        const B2 = A1;
        const E2 = T1_1 + D1;
        const F2 = E1;

        dst.ptr[3] = A2;
        dst.ptr[2] = B2;
        dst.ptr[1] = E2;
        dst.ptr[0] = F2;

        return cast(__m128i) dst;
    }
}
unittest
{
    __m128i a = [15, 20, 130, 12345];
    __m128i b = [15, 20, 130, 12345];
    __m128i k = [15, 20, 130, 12345];
    __m128i result = _mm_sha256rnds2_epu32(a, b, k);
    assert(result.array == [1384123044, -2050674062, 327754346, 956342016]);
}

// Rotate the 32-bit `value` right by `count` bit positions.
// `count` must be in 0 .. 31 (checked by the assert).
private uint bitwiseRotateRight_uint(const uint value, const uint count) pure @safe
{
    enum uint bits = cast(uint) (8 * uint.sizeof);
    assert(count < bits);
    // Mask the complementary shift so that count == 0 shifts left by 0
    // instead of by the full width, which is not a legal shift amount.
    return (value >> count) | (value << ((bits - count) & (bits - 1)));
}