1 /** 2 * SIMD-accelerated Set 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0) 6 */ 7 module containers.simdset; 8 9 private import std.experimental.allocator.mallocator : Mallocator; 10 11 /** 12 * Set implementation that is well suited for small sets and simple items. 13 * 14 * Uses SSE instructions to compare multiple elements simultaneously, but has 15 * linear time complexity. 16 * 17 * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in 18 * this container unless they are also stored somewhere else. 19 * 20 * Params: 21 * T = the element type 22 * Allocator = the allocator to use. Defaults to `Mallocator`. 23 */ 24 version (D_InlineAsm_X86_64) struct SimdSet(T, Allocator = Mallocator) 25 if (T.sizeof == 1 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8) 26 { 27 this(this) @disable; 28 29 private import std.experimental.allocator.common : stateSize; 30 31 static if (stateSize!Allocator != 0) 32 { 33 /// No default construction if an allocator must be provided. 34 this() @disable; 35 36 /** 37 * Use the given `allocator` for allocations. 38 */ 39 this(Allocator allocator) 40 in 41 { 42 static if (is(typeof(allocator is null))) 43 assert(allocator !is null, "Allocator must not be null"); 44 } 45 do 46 { 47 this.allocator = allocator; 48 } 49 } 50 51 ~this() 52 { 53 scope (failure) assert(false, "SimdSet destructor threw an exception"); 54 clear(); 55 } 56 57 void clear() 58 { 59 allocator.deallocate(cast(void[]) storage); 60 _length = 0; 61 storage = []; 62 } 63 64 /** 65 * Params: 66 * item = the item to check 67 * Returns: 68 * true if the set contains the given item 69 */ 70 bool contains(T item) const pure nothrow @nogc @trusted 71 { 72 if (_length == 0) 73 return false; 74 bool retVal; 75 immutable remainder = _length % (16 / T.sizeof); 76 ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1; 77 //ushort resultMask; 78 ulong ptrStart = cast(ulong) storage.ptr; 79 ulong ptrEnd = ptrStart + storage.length * T.sizeof; 80 static if (T.sizeof == 1) 81 ulong needle = (cast(ubyte) item) * 0x01010101_01010101; 82 else static if (T.sizeof == 2) 83 ulong needle = (cast(ushort) item) * 0x00010001_00010001; 84 else static if (T.sizeof == 4) 85 ulong needle = (cast(ulong) item) * 0x00000001_00000001; 86 else static if (T.sizeof == 8) 87 ulong needle = cast(ulong) item; 88 else 89 static assert(false); 90 mixin(asmSearch()); 91 end: 92 return retVal; 93 } 94 95 /// ditto 96 bool opBinaryRight(string op)(T item) const pure nothrow @nogc @safe if (op == "in") 97 { 98 return contains(item); 99 } 100 101 /** 102 * Inserts the given item into the set. 103 * 104 * Params: 105 * item = the item to insert 106 * Returns: 107 * true if the item was inserted or false if it was already present 108 */ 109 bool insert(T item) 110 { 111 if (contains(item)) 112 return false; 113 if (storage.length > _length) 114 storage[_length] = item; 115 else 116 { 117 immutable size_t cl = (storage.length * T.sizeof); 118 immutable size_t nl = cl + 16; 119 void[] a = cast(void[]) storage; 120 allocator.reallocate(a, nl); 121 storage = cast(typeof(storage)) a; 122 storage[_length] = item; 123 } 124 _length++; 125 return true; 126 } 127 128 /// ditto 129 bool opOpAssign(string op)(T item) if (op == "~") 130 { 131 return insert(item); 132 } 133 134 /// ditto 135 alias insertAnywhere = insert; 136 137 /// ditto 138 alias put = insert; 139 140 /** 141 * Removes the given item from the set. 142 * 143 * Params: 144 * item = the time to remove 145 * Returns: 146 * true if the item was removed, false if it was not present 147 */ 148 bool remove(T item) 149 { 150 import std.algorithm : countUntil; 151 152 // TODO: Make this more efficient 153 154 ptrdiff_t begin = countUntil(storage, item); 155 if (begin == -1) 156 return false; 157 foreach (i; begin .. _length - 1) 158 storage[i] = storage[i + 1]; 159 _length--; 160 return true; 161 } 162 163 /** 164 * Slice operator 165 */ 166 auto opSlice(this This)() 167 { 168 import containers.internal.element_type : ContainerElementType; 169 return cast(ContainerElementType!(This, T)[]) storage[0 .. _length]; 170 } 171 172 /** 173 * Returns: 174 * the number of items in the set 175 */ 176 size_t length() const pure nothrow @nogc @property 177 { 178 return _length; 179 } 180 181 invariant 182 { 183 assert((storage.length * T.sizeof) % 16 == 0, "Bad storage length"); 184 } 185 186 private: 187 188 import containers.internal.storage_type : ContainerStorageType; 189 private import containers.internal.mixins : AllocatorState; 190 191 static string asmSearch() 192 { 193 import std.string : format; 194 195 static if (T.sizeof == 1) 196 enum instruction = `pcmpeqb`; 197 else static if (T.sizeof == 2) 198 enum instruction = `pcmpeqw`; 199 else static if (T.sizeof == 4) 200 enum instruction = `pcmpeqd`; 201 else static if (T.sizeof == 8) 202 enum instruction = `pcmpeqq`; 203 else 204 static assert(false); 205 206 static if (__VERSION__ >= 2067) 207 string s = `asm pure nothrow @nogc`; 208 else 209 string s = `asm`; 210 211 return s ~ ` 212 { 213 mov R8, ptrStart; 214 mov R9, ptrEnd; 215 sub R8, 16; 216 sub R9, 16; 217 movq XMM0, needle; 218 shufpd XMM0, XMM0, 0; 219 loop: 220 add R8, 16; 221 movdqu XMM1, [R8]; 222 %s XMM1, XMM0; 223 pmovmskb RAX, XMM1; 224 //mov resultMask, AX; 225 mov BX, AX; 226 and BX, mask; 227 cmp R8, R9; 228 cmove AX, BX; 229 popcnt AX, AX; 230 test AX, AX; 231 jnz found; 232 cmp R8, R9; 233 jl loop; 234 mov retVal, 0; 235 jmp end; 236 found: 237 mov retVal, 1; 238 jmp end; 239 }`.format(instruction); 240 } 241 242 mixin AllocatorState!Allocator; 243 ContainerStorageType!(T)[] storage; 244 size_t _length; 245 } 246 247 /// 248 version (D_InlineAsm_X86_64) version(emsi_containers_unittest) unittest 249 { 250 import std.string : format; 251 252 void testSimdSet(T)() 253 { 254 SimdSet!T set; 255 assert(set.insert(1)); 256 assert(set.length == 1); 257 assert(set.contains(1)); 258 assert(!set.insert(1)); 259 set.insert(0); 260 set.insert(20); 261 assert(set.contains(1)); 262 assert(set.contains(0)); 263 assert(!set.contains(10)); 264 assert(!set.contains(50)); 265 assert(set.contains(20)); 266 foreach (T i; 28 .. 127) 267 set.insert(i); 268 foreach (T i; 28 .. 127) 269 assert(set.contains(i), "%d".format(i)); 270 foreach (T i; 28 .. 127) 271 assert(set.remove(i)); 272 assert(set.length == 3, "%d".format(set.length)); 273 assert(set.contains(0)); 274 assert(set.contains(1)); 275 assert(set.contains(20)); 276 assert(!set.contains(28)); 277 } 278 279 testSimdSet!ubyte(); 280 testSimdSet!ushort(); 281 testSimdSet!uint(); 282 testSimdSet!ulong(); 283 testSimdSet!byte(); 284 testSimdSet!short(); 285 testSimdSet!int(); 286 testSimdSet!long(); 287 } 288 289 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1 290 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)) 291 { 292 import std.string : format; 293 static assert (false, ("Cannot instantiate SimdSet of type %s because its size " 294 ~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof)); 295 }