The OpenD Programming Language

1 /**
2  * SIMD-accelerated Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0)
6  */
7 module containers.simdset;
8 
9 private import std.experimental.allocator.mallocator : Mallocator;
10 
11 /**
12  * Set implementation that is well suited for small sets and simple items.
13  *
14  * Uses SSE instructions to compare multiple elements simultaneously, but has
15  * linear time complexity.
16  *
17  * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in
18  * this container unless they are also stored somewhere else.
19  *
20  * Params:
21  *     T = the element type
22  *     Allocator = the allocator to use. Defaults to `Mallocator`.
23  */
24 version (D_InlineAsm_X86_64) struct SimdSet(T, Allocator = Mallocator)
25 	if (T.sizeof == 1 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)
26 {
27 	this(this) @disable;
28 
29 	private import std.experimental.allocator.common : stateSize;
30 
31 	static if (stateSize!Allocator != 0)
32 	{
33 		/// No default construction if an allocator must be provided.
34 		this() @disable;
35 
36 		/**
37 		 * Use the given `allocator` for allocations.
38 		 */
39 		this(Allocator allocator)
40 		in
41 		{
42 			static if (is(typeof(allocator is null)))
43 				assert(allocator !is null, "Allocator must not be null");
44 		}
45 		do
46 		{
47 			this.allocator = allocator;
48 		}
49 	}
50 
51 	~this()
52 	{
53 		scope (failure) assert(false, "SimdSet destructor threw an exception");
54 		clear();
55 	}
56 
57 	void clear()
58 	{
59 		allocator.deallocate(cast(void[]) storage);
60 		_length = 0;
61 		storage = [];
62 	}
63 
64 	/**
65 	 * Params:
66 	 *     item = the item to check
67 	 * Returns:
68 	 *     true if the set contains the given item
69 	 */
70 	bool contains(T item) const pure nothrow @nogc @trusted
71 	{
72 		if (_length == 0)
73 			return false;
74 		bool retVal;
75 		immutable remainder = _length % (16 / T.sizeof);
76 		ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1;
77 		//ushort resultMask;
78 		ulong ptrStart = cast(ulong) storage.ptr;
79 		ulong ptrEnd = ptrStart + storage.length * T.sizeof;
80 		static if (T.sizeof == 1)
81 			ulong needle = (cast(ubyte) item) * 0x01010101_01010101;
82 		else static if (T.sizeof == 2)
83 			ulong needle = (cast(ushort) item) * 0x00010001_00010001;
84 		else static if (T.sizeof == 4)
85 			ulong needle = (cast(ulong) item) * 0x00000001_00000001;
86 		else static if (T.sizeof == 8)
87 			ulong needle = cast(ulong) item;
88 		else
89 			static assert(false);
90 		mixin(asmSearch());
91 	end:
92 		return retVal;
93 	}
94 
95 	/// ditto
96 	bool opBinaryRight(string op)(T item) const pure nothrow @nogc @safe if (op == "in")
97 	{
98 		return contains(item);
99 	}
100 
101 	/**
102 	 * Inserts the given item into the set.
103 	 *
104 	 * Params:
105 	 *     item = the item to insert
106 	 * Returns:
107 	 *     true if the item was inserted or false if it was already present
108 	 */
109 	bool insert(T item)
110 	{
111 		if (contains(item))
112 			return false;
113 		if (storage.length > _length)
114 			storage[_length] = item;
115 		else
116 		{
117 			immutable size_t cl = (storage.length * T.sizeof);
118 			immutable size_t nl = cl + 16;
119 			void[] a = cast(void[]) storage;
120 			allocator.reallocate(a, nl);
121 			storage = cast(typeof(storage)) a;
122 			storage[_length] = item;
123 		}
124 		_length++;
125 		return true;
126 	}
127 
128 	/// ditto
129 	bool opOpAssign(string op)(T item) if (op == "~")
130 	{
131 		return insert(item);
132 	}
133 
134 	/// ditto
135 	alias insertAnywhere = insert;
136 
137 	/// ditto
138 	alias put = insert;
139 
140 	/**
141 	 * Removes the given item from the set.
142 	 *
143 	 * Params:
144 	 *     item = the time to remove
145 	 * Returns:
146 	 *     true if the item was removed, false if it was not present
147 	 */
148 	bool remove(T item)
149 	{
150 		import std.algorithm : countUntil;
151 
152 		// TODO: Make this more efficient
153 
154 		ptrdiff_t begin = countUntil(storage, item);
155 		if (begin == -1)
156 			return false;
157 		foreach (i; begin .. _length - 1)
158 			storage[i] = storage[i + 1];
159 		_length--;
160 		return true;
161 	}
162 
163 	/**
164 	 * Slice operator
165 	 */
166 	auto opSlice(this This)()
167 	{
168 		import containers.internal.element_type : ContainerElementType;
169 		return cast(ContainerElementType!(This, T)[]) storage[0 .. _length];
170 	}
171 
172 	/**
173 	 * Returns:
174 	 *     the number of items in the set
175 	 */
176 	size_t length() const pure nothrow @nogc @property
177 	{
178 		return _length;
179 	}
180 
181 	invariant
182 	{
183 		assert((storage.length * T.sizeof) % 16 == 0, "Bad storage length");
184 	}
185 
186 private:
187 
188 	import containers.internal.storage_type : ContainerStorageType;
189 	private import containers.internal.mixins : AllocatorState;
190 
191 	static string asmSearch()
192 	{
193 		import std.string : format;
194 
195 		static if (T.sizeof == 1)
196 			enum instruction = `pcmpeqb`;
197 		else static if (T.sizeof == 2)
198 			enum instruction = `pcmpeqw`;
199 		else static if (T.sizeof == 4)
200 			enum instruction = `pcmpeqd`;
201 		else static if (T.sizeof == 8)
202 			enum instruction = `pcmpeqq`;
203 		else
204 			static assert(false);
205 
206 		static if (__VERSION__ >= 2067)
207 			string s = `asm pure nothrow @nogc`;
208 		else
209 			string s = `asm`;
210 
211 		return s ~ `
212 		{
213 			mov R8, ptrStart;
214 			mov R9, ptrEnd;
215 			sub R8, 16;
216 			sub R9, 16;
217 			movq XMM0, needle;
218 			shufpd XMM0, XMM0, 0;
219 		loop:
220 			add R8, 16;
221 			movdqu XMM1, [R8];
222 			%s XMM1, XMM0;
223 			pmovmskb RAX, XMM1;
224 			//mov resultMask, AX;
225 			mov BX, AX;
226 			and BX, mask;
227 			cmp R8, R9;
228 			cmove AX, BX;
229 			popcnt AX, AX;
230 			test AX, AX;
231 			jnz found;
232 			cmp R8, R9;
233 			jl loop;
234 			mov retVal, 0;
235 			jmp end;
236 		found:
237 			mov retVal, 1;
238 			jmp end;
239 		}`.format(instruction);
240 	}
241 
242 	mixin AllocatorState!Allocator;
243 	ContainerStorageType!(T)[] storage;
244 	size_t _length;
245 }
246 
247 ///
248 version (D_InlineAsm_X86_64) version(emsi_containers_unittest) unittest
249 {
250 	import std.string : format;
251 
252 	void testSimdSet(T)()
253 	{
254 		SimdSet!T set;
255 		assert(set.insert(1));
256 		assert(set.length == 1);
257 		assert(set.contains(1));
258 		assert(!set.insert(1));
259 		set.insert(0);
260 		set.insert(20);
261 		assert(set.contains(1));
262 		assert(set.contains(0));
263 		assert(!set.contains(10));
264 		assert(!set.contains(50));
265 		assert(set.contains(20));
266 		foreach (T i; 28 .. 127)
267 			set.insert(i);
268 		foreach (T i; 28 .. 127)
269 			assert(set.contains(i), "%d".format(i));
270 		foreach (T i; 28 .. 127)
271 			assert(set.remove(i));
272 		assert(set.length == 3, "%d".format(set.length));
273 		assert(set.contains(0));
274 		assert(set.contains(1));
275 		assert(set.contains(20));
276 		assert(!set.contains(28));
277 	}
278 
279 	testSimdSet!ubyte();
280 	testSimdSet!ushort();
281 	testSimdSet!uint();
282 	testSimdSet!ulong();
283 	testSimdSet!byte();
284 	testSimdSet!short();
285 	testSimdSet!int();
286 	testSimdSet!long();
287 }
288 
289 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1
290 	|| T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8))
291 {
292 	import std.string : format;
293 	static assert (false, ("Cannot instantiate SimdSet of type %s because its size "
294 		~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof));
295 }