The OpenD Programming Language

1 /**
2  * Open-Addressed Hash Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 module containers.openhashset;
8 
9 private import containers.internal.hash;
10 private import containers.internal.node : shouldAddGCRange;
11 private import std.experimental.allocator.common : stateSize;
12 private import std.experimental.allocator.mallocator : Mallocator;
13 
14 /**
15  * Simple open-addressed hash set that uses linear probing to resolve sollisions.
16  *
17  * Params:
18  *     T = the element type of the hash set
19  *     Allocator = the allocator to use. Defaults to `Mallocator`.
20  *     hashFunction = the hash function to use
21  *     supportGC = if true, calls to GC.addRange and GC.removeRange will be used
22  *         to ensure that the GC does not accidentally free memory owned by this
23  *         container.
24  */
25 struct OpenHashSet(T, Allocator = Mallocator,
26 	alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T)
27 {
28 	/**
29 	 * Disallow copy construction
30 	 */
31 	this(this) @disable;
32 
33 	static if (stateSize!Allocator != 0)
34 	{
35 		this() @disable;
36 
37 		/**
38 		 * Use the given `allocator` for allocations.
39 		 */
40 		this(Allocator allocator)
41 		in
42 		{
43 			static if (is(typeof(allocator is null)))
44 				assert(allocator !is null, "Allocator must not be null");
45 		}
46 		do
47 		{
48 			this.allocator = allocator;
49 		}
50 
51 		/**
52 		 * Initializes the hash set with the given initial capacity.
53 		 *
54 		 * Params:
55 		 *     initialCapacity = the initial capacity for the hash set
56 		 *     allocator = allocator to use for allocations
57 		 */
58 		this(size_t initialCapacity, Allocator allocator)
59 		in
60 		{
61 			static if (is(typeof(allocator is null)))
62 				assert(allocator !is null, "Allocator must not be null");
63 			assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2");
64 		}
65 		do
66 		{
67 			this.allocator = allocator;
68 			initialize(initialCapacity);
69 		}
70 	}
71 	else
72 	{
73 		/**
74 		 * Initializes the hash set with the given initial capacity.
75 		 *
76 		 * Params:
77 		 *     initialCapacity = the initial capacity for the hash set
78 		 */
79 		this(size_t initialCapacity)
80 		in
81 		{
82 			assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2");
83 		}
84 		do
85 		{
86 			initialize(initialCapacity);
87 		}
88 	}
89 
90 	~this() nothrow
91 	{
92 		static if (useGC)
93 			GC.removeRange(nodes.ptr);
94 		allocator.deallocate(nodes);
95 	}
96 
97 	/**
98 	 * Removes all items from the hash set.
99 	 */
100 	void clear()
101 	{
102 		if (empty)
103 			return;
104 		foreach (ref node; nodes)
105 		{
106 			typeid(typeof(node)).destroy(&node);
107 			node.used = false;
108 		}
109 		_length = 0;
110 	}
111 
112 	///
113 	bool empty() const pure nothrow @nogc @safe @property
114 	{
115 		return _length == 0;
116 	}
117 
118 	///
119 	size_t length() const pure nothrow @nogc @safe @property
120 	{
121 		return _length;
122 	}
123 
124 	/**
125 	 * Returns:
126 	 *     $(B true) if the hash set contains the given item, false otherwise.
127 	 */
128 	bool contains(T item) const
129 	{
130 		if (empty)
131 			return false;
132 		immutable size_t hash = hashFunction(item);
133 		size_t index = toIndex(nodes, item, hash);
134 		if (index == size_t.max)
135 			return false;
136 		return nodes[index].hash == hash && nodes[index].data == item;
137 	}
138 
139 	/// ditto
140 	bool opBinaryRight(string op)(T item) inout if (op == "in")
141 	{
142 		return contains(item);
143 	}
144 
145 	/**
146 	 * Inserts the given item into the set.
147 	 *
148 	 * Returns:
149 	 *     $(B true) if the item was inserted, false if it was already present.
150 	 */
151 	bool insert(T item)
152 	{
153 		if (nodes.length == 0)
154 			initialize(DEFAULT_BUCKET_COUNT);
155 		immutable size_t hash = hashFunction(item);
156 		size_t index = toIndex(nodes, item, hash);
157 		if (index == size_t.max)
158 		{
159 			grow();
160 			index = toIndex(nodes, item, hash);
161 		}
162 		else if (nodes[index].used && nodes[index].hash == hash && nodes[index].data == item)
163 			return false;
164 		nodes[index].used = true;
165 		nodes[index].hash = hash;
166 		nodes[index].data = item;
167 		_length++;
168 		return true;
169 	}
170 
171 	/// ditto
172 	alias put = insert;
173 
174 	/// ditto
175 	alias insertAnywhere = insert;
176 
177 	/// ditto
178 	bool opOpAssign(string op)(T item) if (op == "~")
179 	{
180 		return insert(item);
181 	}
182 
183 	/**
184 	 * Params:
185 	 *     item = the item to remove
186 	 * Returns:
187 	 *     $(B true) if the item was removed, $(B false) if it was not present
188 	 */
189 	bool remove(T item)
190 	{
191 		if (empty)
192 			return false;
193 		immutable size_t hash = hashFunction(item);
194 		size_t index = toIndex(nodes, item, hash);
195 		if (index == size_t.max)
196 			return false;
197 		nodes[index].used = false;
198 		destroy(nodes[index].data);
199 		_length--;
200 		return true;
201 	}
202 
203 	/**
204 	 * Returns:
205 	 *     A range over the set.
206 	 */
207 	auto opSlice(this This)() nothrow pure @nogc @safe
208 	{
209 		return Range!(This)(nodes);
210 	}
211 
212 	mixin AllocatorState!Allocator;
213 
214 private:
215 
216 	import containers.internal.element_type : ContainerElementType;
217 	import containers.internal.mixins : AllocatorState;
218 	import containers.internal.storage_type : ContainerStorageType;
219 	import core.memory : GC;
220 
221 	enum bool useGC = supportGC && shouldAddGCRange!T;
222 
223 	static struct Range(ThisT)
224 	{
225 		ET front()
226 		{
227 			return cast(typeof(return)) nodes[index].data;
228 		}
229 
230 		bool empty() const pure nothrow @safe @nogc @property
231 		{
232 			return index >= nodes.length;
233 		}
234 
235 		void popFront() pure nothrow @safe @nogc
236 		{
237 			index++;
238 			while (index < nodes.length && !nodes[index].used)
239 				index++;
240 		}
241 
242 	private:
243 
244 		alias ET = ContainerElementType!(ThisT, T);
245 
246 		this(const Node[] nodes)
247 		{
248 			this.nodes = nodes;
249 			while (true)
250 			{
251 				if (index >= nodes.length || nodes[index].used)
252 					break;
253 				index++;
254 			}
255 		}
256 
257 		size_t index;
258 		const Node[] nodes;
259 	}
260 
261 	void grow()
262 	{
263 		immutable size_t newCapacity = nodes.length << 1;
264 		Node[] newNodes = (cast (Node*) allocator.allocate(newCapacity * Node.sizeof))
265 			[0 .. newCapacity];
266 		newNodes[] = Node.init;
267 		static if (useGC)
268 			GC.addRange(newNodes.ptr, newNodes.length * Node.sizeof, typeid(typeof(nodes)));
269 		foreach (ref node; nodes)
270 		{
271 			immutable size_t newIndex = toIndex(newNodes, node.data, node.hash);
272 			newNodes[newIndex] = node;
273 		}
274 		static if (useGC)
275 			GC.removeRange(nodes.ptr);
276 		allocator.deallocate(nodes);
277 		nodes = newNodes;
278 	}
279 
280 	void initialize(size_t nodeCount)
281 	{
282 		nodes = (cast (Node*) allocator.allocate(nodeCount * Node.sizeof))[0 .. nodeCount];
283 		static if (useGC)
284 			GC.addRange(nodes.ptr, nodes.length * Node.sizeof, typeid(typeof(nodes)));
285         nodes[] = Node.init;
286 		_length = 0;
287 	}
288 
289 	// Returns: size_t.max if the item was not found
290 	static size_t toIndex(const Node[] n, T item, size_t hash)
291 	{
292 		assert (n.length > 0, "Empty node");
293 		immutable size_t index = hashToIndex(hash, n.length);
294 		size_t i = index;
295 		immutable bucketMask = n.length - 1;
296 		while (n[i].used && n[i].data != item)
297 		{
298 			i = (i + 1) & bucketMask;
299 			if (i == index)
300 				return size_t.max;
301 		}
302 		return i;
303 	}
304 
305 	Node[] nodes;
306 	size_t _length;
307 
308 	static struct Node
309 	{
310 		ContainerStorageType!T data;
311 		bool used;
312 		size_t hash;
313 	}
314 }
315 
316 version(emsi_containers_unittest) unittest
317 {
318 	import std.string : format;
319 	import std.algorithm : equal, sort;
320 	import std.range : iota;
321 	import std.array : array;
322 	OpenHashSet!int ints;
323 	assert (ints.empty);
324 	assert (equal(ints[], cast(int[]) []));
325 	ints.clear();
326 	ints.insert(10);
327 	assert (!ints.empty);
328 	assert (ints.length == 1);
329 	assert (equal(ints[], [10]));
330 	assert (ints.contains(10));
331 	ints.clear();
332 	assert (ints.length == 0);
333 	assert (ints.empty);
334 	ints ~= 0;
335 	assert (!ints.empty);
336 	assert (ints.length == 1);
337 	assert (equal(ints[], [0]));
338 	ints.clear();
339 	assert (ints.length == 0);
340 	assert (ints.empty);
341 	foreach (i; 0 .. 100)
342 		ints ~= i;
343 	assert (ints.length == 100, "%d".format(ints.length));
344 	assert (!ints.empty);
345 	foreach (i; 0 .. 100)
346 		assert (i in ints);
347 	assert (equal(ints[].array().sort(), iota(0, 100)));
348 	assert (ints.insert(10) == false);
349 	auto ohs = OpenHashSet!int(8);
350 	assert (!ohs.remove(1000));
351 	assert (ohs.contains(99) == false);
352 	assert (ohs.insert(10) == true);
353 	assert (ohs.insert(10) == false);
354 	foreach (i; 0 .. 7)
355 		ohs.insert(i);
356 	assert (ohs.contains(6));
357 	assert (!ohs.contains(100));
358 	assert (!ohs.remove(9999));
359 	assert (ohs.remove(0));
360 	assert (ohs.remove(1));
361 }
362 
363 version(emsi_containers_unittest) unittest
364 {
365 	static class Foo
366 	{
367 		string name;
368 
369 		override bool opEquals(Object other) const @safe pure nothrow @nogc
370 		{
371 			Foo f = cast(Foo)other;
372 			return f !is null && f.name == this.name;
373 		}
374 	}
375 
376 	hash_t stringToHash(string str) @safe pure nothrow @nogc
377 	{
378 		hash_t hash = 5381;
379 		return hash;
380 	}
381 
382 	hash_t FooToHash(Foo e) pure @safe nothrow @nogc
383 	{
384 		return stringToHash(e.name);
385 	}
386 
387 	OpenHashSet!(Foo, Mallocator, FooToHash) hs;
388 	auto f = new Foo;
389 	hs.insert(f);
390 	assert(f in hs);
391 	auto r = hs[];
392 }