The OpenD Programming Language

1 /**
2  * Hash Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.hashset;
9 
10 private import containers.internal.hash : generateHash, hashToIndex;
11 private import containers.internal.node : shouldAddGCRange;
12 private import std.experimental.allocator.mallocator : Mallocator;
13 private import std.traits : isBasicType;
14 
15 /**
16  * Hash Set.
17  * Params:
18  *     T = the element type
19  *     Allocator = the allocator to use. Defaults to `Mallocator`.
20  *     hashFunction = the hash function to use on the elements
21  *     supportGC = true if the container should support holding references to
22  *         GC-allocated memory.
23  */
24 struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
25 	bool supportGC = shouldAddGCRange!T,
26 	bool storeHash = !isBasicType!T)
27 {
28 	this(this) @disable;
29 
30 	private import std.experimental.allocator.common : stateSize;
31 
32 	static if (stateSize!Allocator != 0)
33 	{
34 		this() @disable;
35 
36 		/**
37 		 * Use the given `allocator` for allocations.
38 		 */
39 		this(Allocator allocator)
40 		in
41 		{
42 			static if (is(typeof(allocator is null)))
43 				assert(allocator !is null, "Allocator must not be null");
44 		}
45 		do
46 		{
47 			this.allocator = allocator;
48 		}
49 
50 		/**
51 		 * Constructs a HashSet with an initial bucket count of bucketCount.
52 		 * bucketCount must be a power of two.
53 		 */
54 		this(size_t bucketCount, Allocator allocator)
55 		in
56 		{
57 			static if (is(typeof(allocator is null)))
58 				assert(allocator !is null, "Allocator must not be null");
59 			assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
60 		}
61 		do
62 		{
63 			this.allocator = allocator;
64 			initialize(bucketCount);
65 		}
66 	}
67 	else
68 	{
69 		/**
70 		 * Constructs a HashSet with an initial bucket count of bucketCount.
71 		 * bucketCount must be a power of two.
72 		 */
73 		this(size_t bucketCount)
74 		in
75 		{
76 			assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
77 		}
78 		do
79 		{
80 			initialize(bucketCount);
81 		}
82 
83 	}
84 
85 	~this()
86 	{
87 		import std.experimental.allocator : dispose;
88 		import core.memory : GC;
89 		static if (useGC)
90 			GC.removeRange(buckets.ptr);
91 		allocator.dispose(buckets);
92 	}
93 
94 	/**
95 	 * Removes all items from the set
96 	 */
97 	void clear()
98 	{
99 		foreach (ref bucket; buckets)
100 		{
101 			destroy(bucket);
102 			bucket = Bucket.init;
103 		}
104 		_length = 0;
105 	}
106 
107 	/**
108 	 * Removes the given item from the set.
109 	 * Returns: false if the value was not present
110 	 */
111 	bool remove(T value)
112 	{
113 		if (buckets.length == 0)
114 			return false;
115 		immutable Hash hash = hashFunction(value);
116 		immutable size_t index = hashToIndex(hash, buckets.length);
117 		static if (storeHash)
118 			immutable bool removed = buckets[index].remove(ItemNode(hash, value));
119 		else
120 			immutable bool removed = buckets[index].remove(ItemNode(value));
121 		if (removed)
122 			--_length;
123 		return removed;
124 	}
125 
126 	/**
127 	 * Returns: true if value is contained in the set.
128 	 */
129 	bool contains(T value) inout
130 	{
131 		return (value in this) !is null;
132 	}
133 
134 	/**
135 	 * Supports $(B a in b) syntax
136 	 */
137 	inout(T)* opBinaryRight(string op)(T value) inout if (op == "in")
138 	{
139 		if (buckets.length == 0 || _length == 0)
140 			return null;
141 		immutable Hash hash = hashFunction(value);
142 		immutable index = hashToIndex(hash, buckets.length);
143 		return buckets[index].get(value, hash);
144 	}
145 
146 	/**
147 	 * Inserts the given item into the set.
148 	 * Params: value = the value to insert
149 	 * Returns: true if the value was actually inserted, or false if it was
150 	 *     already present.
151 	 */
152 	bool insert(T value)
153 	{
154 		if (buckets.length == 0)
155 			initialize(4);
156 		Hash hash = hashFunction(value);
157 		immutable size_t index = hashToIndex(hash, buckets.length);
158 		static if (storeHash)
159 			auto r = buckets[index].insert(ItemNode(hash, value));
160 		else
161 			auto r = buckets[index].insert(ItemNode(value));
162 		if (r)
163 			++_length;
164 		if (shouldRehash)
165 			rehash();
166 		return r;
167 	}
168 
169 	/// ditto
170 	bool opOpAssign(string op)(T item) if (op == "~")
171 	{
172 		return insert(item);
173 	}
174 
175 	/// ditto
176 	alias put = insert;
177 
178 	/// ditto
179 	alias insertAnywhere = insert;
180 
181 	/**
182 	 * Returns: true if the set has no items
183 	 */
184 	bool empty() const nothrow pure @nogc @safe @property
185 	{
186 		return _length == 0;
187 	}
188 
189 	/**
190 	 * Returns: the number of items in the set
191 	 */
192 	size_t length() const nothrow pure @nogc @safe @property
193 	{
194 		return _length;
195 	}
196 
197 	/**
198 	 * Forward range interface
199 	 */
200 	auto opSlice(this This)() nothrow @nogc @trusted
201 	{
202 		return Range!(This)(&this);
203 	}
204 
205 private:
206 
207 	import containers.internal.element_type : ContainerElementType;
208 	import containers.internal.mixins : AllocatorState;
209 	import containers.internal.node : shouldAddGCRange, FatNodeInfo;
210 	import containers.internal.storage_type : ContainerStorageType;
211 	import std.traits : isPointer;
212 
213 	alias LengthType = ubyte;
214 	alias N = FatNodeInfo!(ItemNode.sizeof, 1, 64, LengthType.sizeof);
215 	enum ITEMS_PER_NODE = N[0];
216 	static assert(LengthType.max > ITEMS_PER_NODE);
217 	enum bool useGC = supportGC && shouldAddGCRange!T;
218 	alias Hash = typeof({ T v = void; return hashFunction(v); }());
219 
220 	void initialize(size_t bucketCount)
221 	{
222 		import core.memory : GC;
223 		import std.experimental.allocator : makeArray;
224 
225 		makeBuckets(bucketCount);
226 		static if (useGC)
227 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
228 	}
229 
230 	static struct Range(ThisT)
231 	{
232 		this(ThisT* t)
233 		{
234 			foreach (i, ref bucket; t.buckets)
235 			{
236 				bucketIndex = i;
237 				if (bucket.root !is null)
238 				{
239 					currentNode = cast(Bucket.BucketNode*) bucket.root;
240 					break;
241 				}
242 			}
243 			this.t = t;
244 		}
245 
246 		bool empty() const nothrow @safe @nogc @property
247 		{
248 			return currentNode is null;
249 		}
250 
251 		ET front() nothrow @safe @nogc @property
252 		{
253 			return cast(ET) currentNode.items[nodeIndex].value;
254 		}
255 
256 		void popFront() nothrow @trusted @nogc
257 		{
258 			if (nodeIndex + 1 < currentNode.l)
259 			{
260 				++nodeIndex;
261 				return;
262 			}
263 			else
264 			{
265 				nodeIndex = 0;
266 				if (currentNode.next is null)
267 				{
268 					++bucketIndex;
269 					while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null)
270 						++bucketIndex;
271 					if (bucketIndex < t.buckets.length)
272 						currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root;
273 					else
274 						currentNode = null;
275 				}
276 				else
277 				{
278 					currentNode = currentNode.next;
279 					assert(currentNode.l > 0, "Empty node");
280 				}
281 			}
282 		}
283 
284 	private:
285 		alias ET = ContainerElementType!(ThisT, T);
286 		ThisT* t;
287 		Bucket.BucketNode* currentNode;
288 		size_t bucketIndex;
289 		size_t nodeIndex;
290 	}
291 
292 	void makeBuckets(size_t bucketCount)
293 	{
294 		import std.experimental.allocator : makeArray;
295 
296 		static if (stateSize!Allocator == 0)
297 			buckets = allocator.makeArray!Bucket(bucketCount);
298 		else
299 		{
300 			import std.conv:emplace;
301 
302 			buckets = cast(Bucket[]) allocator.allocate(Bucket.sizeof * bucketCount);
303 			foreach (ref bucket; buckets)
304 				emplace!Bucket(&bucket, allocator);
305 		}
306 	}
307 
308 	bool shouldRehash() const pure nothrow @safe @nogc
309 	{
310 		immutable float numberOfNodes = cast(float) _length / cast(float) ITEMS_PER_NODE;
311 		return (numberOfNodes / cast(float) buckets.length) > 0.75f;
312 	}
313 
314 	void rehash() @trusted
315 	{
316 		import std.experimental.allocator : makeArray, dispose;
317 		import core.memory : GC;
318 
319 		immutable size_t newLength = buckets.length << 1;
320 		Bucket[] oldBuckets = buckets;
321 		makeBuckets(newLength);
322 		assert (buckets, "Bucket reallocation failed");
323 		assert (buckets.length == newLength, "Bucket reallocation size mismatch");
324 		static if (useGC)
325 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
326 		foreach (ref const bucket; oldBuckets)
327 		{
328 			for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next)
329 			{
330 				for (size_t i = 0; i < node.l; ++i)
331 				{
332 					static if (storeHash)
333 					{
334 						immutable Hash hash = node.items[i].hash;
335 						size_t index = hashToIndex(hash, buckets.length);
336 						buckets[index].insert(ItemNode(hash, node.items[i].value));
337 					}
338 					else
339 					{
340 						immutable Hash hash = hashFunction(node.items[i].value);
341 						size_t index = hashToIndex(hash, buckets.length);
342 						buckets[index].insert(ItemNode(node.items[i].value));
343 					}
344 				}
345 			}
346 		}
347 		static if (useGC)
348 			GC.removeRange(oldBuckets.ptr);
349 		allocator.dispose(oldBuckets);
350 	}
351 
352 	static struct Bucket
353 	{
354 		this(this) @disable;
355 
356 		static if (stateSize!Allocator != 0)
357 		{
358 			this(Allocator allocator)
359 			{
360 				this.allocator = allocator;
361 			}
362 			this() @disable;
363 		}
364 
365 		~this()
366 		{
367 			import core.memory : GC;
368 			import std.experimental.allocator : dispose;
369 
370 			BucketNode* current = root;
371 			BucketNode* previous;
372 			while (true)
373 			{
374 				if (previous !is null)
375 				{
376 					static if (useGC)
377 						GC.removeRange(previous);
378 					allocator.dispose(previous);
379 				}
380 				previous = current;
381 				if (current is null)
382 					break;
383 				current = current.next;
384 			}
385 		}
386 
387 		static struct BucketNode
388 		{
389 			ContainerStorageType!(T)* get(ItemNode n)
390 			{
391 				foreach (ref item; items[0 .. l])
392 				{
393 					static if (storeHash)
394 					{
395 						static if (isPointer!T)
396 						{
397 							if (item.hash == n.hash && *item.value == *n.value)
398 								return &item.value;
399 						}
400 						else
401 						{
402 							if (item.hash == n.hash && item.value == n.value)
403 								return &item.value;
404 						}
405 					}
406 					else
407 					{
408 						static if (isPointer!T)
409 						{
410 							if (*item.value == *n.value)
411 								return &item.value;
412 						}
413 						else
414 						{
415 							if (item.value == n.value)
416 								return &item.value;
417 						}
418 					}
419 				}
420 				return null;
421 			}
422 
423 			void insert(ItemNode n)
424 			{
425 				items[l] = n;
426 				++l;
427 			}
428 
429 			bool remove(ItemNode n)
430 			{
431 				import std.algorithm : SwapStrategy, remove;
432 
433 				foreach (size_t i, ref node; items[0 .. l])
434 				{
435 					static if (storeHash)
436 					{
437 						static if (isPointer!T)
438 							immutable bool matches = node.hash == n.hash && *node.value == *n.value;
439 						else
440 							immutable bool matches = node.hash == n.hash && node.value == n.value;
441 					}
442 					else
443 					{
444 						static if (isPointer!T)
445 							immutable bool matches = *node.value == *n.value;
446 						else
447 							immutable bool matches = node.value == n.value;
448 					}
449 					if (matches)
450 					{
451 						items[0 .. l].remove!(SwapStrategy.unstable)(i);
452 						l--;
453 						return true;
454 					}
455 				}
456 				return false;
457 			}
458 
459 			BucketNode* next;
460 			LengthType l;
461 			ItemNode[ITEMS_PER_NODE] items;
462 		}
463 
464 		bool insert(ItemNode n)
465 		{
466 			import core.memory : GC;
467 			import std.experimental.allocator : make;
468 
469 			BucketNode* hasSpace = null;
470 			for (BucketNode* current = root; current !is null; current = current.next)
471 			{
472 				if (current.get(n) !is null)
473 					return false;
474 				if (current.l < current.items.length)
475 					hasSpace = current;
476 			}
477 			if (hasSpace !is null)
478 				hasSpace.insert(n);
479 			else
480 			{
481 				BucketNode* newNode = make!BucketNode(allocator);
482 				static if (useGC)
483 					GC.addRange(newNode, BucketNode.sizeof);
484 				newNode.insert(n);
485 				newNode.next = root;
486 				root = newNode;
487 			}
488 			return true;
489 		}
490 
491 		bool remove(ItemNode n)
492 		{
493 			import core.memory : GC;
494 			import std.experimental.allocator : dispose;
495 
496 			BucketNode* current = root;
497 			BucketNode* previous;
498 			while (current !is null)
499 			{
500 				immutable removed = current.remove(n);
501 				if (removed)
502 				{
503 					if (current.l == 0)
504 					{
505 						if (previous !is null)
506 							previous.next = current.next;
507 						else
508 							root = current.next;
509 						static if (useGC)
510 							GC.removeRange(current);
511 						allocator.dispose(current);
512 					}
513 					return true;
514 				}
515 				previous = current;
516 				current = current.next;
517 			}
518 			return false;
519 		}
520 
521 		inout(T)* get(T value, Hash hash) inout
522 		{
523 			for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next)
524 			{
525 				static if (storeHash)
526 				{
527 					auto v = current.get(ItemNode(hash, value));
528 				}
529 				else
530 				{
531 					auto v = current.get(ItemNode(value));
532 				}
533 
534 				if (v !is null)
535 					return cast(typeof(return)) v;
536 			}
537 			return null;
538 		}
539 
540 		BucketNode* root;
541 		mixin AllocatorState!Allocator;
542 	}
543 
544 	static struct ItemNode
545 	{
546 		bool opEquals(ref const T v) const
547 		{
548 			static if (isPointer!T)
549 				return *v == *value;
550 			else
551 				return v == value;
552 		}
553 
554 		bool opEquals(ref const ItemNode other) const
555 		{
556 			static if (storeHash)
557 				if (other.hash != hash)
558 					return false;
559 			static if (isPointer!T)
560 				return *other.value == *value;
561 			else
562 				return other.value == value;
563 		}
564 
565 		static if (storeHash)
566 			Hash hash;
567 		ContainerStorageType!T value;
568 
569 		static if (storeHash)
570 		{
571 			this(Z)(Hash nh, Z nv)
572 			{
573 				this.hash = nh;
574 				this.value = nv;
575 			}
576 		}
577 		else
578 		{
579 			this(Z)(Z nv)
580 			{
581 				this.value = nv;
582 			}
583 		}
584 	}
585 
586 	mixin AllocatorState!Allocator;
587 	Bucket[] buckets;
588 	size_t _length;
589 }
590 
591 ///
592 version(emsi_containers_unittest) unittest
593 {
594 	import std.algorithm : canFind;
595 	import std.array : array;
596 	import std.range : walkLength;
597 	import std.uuid : randomUUID;
598 
599 	auto s = HashSet!string(16);
600 	assert(s.remove("DoesNotExist") == false);
601 	assert(!s.contains("nonsense"));
602 	assert(s.put("test"));
603 	assert(s.contains("test"));
604 	assert(!s.put("test"));
605 	assert(s.contains("test"));
606 	assert(s.length == 1);
607 	assert(!s.contains("nothere"));
608 	s.put("a");
609 	s.put("b");
610 	s.put("c");
611 	s.put("d");
612 	string[] strings = s[].array;
613 	assert(strings.canFind("a"));
614 	assert(strings.canFind("b"));
615 	assert(strings.canFind("c"));
616 	assert(strings.canFind("d"));
617 	assert(strings.canFind("test"));
618 	assert(*("a" in s) == "a");
619 	assert(*("b" in s) == "b");
620 	assert(*("c" in s) == "c");
621 	assert(*("d" in s) == "d");
622 	assert(*("test" in s) == "test");
623 	assert(strings.length == 5);
624 	assert(s.remove("test"));
625 	assert(s.length == 4);
626 	s.clear();
627 	assert(s.length == 0);
628 	assert(s.empty);
629 	s.put("abcde");
630 	assert(s.length == 1);
631 	foreach (i; 0 .. 10_000)
632 	{
633 		s.put(randomUUID().toString);
634 	}
635 	assert(s.length == 10_001);
636 
637 	// Make sure that there's no range violation slicing an empty set
638 	HashSet!int e;
639 	foreach (i; e[])
640 		assert(i > 0);
641 
642 	enum MAGICAL_NUMBER = 600_000;
643 
644 	HashSet!int f;
645 	foreach (i; 0 .. MAGICAL_NUMBER)
646 		assert(f.insert(i));
647 	assert(f.length == f[].walkLength);
648 	foreach (i; 0 .. MAGICAL_NUMBER)
649 		assert(i in f);
650 	foreach (i; 0 .. MAGICAL_NUMBER)
651 		assert(f.remove(i));
652 	foreach (i; 0 .. MAGICAL_NUMBER)
653 		assert(!f.remove(i));
654 
655 	HashSet!int g;
656 	foreach (i; 0 .. MAGICAL_NUMBER)
657 		assert(g.insert(i));
658 
659 	static struct AStruct
660 	{
661 		int a;
662 		int b;
663 	}
664 
665 	HashSet!(AStruct*, Mallocator, a => a.a) fred;
666 	fred.insert(new AStruct(10, 10));
667 	auto h = new AStruct(10, 10);
668 	assert(h in fred);
669 }
670 
671 version(emsi_containers_unittest) unittest
672 {
673 	static class Foo
674 	{
675 		string name;
676 	}
677 
678 	hash_t stringToHash(string str) @safe pure nothrow @nogc
679 	{
680 		hash_t hash = 5381;
681 		return hash;
682 	}
683 
684 	hash_t FooToHash(Foo e) pure @safe nothrow @nogc
685 	{
686 		return stringToHash(e.name);
687 	}
688 
689 	HashSet!(Foo, Mallocator, FooToHash) hs;
690 	auto f = new Foo;
691 	hs.insert(f);
692 	assert(f in hs);
693 	auto r = hs[];
694 }
695 
696 version(emsi_containers_unittest) unittest
697 {
698 	static class Foo
699 	{
700 		string name;
701 		this(string n) { this.name = n; }
702 		bool opEquals(const Foo of) const {
703 			if(of !is null) { return this.name == of.name; }
704 			else { return false; }
705 		}
706 	}
707 
708 	hash_t stringToHash(string str) @safe pure nothrow @nogc
709 	{
710 		hash_t hash = 5381;
711 		return hash;
712 	}
713 
714 	hash_t FooToHash(in Foo e) pure @safe nothrow @nogc
715 	{
716 		return stringToHash(e.name);
717 	}
718 
719 	string foo = "foo";
720 	HashSet!(const(Foo), Mallocator, FooToHash) hs;
721 	const(Foo) f = new const Foo(foo);
722 	hs.insert(f);
723 	assert(f in hs);
724 	auto r = hs[];
725 	assert(!r.empty);
726 	auto fro = r.front;
727 	assert(fro.name == foo);
728 }
729 
730 version(emsi_containers_unittest) unittest
731 {
732 	hash_t maxCollision(ulong x)
733 	{
734 		return 0;
735 	}
736 
737 	HashSet!(ulong, Mallocator, maxCollision) set;
738 	auto ipn = set.ITEMS_PER_NODE; // Need this info to trigger this bug, so I made it public
739 	assert(ipn > 1); // Won't be able to trigger this bug if there's only 1 item per node
740 
741 	foreach (i; 0 .. 2 * ipn - 1)
742 		set.insert(i);
743 
744 	assert(0 in set); // OK
745 	bool ret = set.insert(0); // 0 should be already in the set
746 	assert(!ret); // Fails
747 	assert(set.length == 2 * ipn - 1); // Fails
748 }
749 
750 version(emsi_containers_unittest) unittest
751 {
752 	import std.experimental.allocator.showcase;
753 	auto allocator = mmapRegionList(1024);
754 	auto set = HashSet!(ulong, typeof(&allocator))(0x1000, &allocator);
755 	set.insert(124);
756 }