1 /** 2 * Hash Set 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.hashset; 9 10 private import containers.internal.hash : generateHash, hashToIndex; 11 private import containers.internal.node : shouldAddGCRange; 12 private import std.experimental.allocator.mallocator : Mallocator; 13 private import std.traits : isBasicType; 14 15 /** 16 * Hash Set. 17 * Params: 18 * T = the element type 19 * Allocator = the allocator to use. Defaults to `Mallocator`. 20 * hashFunction = the hash function to use on the elements 21 * supportGC = true if the container should support holding references to 22 * GC-allocated memory. 23 */ 24 struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T, 25 bool supportGC = shouldAddGCRange!T, 26 bool storeHash = !isBasicType!T) 27 { 28 this(this) @disable; 29 30 private import std.experimental.allocator.common : stateSize; 31 32 static if (stateSize!Allocator != 0) 33 { 34 this() @disable; 35 36 /** 37 * Use the given `allocator` for allocations. 38 */ 39 this(Allocator allocator) 40 in 41 { 42 static if (is(typeof(allocator is null))) 43 assert(allocator !is null, "Allocator must not be null"); 44 } 45 do 46 { 47 this.allocator = allocator; 48 } 49 50 /** 51 * Constructs a HashSet with an initial bucket count of bucketCount. 52 * bucketCount must be a power of two. 53 */ 54 this(size_t bucketCount, Allocator allocator) 55 in 56 { 57 static if (is(typeof(allocator is null))) 58 assert(allocator !is null, "Allocator must not be null"); 59 assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 60 } 61 do 62 { 63 this.allocator = allocator; 64 initialize(bucketCount); 65 } 66 } 67 else 68 { 69 /** 70 * Constructs a HashSet with an initial bucket count of bucketCount. 71 * bucketCount must be a power of two. 72 */ 73 this(size_t bucketCount) 74 in 75 { 76 assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 77 } 78 do 79 { 80 initialize(bucketCount); 81 } 82 83 } 84 85 ~this() 86 { 87 import std.experimental.allocator : dispose; 88 import core.memory : GC; 89 static if (useGC) 90 GC.removeRange(buckets.ptr); 91 allocator.dispose(buckets); 92 } 93 94 /** 95 * Removes all items from the set 96 */ 97 void clear() 98 { 99 foreach (ref bucket; buckets) 100 { 101 destroy(bucket); 102 bucket = Bucket.init; 103 } 104 _length = 0; 105 } 106 107 /** 108 * Removes the given item from the set. 109 * Returns: false if the value was not present 110 */ 111 bool remove(T value) 112 { 113 if (buckets.length == 0) 114 return false; 115 immutable Hash hash = hashFunction(value); 116 immutable size_t index = hashToIndex(hash, buckets.length); 117 static if (storeHash) 118 immutable bool removed = buckets[index].remove(ItemNode(hash, value)); 119 else 120 immutable bool removed = buckets[index].remove(ItemNode(value)); 121 if (removed) 122 --_length; 123 return removed; 124 } 125 126 /** 127 * Returns: true if value is contained in the set. 128 */ 129 bool contains(T value) inout 130 { 131 return (value in this) !is null; 132 } 133 134 /** 135 * Supports $(B a in b) syntax 136 */ 137 inout(T)* opBinaryRight(string op)(T value) inout if (op == "in") 138 { 139 if (buckets.length == 0 || _length == 0) 140 return null; 141 immutable Hash hash = hashFunction(value); 142 immutable index = hashToIndex(hash, buckets.length); 143 return buckets[index].get(value, hash); 144 } 145 146 /** 147 * Inserts the given item into the set. 148 * Params: value = the value to insert 149 * Returns: true if the value was actually inserted, or false if it was 150 * already present. 151 */ 152 bool insert(T value) 153 { 154 if (buckets.length == 0) 155 initialize(4); 156 Hash hash = hashFunction(value); 157 immutable size_t index = hashToIndex(hash, buckets.length); 158 static if (storeHash) 159 auto r = buckets[index].insert(ItemNode(hash, value)); 160 else 161 auto r = buckets[index].insert(ItemNode(value)); 162 if (r) 163 ++_length; 164 if (shouldRehash) 165 rehash(); 166 return r; 167 } 168 169 /// ditto 170 bool opOpAssign(string op)(T item) if (op == "~") 171 { 172 return insert(item); 173 } 174 175 /// ditto 176 alias put = insert; 177 178 /// ditto 179 alias insertAnywhere = insert; 180 181 /** 182 * Returns: true if the set has no items 183 */ 184 bool empty() const nothrow pure @nogc @safe @property 185 { 186 return _length == 0; 187 } 188 189 /** 190 * Returns: the number of items in the set 191 */ 192 size_t length() const nothrow pure @nogc @safe @property 193 { 194 return _length; 195 } 196 197 /** 198 * Forward range interface 199 */ 200 auto opSlice(this This)() nothrow @nogc @trusted 201 { 202 return Range!(This)(&this); 203 } 204 205 private: 206 207 import containers.internal.element_type : ContainerElementType; 208 import containers.internal.mixins : AllocatorState; 209 import containers.internal.node : shouldAddGCRange, FatNodeInfo; 210 import containers.internal.storage_type : ContainerStorageType; 211 import std.traits : isPointer; 212 213 alias LengthType = ubyte; 214 alias N = FatNodeInfo!(ItemNode.sizeof, 1, 64, LengthType.sizeof); 215 enum ITEMS_PER_NODE = N[0]; 216 static assert(LengthType.max > ITEMS_PER_NODE); 217 enum bool useGC = supportGC && shouldAddGCRange!T; 218 alias Hash = typeof({ T v = void; return hashFunction(v); }()); 219 220 void initialize(size_t bucketCount) 221 { 222 import core.memory : GC; 223 import std.experimental.allocator : makeArray; 224 225 makeBuckets(bucketCount); 226 static if (useGC) 227 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 228 } 229 230 static struct Range(ThisT) 231 { 232 this(ThisT* t) 233 { 234 foreach (i, ref bucket; t.buckets) 235 { 236 bucketIndex = i; 237 if (bucket.root !is null) 238 { 239 currentNode = cast(Bucket.BucketNode*) bucket.root; 240 break; 241 } 242 } 243 this.t = t; 244 } 245 246 bool empty() const nothrow @safe @nogc @property 247 { 248 return currentNode is null; 249 } 250 251 ET front() nothrow @safe @nogc @property 252 { 253 return cast(ET) currentNode.items[nodeIndex].value; 254 } 255 256 void popFront() nothrow @trusted @nogc 257 { 258 if (nodeIndex + 1 < currentNode.l) 259 { 260 ++nodeIndex; 261 return; 262 } 263 else 264 { 265 nodeIndex = 0; 266 if (currentNode.next is null) 267 { 268 ++bucketIndex; 269 while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null) 270 ++bucketIndex; 271 if (bucketIndex < t.buckets.length) 272 currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root; 273 else 274 currentNode = null; 275 } 276 else 277 { 278 currentNode = currentNode.next; 279 assert(currentNode.l > 0, "Empty node"); 280 } 281 } 282 } 283 284 private: 285 alias ET = ContainerElementType!(ThisT, T); 286 ThisT* t; 287 Bucket.BucketNode* currentNode; 288 size_t bucketIndex; 289 size_t nodeIndex; 290 } 291 292 void makeBuckets(size_t bucketCount) 293 { 294 import std.experimental.allocator : makeArray; 295 296 static if (stateSize!Allocator == 0) 297 buckets = allocator.makeArray!Bucket(bucketCount); 298 else 299 { 300 import std.conv:emplace; 301 302 buckets = cast(Bucket[]) allocator.allocate(Bucket.sizeof * bucketCount); 303 foreach (ref bucket; buckets) 304 emplace!Bucket(&bucket, allocator); 305 } 306 } 307 308 bool shouldRehash() const pure nothrow @safe @nogc 309 { 310 immutable float numberOfNodes = cast(float) _length / cast(float) ITEMS_PER_NODE; 311 return (numberOfNodes / cast(float) buckets.length) > 0.75f; 312 } 313 314 void rehash() @trusted 315 { 316 import std.experimental.allocator : makeArray, dispose; 317 import core.memory : GC; 318 319 immutable size_t newLength = buckets.length << 1; 320 Bucket[] oldBuckets = buckets; 321 makeBuckets(newLength); 322 assert (buckets, "Bucket reallocation failed"); 323 assert (buckets.length == newLength, "Bucket reallocation size mismatch"); 324 static if (useGC) 325 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 326 foreach (ref const bucket; oldBuckets) 327 { 328 for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next) 329 { 330 for (size_t i = 0; i < node.l; ++i) 331 { 332 static if (storeHash) 333 { 334 immutable Hash hash = node.items[i].hash; 335 size_t index = hashToIndex(hash, buckets.length); 336 buckets[index].insert(ItemNode(hash, node.items[i].value)); 337 } 338 else 339 { 340 immutable Hash hash = hashFunction(node.items[i].value); 341 size_t index = hashToIndex(hash, buckets.length); 342 buckets[index].insert(ItemNode(node.items[i].value)); 343 } 344 } 345 } 346 } 347 static if (useGC) 348 GC.removeRange(oldBuckets.ptr); 349 allocator.dispose(oldBuckets); 350 } 351 352 static struct Bucket 353 { 354 this(this) @disable; 355 356 static if (stateSize!Allocator != 0) 357 { 358 this(Allocator allocator) 359 { 360 this.allocator = allocator; 361 } 362 this() @disable; 363 } 364 365 ~this() 366 { 367 import core.memory : GC; 368 import std.experimental.allocator : dispose; 369 370 BucketNode* current = root; 371 BucketNode* previous; 372 while (true) 373 { 374 if (previous !is null) 375 { 376 static if (useGC) 377 GC.removeRange(previous); 378 allocator.dispose(previous); 379 } 380 previous = current; 381 if (current is null) 382 break; 383 current = current.next; 384 } 385 } 386 387 static struct BucketNode 388 { 389 ContainerStorageType!(T)* get(ItemNode n) 390 { 391 foreach (ref item; items[0 .. l]) 392 { 393 static if (storeHash) 394 { 395 static if (isPointer!T) 396 { 397 if (item.hash == n.hash && *item.value == *n.value) 398 return &item.value; 399 } 400 else 401 { 402 if (item.hash == n.hash && item.value == n.value) 403 return &item.value; 404 } 405 } 406 else 407 { 408 static if (isPointer!T) 409 { 410 if (*item.value == *n.value) 411 return &item.value; 412 } 413 else 414 { 415 if (item.value == n.value) 416 return &item.value; 417 } 418 } 419 } 420 return null; 421 } 422 423 void insert(ItemNode n) 424 { 425 items[l] = n; 426 ++l; 427 } 428 429 bool remove(ItemNode n) 430 { 431 import std.algorithm : SwapStrategy, remove; 432 433 foreach (size_t i, ref node; items[0 .. l]) 434 { 435 static if (storeHash) 436 { 437 static if (isPointer!T) 438 immutable bool matches = node.hash == n.hash && *node.value == *n.value; 439 else 440 immutable bool matches = node.hash == n.hash && node.value == n.value; 441 } 442 else 443 { 444 static if (isPointer!T) 445 immutable bool matches = *node.value == *n.value; 446 else 447 immutable bool matches = node.value == n.value; 448 } 449 if (matches) 450 { 451 items[0 .. l].remove!(SwapStrategy.unstable)(i); 452 l--; 453 return true; 454 } 455 } 456 return false; 457 } 458 459 BucketNode* next; 460 LengthType l; 461 ItemNode[ITEMS_PER_NODE] items; 462 } 463 464 bool insert(ItemNode n) 465 { 466 import core.memory : GC; 467 import std.experimental.allocator : make; 468 469 BucketNode* hasSpace = null; 470 for (BucketNode* current = root; current !is null; current = current.next) 471 { 472 if (current.get(n) !is null) 473 return false; 474 if (current.l < current.items.length) 475 hasSpace = current; 476 } 477 if (hasSpace !is null) 478 hasSpace.insert(n); 479 else 480 { 481 BucketNode* newNode = make!BucketNode(allocator); 482 static if (useGC) 483 GC.addRange(newNode, BucketNode.sizeof); 484 newNode.insert(n); 485 newNode.next = root; 486 root = newNode; 487 } 488 return true; 489 } 490 491 bool remove(ItemNode n) 492 { 493 import core.memory : GC; 494 import std.experimental.allocator : dispose; 495 496 BucketNode* current = root; 497 BucketNode* previous; 498 while (current !is null) 499 { 500 immutable removed = current.remove(n); 501 if (removed) 502 { 503 if (current.l == 0) 504 { 505 if (previous !is null) 506 previous.next = current.next; 507 else 508 root = current.next; 509 static if (useGC) 510 GC.removeRange(current); 511 allocator.dispose(current); 512 } 513 return true; 514 } 515 previous = current; 516 current = current.next; 517 } 518 return false; 519 } 520 521 inout(T)* get(T value, Hash hash) inout 522 { 523 for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next) 524 { 525 static if (storeHash) 526 { 527 auto v = current.get(ItemNode(hash, value)); 528 } 529 else 530 { 531 auto v = current.get(ItemNode(value)); 532 } 533 534 if (v !is null) 535 return cast(typeof(return)) v; 536 } 537 return null; 538 } 539 540 BucketNode* root; 541 mixin AllocatorState!Allocator; 542 } 543 544 static struct ItemNode 545 { 546 bool opEquals(ref const T v) const 547 { 548 static if (isPointer!T) 549 return *v == *value; 550 else 551 return v == value; 552 } 553 554 bool opEquals(ref const ItemNode other) const 555 { 556 static if (storeHash) 557 if (other.hash != hash) 558 return false; 559 static if (isPointer!T) 560 return *other.value == *value; 561 else 562 return other.value == value; 563 } 564 565 static if (storeHash) 566 Hash hash; 567 ContainerStorageType!T value; 568 569 static if (storeHash) 570 { 571 this(Z)(Hash nh, Z nv) 572 { 573 this.hash = nh; 574 this.value = nv; 575 } 576 } 577 else 578 { 579 this(Z)(Z nv) 580 { 581 this.value = nv; 582 } 583 } 584 } 585 586 mixin AllocatorState!Allocator; 587 Bucket[] buckets; 588 size_t _length; 589 } 590 591 /// 592 version(emsi_containers_unittest) unittest 593 { 594 import std.algorithm : canFind; 595 import std.array : array; 596 import std.range : walkLength; 597 import std.uuid : randomUUID; 598 599 auto s = HashSet!string(16); 600 assert(s.remove("DoesNotExist") == false); 601 assert(!s.contains("nonsense")); 602 assert(s.put("test")); 603 assert(s.contains("test")); 604 assert(!s.put("test")); 605 assert(s.contains("test")); 606 assert(s.length == 1); 607 assert(!s.contains("nothere")); 608 s.put("a"); 609 s.put("b"); 610 s.put("c"); 611 s.put("d"); 612 string[] strings = s[].array; 613 assert(strings.canFind("a")); 614 assert(strings.canFind("b")); 615 assert(strings.canFind("c")); 616 assert(strings.canFind("d")); 617 assert(strings.canFind("test")); 618 assert(*("a" in s) == "a"); 619 assert(*("b" in s) == "b"); 620 assert(*("c" in s) == "c"); 621 assert(*("d" in s) == "d"); 622 assert(*("test" in s) == "test"); 623 assert(strings.length == 5); 624 assert(s.remove("test")); 625 assert(s.length == 4); 626 s.clear(); 627 assert(s.length == 0); 628 assert(s.empty); 629 s.put("abcde"); 630 assert(s.length == 1); 631 foreach (i; 0 .. 10_000) 632 { 633 s.put(randomUUID().toString); 634 } 635 assert(s.length == 10_001); 636 637 // Make sure that there's no range violation slicing an empty set 638 HashSet!int e; 639 foreach (i; e[]) 640 assert(i > 0); 641 642 enum MAGICAL_NUMBER = 600_000; 643 644 HashSet!int f; 645 foreach (i; 0 .. MAGICAL_NUMBER) 646 assert(f.insert(i)); 647 assert(f.length == f[].walkLength); 648 foreach (i; 0 .. MAGICAL_NUMBER) 649 assert(i in f); 650 foreach (i; 0 .. MAGICAL_NUMBER) 651 assert(f.remove(i)); 652 foreach (i; 0 .. MAGICAL_NUMBER) 653 assert(!f.remove(i)); 654 655 HashSet!int g; 656 foreach (i; 0 .. MAGICAL_NUMBER) 657 assert(g.insert(i)); 658 659 static struct AStruct 660 { 661 int a; 662 int b; 663 } 664 665 HashSet!(AStruct*, Mallocator, a => a.a) fred; 666 fred.insert(new AStruct(10, 10)); 667 auto h = new AStruct(10, 10); 668 assert(h in fred); 669 } 670 671 version(emsi_containers_unittest) unittest 672 { 673 static class Foo 674 { 675 string name; 676 } 677 678 hash_t stringToHash(string str) @safe pure nothrow @nogc 679 { 680 hash_t hash = 5381; 681 return hash; 682 } 683 684 hash_t FooToHash(Foo e) pure @safe nothrow @nogc 685 { 686 return stringToHash(e.name); 687 } 688 689 HashSet!(Foo, Mallocator, FooToHash) hs; 690 auto f = new Foo; 691 hs.insert(f); 692 assert(f in hs); 693 auto r = hs[]; 694 } 695 696 version(emsi_containers_unittest) unittest 697 { 698 static class Foo 699 { 700 string name; 701 this(string n) { this.name = n; } 702 bool opEquals(const Foo of) const { 703 if(of !is null) { return this.name == of.name; } 704 else { return false; } 705 } 706 } 707 708 hash_t stringToHash(string str) @safe pure nothrow @nogc 709 { 710 hash_t hash = 5381; 711 return hash; 712 } 713 714 hash_t FooToHash(in Foo e) pure @safe nothrow @nogc 715 { 716 return stringToHash(e.name); 717 } 718 719 string foo = "foo"; 720 HashSet!(const(Foo), Mallocator, FooToHash) hs; 721 const(Foo) f = new const Foo(foo); 722 hs.insert(f); 723 assert(f in hs); 724 auto r = hs[]; 725 assert(!r.empty); 726 auto fro = r.front; 727 assert(fro.name == foo); 728 } 729 730 version(emsi_containers_unittest) unittest 731 { 732 hash_t maxCollision(ulong x) 733 { 734 return 0; 735 } 736 737 HashSet!(ulong, Mallocator, maxCollision) set; 738 auto ipn = set.ITEMS_PER_NODE; // Need this info to trigger this bug, so I made it public 739 assert(ipn > 1); // Won't be able to trigger this bug if there's only 1 item per node 740 741 foreach (i; 0 .. 2 * ipn - 1) 742 set.insert(i); 743 744 assert(0 in set); // OK 745 bool ret = set.insert(0); // 0 should be already in the set 746 assert(!ret); // Fails 747 assert(set.length == 2 * ipn - 1); // Fails 748 } 749 750 version(emsi_containers_unittest) unittest 751 { 752 import std.experimental.allocator.showcase; 753 auto allocator = mmapRegionList(1024); 754 auto set = HashSet!(ulong, typeof(&allocator))(0x1000, &allocator); 755 set.insert(124); 756 }