1 /** 2 * Hash Map 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.hashmap; 9 10 private import core.lifetime : move; 11 private import containers.internal.hash; 12 private import containers.internal.node : shouldAddGCRange; 13 private import std.experimental.allocator.mallocator : Mallocator; 14 private import std.traits : isBasicType, Unqual; 15 16 /** 17 * Associative array / hash map. 18 * Params: 19 * K = the key type 20 * V = the value type 21 * Allocator = the allocator type to use. Defaults to `Mallocator` 22 * hashFunction = the hash function to use on the keys 23 * supportGC = true if the container should support holding references to 24 * GC-allocated memory. 25 */ 26 struct HashMap(K, V, Allocator = Mallocator, alias hashFunction = generateHash!K, 27 bool supportGC = shouldAddGCRange!K || shouldAddGCRange!V, 28 bool storeHash = true) 29 { 30 this(this) @disable; 31 32 private import std.experimental.allocator.common : stateSize; 33 34 static if (stateSize!Allocator != 0) 35 { 36 this() @disable; 37 38 /** 39 * Use the given `allocator` for allocations. 40 */ 41 this(Allocator allocator) pure nothrow @nogc @safe 42 in 43 { 44 static if (is(typeof(allocator is null))) 45 assert(allocator !is null, "Allocator must not be null"); 46 } 47 do 48 { 49 this.allocator = allocator; 50 } 51 52 /** 53 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount 54 * must be a power of two. 55 */ 56 this(size_t bucketCount, Allocator allocator) 57 in 58 { 59 static if (is(typeof(allocator is null))) 60 assert(allocator !is null, "Allocator must not be null"); 61 assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 62 } 63 do 64 { 65 this.allocator = allocator; 66 initialize(bucketCount); 67 } 68 69 invariant 70 { 71 static if (is(typeof(allocator is null))) 72 assert(allocator !is null, "Allocator must not be null"); 73 } 74 } 75 else 76 { 77 /** 78 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount 79 * must be a power of two. 80 */ 81 this(size_t bucketCount) 82 in 83 { 84 assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 85 } 86 do 87 { 88 initialize(bucketCount); 89 } 90 } 91 92 ~this() nothrow 93 { 94 scope (failure) assert(false, "HashMap destructor threw an exception"); 95 clear(); 96 } 97 98 /** 99 * Removes all items from the map 100 */ 101 void clear() 102 { 103 import std.experimental.allocator : dispose; 104 105 // always remove ranges from GC first before disposing of buckets, to 106 // prevent segfaults when the GC collects at an unfortunate time 107 static if (useGC) 108 GC.removeRange(buckets.ptr); 109 allocator.dispose(buckets); 110 111 buckets = null; 112 _length = 0; 113 } 114 115 /** 116 * Supports `aa[key]` syntax. 117 */ 118 ref opIndex(this This)(K key) 119 { 120 import std.conv : text; 121 import std.exception : enforce; 122 123 alias CET = ContainerElementType!(This, V, true); 124 size_t i; 125 auto n = find(key, i); 126 enforce(n !is null, "'" ~ text(key) ~ "' not found in HashMap"); 127 return *cast(CET*) &n.value; 128 } 129 130 /** 131 * Returns: `true` if there is an entry in this map for the given `key`, 132 * false otherwise. 133 */ 134 bool containsKey(this This)(K key) inout 135 { 136 size_t i; 137 return find(key, i) !is null; 138 } 139 140 /** 141 * Gets the value for the given key, or returns `defaultValue` if the given 142 * key is not present. 143 * 144 * Params: 145 * key = the key to look up 146 * defaultValue = the default value 147 * Returns: the value indexed by `key`, if present, or `defaultValue` otherwise. 148 */ 149 auto get(this This)(K key, lazy V defaultValue) 150 { 151 alias CET = ContainerElementType!(This, V); 152 153 size_t i; 154 auto n = find(key, i); 155 if (n is null) 156 return defaultValue; 157 return cast(CET) n.value; 158 } 159 160 /** 161 * If the given key does not exist in the HashMap, adds it with 162 * the value `defaultValue`. 163 * 164 * Params: 165 * key = the key to look up 166 * defaultValue = the default value 167 * Returns: a pointer to the value stored in the HashMap with the given key. 168 * The pointer is guaranteed to be valid only until the next HashMap 169 * modification. 170 */ 171 auto getOrAdd(this This)(K key, lazy V defaultValue) 172 { 173 alias CET = ContainerElementType!(This, V); 174 175 size_t i; 176 auto n = find(key, i); 177 if (n is null) 178 return cast(CET*) &insert(key, defaultValue).value; 179 else 180 return cast(CET*) &n.value; 181 } 182 183 /** 184 * Supports $(B aa[key] = value;) syntax. 185 */ 186 void opIndexAssign(V value, const K key) 187 { 188 insert(key, move(mutable(value))); 189 } 190 191 /** 192 * Supports $(B key in aa) syntax. 193 * 194 * Returns: pointer to the value corresponding to the given key, 195 * or null if the key is not present in the HashMap. 196 */ 197 inout(V)* opBinaryRight(string op)(const K key) inout nothrow @trusted if (op == "in") 198 { 199 size_t i; 200 auto n = find(key, i); 201 if (n is null) 202 return null; 203 return &(cast(inout) n).value; 204 } 205 206 /** 207 * Removes the value associated with the given key 208 * Returns: true if a value was actually removed. 209 */ 210 bool remove(K key) 211 { 212 size_t i; 213 auto n = find(key, i); 214 if (n is null) 215 return false; 216 static if (storeHash) 217 auto node = Node(n.hash, n.key); 218 else 219 auto node = Node(n.key); 220 immutable bool removed = buckets[i].remove(node); 221 if (removed) 222 _length--; 223 return removed; 224 } 225 226 /** 227 * Returns: the number of key/value pairs in this container. 228 */ 229 size_t length() const nothrow pure @property @safe @nogc 230 { 231 return _length; 232 } 233 234 /** 235 * Returns: `true` if there are no items in this container. 236 */ 237 bool empty() const nothrow pure @property @safe @nogc 238 { 239 return _length == 0; 240 } 241 242 /** 243 * Returns: a range of the keys in this map. 244 */ 245 auto byKey(this This)() inout @trusted 246 { 247 return MapRange!(This, IterType.key)(cast(Unqual!(This)*) &this); 248 } 249 250 /** 251 * Returns: a GC-allocated array filled with the keys contained in this map. 252 */ 253 K[] keys() const @property 254 out(result) 255 { 256 assert (result.length == _length, "Length mismatch"); 257 } 258 do 259 { 260 import std.array : appender; 261 auto app = appender!(K[])(); 262 foreach (ref const bucket; buckets) 263 { 264 foreach (ref item; bucket) 265 app.put(cast(K) item.key); 266 } 267 return app.data; 268 } 269 270 271 /** 272 * Returns: a range of the values in this map. 273 */ 274 auto byValue(this This)() inout @trusted 275 { 276 return MapRange!(This, IterType.value)(cast(Unqual!(This)*) &this); 277 } 278 279 /// ditto 280 alias opSlice = byValue; 281 282 /** 283 * Returns: a GC-allocated array containing the values contained in this map. 284 */ 285 auto values(this This)() const @property 286 out(result) 287 { 288 assert (result.length == _length, "Length mismatch"); 289 } 290 do 291 { 292 import std.array : appender; 293 auto app = appender!(ContainerElementType!(This, V)[])(); 294 foreach (ref const bucket; buckets) 295 { 296 foreach (item; bucket) 297 app.put(cast(ContainerElementType!(This, V)) item.value); 298 } 299 return app.data; 300 } 301 302 /** 303 * Returns: a range of the kev/value pairs in this map. The element type of 304 * this range is a struct with `key` and `value` fields. 305 */ 306 auto byKeyValue(this This)() inout @trusted 307 { 308 return MapRange!(This, IterType.both)(cast(Unqual!(This)*) &this); 309 } 310 311 /** 312 * Support for $(D foreach(key, value; aa) { ... }) syntax; 313 */ 314 int opApply(int delegate(const ref K, ref V) del) 315 { 316 int result = 0; 317 foreach (ref bucket; buckets) 318 foreach (ref node; bucket[]) 319 if ((result = del(*cast(K*)&node.key, *cast(V*)&node.value)) != 0) 320 return result; 321 return result; 322 } 323 324 /// ditto 325 int opApply(int delegate(const ref K, const ref V) del) const 326 { 327 int result = 0; 328 foreach (const ref bucket; buckets) 329 foreach (const ref node; bucket[]) 330 if ((result = del(*cast(K*)&node.key, *cast(V*)&node.value)) != 0) 331 return result; 332 return result; 333 } 334 335 /// ditto 336 int opApply(int delegate(ref V) del) 337 { 338 int result = 0; 339 foreach (ref bucket; buckets) 340 foreach (ref node; bucket[]) 341 if ((result = del(*cast(V*)&node.value)) != 0) 342 return result; 343 return result; 344 } 345 346 /// ditto 347 int opApply(int delegate(const ref V) del) const 348 { 349 int result = 0; 350 foreach (const ref bucket; buckets) 351 foreach (const ref node; bucket[]) 352 if ((result = del(*cast(V*)&node.value)) != 0) 353 return result; 354 return result; 355 } 356 357 mixin AllocatorState!Allocator; 358 359 private: 360 361 import std.experimental.allocator : make, makeArray; 362 import containers.unrolledlist : UnrolledList; 363 import containers.internal.storage_type : ContainerStorageType; 364 import containers.internal.element_type : ContainerElementType; 365 import containers.internal.mixins : AllocatorState; 366 import core.memory : GC; 367 368 enum bool useGC = supportGC && (shouldAddGCRange!K || shouldAddGCRange!V); 369 alias Hash = typeof({ K k = void; return hashFunction(k); }()); 370 371 static ref ContainerStorageType!T mutable(T)(ref T value) { return *cast(ContainerStorageType!T*)&value; } 372 373 enum IterType: ubyte 374 { 375 key, value, both 376 } 377 378 static struct MapRange(MapType, IterType Type) 379 { 380 static if (Type == IterType.both) 381 { 382 struct FrontType 383 { 384 ContainerElementType!(MapType, K) key; 385 ContainerElementType!(MapType, V) value; 386 } 387 } 388 else static if (Type == IterType.value) 389 alias FrontType = ContainerElementType!(MapType, V); 390 else static if (Type == IterType.key) 391 alias FrontType = ContainerElementType!(MapType, K); 392 else 393 static assert(false); 394 395 FrontType front() 396 { 397 static if (Type == IterType.both) 398 return FrontType(cast(ContainerElementType!(MapType, K)) bucketRange.front.key, 399 cast(ContainerElementType!(MapType, V)) bucketRange.front.value); 400 else static if (Type == IterType.value) 401 return cast(ContainerElementType!(MapType, V)) bucketRange.front.value; 402 else static if (Type == IterType.key) 403 return cast(ContainerElementType!(MapType, K)) bucketRange.front.key; 404 else 405 static assert(false); 406 } 407 408 bool empty() const pure nothrow @nogc @property 409 { 410 return _empty; 411 } 412 413 void popFront() pure nothrow @nogc 414 { 415 bucketRange.popFront(); 416 if (bucketRange.empty) 417 { 418 while (bucketRange.empty) 419 { 420 bucketIndex++; 421 if (bucketIndex >= hm.buckets.length) 422 { 423 _empty = true; 424 break; 425 } 426 else 427 bucketRange = hm.buckets[bucketIndex][]; 428 } 429 } 430 } 431 432 private: 433 434 this(Unqual!(MapType)* hm) 435 { 436 this.hm = hm; 437 this.bucketIndex = 0; 438 bucketRange = typeof(bucketRange).init; 439 this._empty = false; 440 441 while (true) 442 { 443 if (bucketIndex >= hm.buckets.length) 444 { 445 _empty = true; 446 break; 447 } 448 bucketRange = hm.buckets[bucketIndex][]; 449 if (bucketRange.empty) 450 bucketIndex++; 451 else 452 break; 453 } 454 } 455 456 Unqual!(MapType)* hm; 457 size_t bucketIndex; 458 typeof(hm.buckets[0].opSlice()) bucketRange; 459 bool _empty; 460 } 461 462 void initialize(size_t bucketCount = DEFAULT_BUCKET_COUNT) 463 { 464 import std.conv : emplace; 465 assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 466 467 buckets = makeArray!Bucket(allocator, bucketCount); 468 static if (useGC) 469 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 470 foreach (ref bucket; buckets) 471 { 472 static if (stateSize!Allocator == 0) 473 emplace(&bucket); 474 else 475 emplace(&bucket, allocator); 476 } 477 } 478 479 Node* insert(const K key, V value) 480 { 481 return insert(key, move(mutable(value)), hashFunction(key)); 482 } 483 484 Node* insert(const K key, V value, const Hash hash, const bool modifyLength = true) 485 { 486 if (buckets.length == 0) 487 initialize(); 488 immutable size_t index = hashToIndex(hash, buckets.length); 489 foreach (ref item; buckets[index]) 490 { 491 if (item.hash == hash && item.key == key) 492 { 493 item.value = move(mutable(value)); 494 return &item; 495 } 496 } 497 static if (storeHash) 498 Node node = Node(hash, cast(ContainerStorageType!K) key, move(mutable(value))); 499 else 500 Node node = Node(cast(ContainerStorageType!K) key, move(mutable(value))); 501 Node* n = buckets[index].insertAnywhere(move(node)); 502 if (modifyLength) 503 _length++; 504 if (shouldRehash()) 505 { 506 rehash(); 507 immutable newIndex = hashToIndex(hash, buckets.length); 508 foreach (ref item; buckets[newIndex]) 509 { 510 if (item.hash == hash && item.key == key) 511 return &item; 512 } 513 assert(false, "Inserted item not found after rehash"); 514 } 515 else 516 return n; 517 } 518 519 /** 520 * Returns: true if the load factor has been exceeded 521 */ 522 bool shouldRehash() const pure nothrow @safe @nogc 523 { 524 // We let this be greater than one because each bucket is an unrolled 525 // list that has more than one element per linked list node. 526 return (float(_length) / float(buckets.length)) > 1.33f; 527 } 528 529 /** 530 * Rehash the map. 531 */ 532 void rehash() @trusted 533 { 534 import std.conv : emplace; 535 immutable size_t newLength = buckets.length << 1; 536 immutable size_t newSize = newLength * Bucket.sizeof; 537 Bucket[] oldBuckets = buckets; 538 buckets = cast(Bucket[]) allocator.allocate(newSize); 539 if (newLength) 540 assert (buckets, "Bucket reallocation failed"); 541 static if (useGC) 542 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 543 assert (buckets.length == newLength, "Bucket reallocation length mismatch"); 544 foreach (ref bucket; buckets) 545 { 546 static if (stateSize!Allocator == 0) 547 emplace(&bucket); 548 else 549 emplace(&bucket, allocator); 550 } 551 552 foreach (ref bucket; oldBuckets) 553 { 554 foreach (ref node; bucket) 555 insert(cast(K) node.key, move(node.value), node.hash, false); 556 typeid(typeof(bucket)).destroy(&bucket); 557 } 558 static if (useGC) 559 GC.removeRange(oldBuckets.ptr); 560 allocator.deallocate(cast(void[]) oldBuckets); 561 } 562 563 inout(Node)* find(const K key, ref size_t index) inout 564 { 565 return find(key, index, hashFunction(key)); 566 } 567 568 inout(Node)* find(const K key, ref size_t index, const Hash hash) inout 569 { 570 import std.array : empty; 571 572 if (buckets.empty) 573 return null; 574 index = hashToIndex(hash, buckets.length); 575 foreach (ref r; buckets[index]) 576 { 577 if (r.hash == hash && r.key == key) 578 return cast(inout(Node)*) &r; 579 } 580 return null; 581 } 582 583 struct Node 584 { 585 bool opEquals(ref const K key) const 586 { 587 return key == this.key; 588 } 589 590 bool opEquals(ref const Node n) const 591 { 592 return this.hash == n.hash && this.key == n.key; 593 } 594 595 static if (storeHash) 596 Hash hash; 597 else 598 @property Hash hash() const { return hashFunction(key); } 599 600 ContainerStorageType!K key; 601 ContainerStorageType!V value; 602 } 603 604 alias Bucket = UnrolledList!(Node, Allocator, useGC); 605 Bucket[] buckets; 606 size_t _length; 607 } 608 609 /// 610 unittest 611 { 612 import std.uuid : randomUUID; 613 import std.range.primitives : walkLength; 614 615 auto hm = HashMap!(string, int)(16); 616 assert (hm.length == 0); 617 assert (!hm.remove("abc")); 618 hm["answer"] = 42; 619 assert (hm.length == 1); 620 assert ("answer" in hm); 621 assert (hm.containsKey("answer")); 622 hm.remove("answer"); 623 assert (hm.length == 0); 624 assert ("answer" !in hm); 625 assert (hm.get("answer", 1000) == 1000); 626 assert (!hm.containsKey("answer")); 627 hm["one"] = 1; 628 hm["one"] = 1; 629 assert (hm.length == 1); 630 assert (hm["one"] == 1); 631 hm["one"] = 2; 632 assert(hm["one"] == 2); 633 foreach (i; 0 .. 1000) 634 { 635 hm[randomUUID().toString] = i; 636 } 637 assert (hm.length == 1001); 638 assert (hm.keys().length == hm.length); 639 assert (hm.values().length == hm.length); 640 () @nogc { 641 assert (hm.byKey().walkLength == hm.length); 642 assert (hm.byValue().walkLength == hm.length); 643 assert (hm[].walkLength == hm.length); 644 assert (hm.byKeyValue().walkLength == hm.length); 645 }(); 646 foreach (v; hm) {} 647 648 auto hm2 = HashMap!(char, char)(4); 649 hm2['a'] = 'a'; 650 651 HashMap!(int, int) hm3; 652 assert (hm3.get(100, 20) == 20); 653 hm3[100] = 1; 654 assert (hm3.get(100, 20) == 1); 655 auto pValue = 100 in hm3; 656 assert(*pValue == 1); 657 } 658 659 version(emsi_containers_unittest) unittest 660 { 661 static class Foo 662 { 663 string name; 664 } 665 666 void someFunc(const scope ref HashMap!(string,Foo) map) @safe 667 { 668 foreach (kv; map.byKeyValue()) 669 { 670 assert (kv.key == "foo"); 671 assert (kv.value.name == "Foo"); 672 } 673 } 674 675 auto hm = HashMap!(string, Foo)(16); 676 auto f = new Foo; 677 f.name = "Foo"; 678 hm.insert("foo", f); 679 assert("foo" in hm); 680 } 681 682 // Issue #54 683 version(emsi_containers_unittest) unittest 684 { 685 HashMap!(string, int) map; 686 map.insert("foo", 0); 687 map.insert("bar", 0); 688 689 foreach (key; map.keys()) 690 map[key] = 1; 691 foreach (key; map.byKey()) 692 map[key] = 1; 693 694 foreach (value; map.byValue()) 695 assert(value == 1); 696 foreach (value; map.values()) 697 assert(value == 1); 698 } 699 700 version(emsi_containers_unittest) unittest 701 { 702 HashMap!(int, int, Mallocator, (int i) => i) map; 703 auto p = map.getOrAdd(1, 1); 704 assert(*p == 1); 705 *p = 2; 706 assert(map[1] == 2); 707 } 708 709 debug (EMSI_CONTAINERS) version(emsi_containers_unittest) unittest 710 { 711 import std.uuid : randomUUID; 712 import std.algorithm.iteration : walkLength; 713 import std.stdio; 714 715 auto hm = HashMap!(string, int)(16); 716 foreach (i; 0 .. 1_000_000) 717 { 718 auto str = randomUUID().toString; 719 //writeln("Inserting ", str); 720 hm[str] = i; 721 //if (i > 0 && i % 100 == 0) 722 //writeln(i); 723 } 724 writeln(hm.buckets.length); 725 726 import std.algorithm.sorting:sort; 727 ulong[ulong] counts; 728 foreach (i, ref bucket; hm.buckets[]) 729 counts[bucket.length]++; 730 foreach (k; counts.keys.sort()) 731 writeln(k, "=>", counts[k]); 732 } 733 734 // #74 735 version(emsi_containers_unittest) unittest 736 { 737 HashMap!(string, size_t) aa; 738 aa["b"] = 0; 739 ++aa["b"]; 740 assert(aa["b"] == 1); 741 } 742 743 // storeHash == false 744 version(emsi_containers_unittest) unittest 745 { 746 static struct S { size_t v; } 747 HashMap!(S, S, Mallocator, (S s) { return s.v; }, false, false) aa; 748 static assert(aa.Node.sizeof == 2 * S.sizeof); 749 } 750 751 version(emsi_containers_unittest) unittest 752 { 753 auto hm = HashMap!(string, int)(16); 754 755 foreach (v; hm) {} 756 foreach (ref v; hm) {} 757 foreach (int v; hm) {} 758 foreach (ref int v; hm) {} 759 foreach (const ref int v; hm) {} 760 761 foreach (k, v; hm) {} 762 foreach (k, ref v; hm) {} 763 foreach (k, int v; hm) {} 764 foreach (k, ref int v; hm) {} 765 foreach (k, const ref int v; hm) {} 766 767 foreach (ref k, v; hm) {} 768 foreach (ref k, ref v; hm) {} 769 foreach (ref k, int v; hm) {} 770 foreach (ref k, ref int v; hm) {} 771 foreach (ref k, const ref int v; hm) {} 772 773 foreach (const string k, v; hm) {} 774 foreach (const string k, ref v; hm) {} 775 foreach (const string k, int v; hm) {} 776 foreach (const string k, ref int v; hm) {} 777 foreach (const string k, const ref int v; hm) {} 778 779 foreach (const ref string k, v; hm) {} 780 foreach (const ref string k, ref v; hm) {} 781 foreach (const ref string k, int v; hm) {} 782 foreach (const ref string k, ref int v; hm) {} 783 foreach (const ref string k, const ref int v; hm) {} 784 785 hm["a"] = 1; 786 foreach (k, ref v; hm) { v++; } 787 assert(hm["a"] == 2); 788 } 789 790 version(emsi_containers_unittest) unittest 791 { 792 static struct S { @disable this(this); } 793 alias HM = HashMap!(int, S); 794 } 795 796 version(emsi_containers_unittest) unittest 797 { 798 struct S { int* a; } 799 alias HM = HashMap!(S, int); 800 }