/**
Stuff having to do with memory management. Mostly a copy of RegionAllocator
for now until it gets into Phobos, as well as some RegionAllocator-specific
data structures.

Author: David Simcha*/
/*
 * License:
 * Boost Software License - Version 1.0 - August 17th, 2003
 *
 * Permission is hereby granted, free of charge, to any person or organization
 * obtaining a copy of the software and accompanying documentation covered by
 * this license (the "Software") to use, reproduce, display, distribute,
 * execute, and transmit the Software, and to prepare derivative works of the
 * Software, and to permit third-parties to whom the Software is furnished to
 * do so, all subject to the following:
 *
 * The copyright notices in the Software and this entire statement, including
 * the above license grant, this restriction and the following disclaimer,
 * must be included in all copies of the Software, in whole or in part, and
 * all derivative works of the Software, unless such copies or derivative
 * works are solely in the form of machine-executable object code generated by
 * a source language processor.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

module dstats.alloc;

import std.traits, core.memory, std.array, std.range, core.exception,
    std.functional, std.math, std.algorithm : max;

static import core.stdc.stdlib;
import core.stdc.string : memcpy;

import dstats.base;

version(unittest) {
    import std.stdio, std.conv, std.random, dstats.sort;
}

private template Appends(T, U) {
    enum bool Appends = AppendsImpl!(T, U).ret;
}

private template AppendsImpl(T, U) {
    T[] a;
    U b;
    enum bool ret = is(typeof(a ~= b));
}

///Appends to an array, deleting the old array if it has to be realloced.
void appendDelOld(T, U)(ref T[] to, U from)
if(Appends!(T, U)) {
    auto old = to;
    to ~= from;

    if(old.ptr !is to.ptr && old.ptr !is null) {
        import core.memory;

        static if(__traits(isPOD, T))
            GC.free(old.ptr);
        else
            __delete(old);
    }
}

unittest {
    uint[] foo;
    foo.appendDelOld(5);
    foo.appendDelOld(4);
    foo.appendDelOld(3);
    foo.appendDelOld(2);
    foo.appendDelOld(1);
    assert(foo == cast(uint[]) [5,4,3,2,1]);
}
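/*
Illustrative note: because appendDelOld frees the old block whenever the
append has to reallocate, no other slice may alias the array passed in.
A minimal sketch of the hazard, with hypothetical variable names:
---
uint[] a;
a.appendDelOld(0);
auto b = a;              // b aliases a's current block.
foreach(i; 1..1_000) {
    a.appendDelOld(i);   // Any reallocation here frees the block b points to.
}
// b may now be a dangling slice; do not use it.
---
*/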
private struct SHNode(K, V) {
    alias SHNode!(K, V) SomeType;
    SomeType* next;
    Unqual!(K) key;
    Unqual!(V) val;
}

/**Forward range struct for iterating over the keys or values of a
 * StackHash or StackSet. The lifetime of this object must not exceed that
 * of the underlying StackHash or StackSet.*/
struct HashRange(K, S, bool vals = false) {
private:
    S* set;
    size_t index;
    S.Node* next;
    K* frontElem;
    size_t _length;

    this(S* set) {
        this.set = set;
        if(set.rNext[0] == set.usedSentinel) {
            this.popFront;
        } else {
            static if(vals) {
                frontElem = set.rVals.ptr;
            } else {
                frontElem = set.rKeys.ptr;
            }
            next = set.rNext[0];
        }
        this._length = set.length;
    }

public:
    ///
    void popFront()
    in {
        assert(!empty);
    } do {
        this._length--;
        if(next is null) {
            do {
                index++;
                if(index >= set.rNext.length) {
                    index = size_t.max;  // Sentinel for empty.
                    return;
                }
                next = set.rNext[index];
            } while(set.rNext[index] == set.usedSentinel);
            static if(vals) {
                frontElem = &(set.rVals[index]);
            } else {
                frontElem = &(set.rKeys[index]);
            }
        } else {
            static if(vals) {
                frontElem = &(next.val);
            } else {
                frontElem = &(next.key);
            }
            next = next.next;
        }
    }

    ///
    static if(vals) {
        @property ref Unqual!(K) front()
        in {
            assert(!empty);
        } do {
            return *frontElem;
        }
    } else {
        @property Unqual!(K) front()
        in {
            assert(!empty);
        } do {
            return *frontElem;
        }
    }

    ///
    @property bool empty() {
        return index == size_t.max;
    }

    ///
    @property size_t length() {
        return _length;
    }

    ///
    @property typeof(this) save() {
        return this;
    }
}
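/*
Illustrative sketch: HashRange is a forward range, so an iteration point can
be saved and resumed independently via save(). Assuming h is a StackHash
(defined below):
---
auto k = h.keys;
auto saved = k.save;         // Independent copy of the iteration state.
while(!k.empty) k.popFront;
// saved still starts at the first key even though k is exhausted.
assert(saved.length == h.length);
---
*/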
/**A hash table that allocates its memory on RegionAllocator. Good for building
 * a temporary hash table that will not escape the current scope.
 *
 * Examples:
 * ---
 * auto alloc = newRegionAllocator();
 * auto ss = StackHash!(uint, uint)(5, alloc);
 * foreach(i; 0..5) {
 *     ss[i]++;
 * }
 * assert(ss[3] == 1);
 * ---
 *
 * Warning:
 * This implementation places removed nodes on an internal free list and
 * recycles them, since there is no way to delete RegionAllocator-allocated data
 * in a non-LIFO order. Therefore, you may not retain the address of a
 * variable stored in a StackHash after deleting it from the StackHash.
 * For example, DO NOT do this:
 * ---
 * SomeType* myPtr = &(myStackHash["foo"]);
 * myStackHash.remove("foo");
 * *myPtr = someValue;
 * ---
 */
struct StackHash(K, V) {
private:
    alias SHNode!(K, V) Node;

    // Using parallel arrays instead of structs to save on alignment overhead:
    Unqual!(K)[] rKeys;
    Unqual!(V)[] rVals;
    Unqual!(Node*)[] rNext;

    // Holds nodes that were deleted by remove().
    Node** freeList;

    RegionAllocator alloc;
    size_t _length;

    // Tries to allocate off the free list. Otherwise allocates off
    // RegionAllocator.
    Node* allocNode() {
        if(*freeList is null) {
            return cast(Node*) alloc.allocate(Node.sizeof);
        }
        auto ret = *freeList;
        *freeList = (*freeList).next;
        return ret;
    }

    // Add a removed node to the free list.
    void pushFreeList(Node* node) {
        if(*freeList is null) {
            node.next = null;  // Sentinel
            *freeList = node;
        } else {
            node.next = *freeList;
            *freeList = node;
        }
    }

    // rNext.ptr is stored in elements of rNext as a sentinel to indicate
    // that the corresponding slot is unused.
    Node* usedSentinel() @property {
        return cast(Node*) rNext.ptr;
    }

    Node* newNode(K key) {
        Node* ret = allocNode();
        ret.key = key;
        ret.val = V.init;
        ret.next = null;
        return ret;
    }

    Node* newNode(K key, V val) {
        Node* ret = allocNode();
        ret.key = key;
        ret.val = val;
        ret.next = null;
        return ret;
    }

    hash_t getHash(K key) {
        static if(is(K : long) && K.sizeof <= hash_t.sizeof) {
            hash_t hash = cast(hash_t) key;
        } else static if(__traits(compiles, key.toHash())) {
            hash_t hash = key.toHash();
        } else {
            hash_t hash = typeid(K).getHash(&key);
        }
        hash %= rNext.length;
        return hash;
    }

public:
    /**Due to the nature of RegionAllocator, you must specify on object creation
     * the approximate number of elements your table will have. Too large a
     * number will waste space and incur poor cache performance. Too low a
     * number will make this struct perform like a linked list. Generally,
     * if you're building a table from some other range, some fraction of the
     * size of that range is a good guess.*/
    this(size_t nElem, RegionAllocator alloc) {
        // Obviously, the caller can never mean zero, because this struct
        // can't work at all with nElem == 0, so assume it's a mistake and fix
        // it here.
        this.alloc = alloc;

        if(nElem == 0) nElem++;
        rKeys = alloc.uninitializedArray!(K[])(nElem);
        rVals = alloc.uninitializedArray!(V[])(nElem);

        // Allocate the free list in the same block as the Node pointers.
        // That's what the + 1 is for.
        rNext = alloc.uninitializedArray!(Node*[])(nElem + 1);
        freeList = &(rNext[$ - 1]);
        *freeList = null;
        rNext = rNext[0..$ - 1];

        foreach(ref rKey; rKeys) {
            rKey = K.init;
        }
        foreach(ref rVal; rVals) {
            rVal = V.init;
        }
        foreach(ref r; rNext) {
            r = usedSentinel;
        }
    }
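    /*
    Illustrative sketch of picking nElem, per the constructor documentation
    above: when building a table from an existing range, use a fraction of
    its size. Assumes a hypothetical string array `words`:
    ---
    auto alloc = newRegionAllocator();
    auto counts = StackHash!(string, uint)(words.length / 2, alloc);
    foreach(w; words) counts[w]++;
    ---
    */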
    /**Index an element of the range. If it does not exist, it will be created
     * and initialized to V.init.*/
    ref V opIndex(K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            rKeys[hash] = key;
            rNext[hash] = null;
            _length++;
            return rVals[hash];
        } else if(rKeys[hash] == key) {
            return rVals[hash];
        } else {  // Collision. Start chaining.
            Node** next = &(rNext[hash]);
            while(*next !is null) {
                if((**next).key == key) {
                    return (**next).val;
                }
                next = &((**next).next);
            }
            *next = newNode(key);
            _length++;
            return (**next).val;
        }
    }

    ///
    V opIndexAssign(V val, K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            rKeys[hash] = key;
            rVals[hash] = val;
            rNext[hash] = null;
            _length++;
            return val;
        } else if(rKeys[hash] == key) {
            rVals[hash] = val;
            return val;
        } else {  // Collision. Start chaining.
            Node** next = &(rNext[hash]);
            while(*next !is null) {
                if((**next).key == key) {
                    (**next).val = val;
                    return val;
                }
                next = &((**next).next);
            }
            _length++;
            *next = newNode(key, val);
            return val;
        }
    }

    ///
    V* opBinaryRight(string op : "in")(K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            return null;
        } else if(rKeys[hash] == key) {
            return &(rVals[hash]);
        } else {  // Collision. Start chaining.
            Node* next = rNext[hash];
            while(next !is null) {
                if(next.key == key) {
                    return &(next.val);
                }
                next = next.next;
            }
            return null;
        }
    }

    ///
    void remove(K key) {
        hash_t hash = getHash(key);

        Node** next = &(rNext[hash]);
        if(rNext[hash] == usedSentinel) {
            return;
        } else if(rKeys[hash] == key) {
            _length--;
            if(rNext[hash] is null) {
                rKeys[hash] = K.init;
                rVals[hash] = V.init;
                rNext[hash] = usedSentinel;
                return;
            } else {
                Node* toPush = *next;

                rKeys[hash] = (**next).key;
                rVals[hash] = (**next).val;
                rNext[hash] = (**next).next;

                pushFreeList(toPush);
                return;
            }
        } else {  // Collision. Start chaining.
            while(*next !is null) {
                if((**next).key == key) {
                    _length--;

                    Node* toPush = *next;
                    *next = (**next).next;

                    pushFreeList(toPush);
                    break;
                }
                next = &((**next).next);
            }
            return;
        }
    }

    /**Returns a forward range to iterate over the keys of this table.
     * The lifetime of this range must not exceed the lifetime of this
     * StackHash.*/
    auto keys() {
        return HashRange!(K, StackHash!(K, V))(&this);
    }

    /**Returns a forward range to iterate over the values of this table.
     * The lifetime of this range must not exceed the lifetime of this
     * StackHash.*/
    auto values() {
        return HashRange!(V, StackHash!(K, V), true)(&this);
    }

    ///
    @property size_t length() const {
        return _length;
    }

    /**
    Attempt to look up a key and return a default value if the key is not
    present.
    */
    V get(K key, lazy V defaultValue) {
        auto ptr = key in this;
        if(ptr) return *ptr;
        return defaultValue;
    }

    int opApply(int delegate(ref K, ref V) dg) {
        auto k = this.keys;
        auto v = this.values;
        int res;

        while(!k.empty) {
            auto kFront = k.front;
            res = dg(kFront, v.front);
            k.popFront;
            v.popFront;
            if(res) {
                break;
            }
        }

        return res;
    }

    real efficiency() {
        uint used = 0;
        foreach(root; rNext) {
            if(root != usedSentinel) {
                used++;
            }
        }
        return cast(real) used / rNext.length;
    }
}
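/*
Illustrative sketch: get() looks a key up without creating it, unlike
opIndex, which inserts V.init for missing keys. The default value is lazy,
so it is only evaluated on a miss:
---
auto alloc = newRegionAllocator();
auto h = StackHash!(string, uint)(10, alloc);
h["foo"] = 1;
assert(h.get("foo", 999) == 1);
assert(h.get("bar", 999) == 999);  // "bar" is not inserted.
assert(!("bar" in h));
---
*/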
unittest {
    alias StackHash!(string, uint) mySh;

    {  // Basic sanity checks.
        auto alloc = newRegionAllocator();
        auto data = mySh(2, alloc);  // Make sure we get some collisions.
        data["foo"] = 1;
        data["bar"] = 2;
        data["baz"] = 3;
        data["waldo"] = 4;
        assert(!("foobar" in data));
        assert(*("foo" in data) == 1);
        assert(*("bar" in data) == 2);
        assert(*("baz" in data) == 3);
        assert(*("waldo" in data) == 4);
        assert(data["foo"] == 1);
        assert(data["bar"] == 2);
        assert(data["baz"] == 3);
        assert(data["waldo"] == 4);
        auto myKeys = array(data.keys);
        qsort(myKeys);
        assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]);
        auto myValues = array(data.values);
        qsort(myValues);
        assert(myValues == [1U, 2, 3, 4]);
        {
            auto k = data.keys;
            auto v = data.values;
            while(!k.empty) {
                assert(data[k.front] == v.front);
                k.popFront;
                v.popFront;
            }
        }
        foreach(v; data.values) {
            assert(v > 0 && v < 5);
        }
    }

    alias StackHash!(uint, uint) mySh2;
    {  // Test remove.
        auto alloc = newRegionAllocator();

        auto foo = mySh2(7, alloc);
        for(uint i = 0; i < 200; i++) {
            foo[i] = i;
        }
        assert(foo.length == 200);
        for(uint i = 0; i < 200; i += 2) {
            foo.remove(i);
        }
        foreach(i; 20..200) {
            foo.remove(i);
        }
        for(uint i = 0; i < 20; i++) {
            if(i & 1) {
                assert(i in foo);
                assert(*(i in foo) == i);
            } else {
                assert(!(i in foo));
            }
        }
        auto vals = array(foo.values);
        assert(foo.length == 10);
        assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]);
    }

    {  // Monte Carlo unittesting against builtin hash table.
        auto alloc = newRegionAllocator();
        uint[uint] builtin;
        auto monteSh = mySh2(20_000, alloc);
        uint[] nums = alloc.uninitializedArray!(uint[])(100_000);
        foreach(ref num; nums) {
            num = uniform(0U, uint.max);
        }

        foreach(i; 0..1_000_000) {
            auto index = uniform(0, cast(uint) nums.length);
            if(index in builtin) {
                assert(index in monteSh);
                assert(builtin[index] == nums[index]);
                assert(monteSh[index] == nums[index]);
                builtin.remove(index);
                monteSh.remove(index);
            } else {
                assert(!(index in monteSh));
                builtin[index] = nums[index];
                monteSh[index] = nums[index];
            }
        }

        assert(builtin.length == monteSh.length);
        foreach(k, v; builtin) {
            assert(k in monteSh);
            assert(*(k in builtin) == *(k in monteSh));
            assert(monteSh[k] == v);
        }

        // Make sure nothing is missed in iteration. Since both keys and
        // values use the same struct, just with a few static if statements,
        // if it works for keys and simple tests work for values, it works.
        foreach(k; monteSh.keys) {
            builtin.remove(k);
        }
        assert(builtin.length == 0);
    }
}
/**A hash set that allocates its memory on RegionAllocator. Good for building a
 * temporary set that will not escape the current scope.
 *
 * Examples:
 * ---
 * auto alloc = newRegionAllocator();
 * auto ss = StackSet!(uint)(5, alloc);
 * foreach(i; 0..5) {
 *     ss.insert(i);
 * }
 * assert(3 in ss);
 * ---
 */
struct StackSet(K) {
private:
    // Choose the smallest representation of the data.
    struct Node1 {
        Node1* next;
        K key;
    }

    struct Node2 {
        K key;
        Node2* next;
    }

    static if(Node1.sizeof < Node2.sizeof) {
        alias Node1 Node;
    } else {
        alias Node2 Node;
    }

    Unqual!(K)[] rKeys;
    Node*[] rNext;

    Node** freeList;
    size_t _length;
    RegionAllocator alloc;

    Node* usedSentinel() {
        return cast(Node*) rNext.ptr;
    }

    // Tries to allocate off the free list. Otherwise allocates off
    // RegionAllocator.
    Node* allocNode() {
        if(*freeList is null) {
            return cast(Node*) alloc.allocate(Node.sizeof);
        }
        auto ret = *freeList;
        *freeList = (*freeList).next;
        return ret;
    }

    // Add a removed node to the free list.
    void pushFreeList(Node* node) {
        if(*freeList is null) {
            node.next = null;  // Sentinel
            *freeList = node;
        } else {
            node.next = *freeList;
            *freeList = node;
        }
    }

    Node* newNode(K key) {
        Node* ret = allocNode();
        ret.key = key;
        ret.next = null;
        return ret;
    }

    hash_t getHash(K key) {
        static if(is(K : long) && K.sizeof <= hash_t.sizeof) {
            hash_t hash = cast(hash_t) key;
        } else static if(__traits(compiles, key.toHash())) {
            hash_t hash = key.toHash();
        } else {
            hash_t hash = typeid(K).getHash(&key);
        }
        hash %= rNext.length;
        return hash;
    }

public:
    /**Due to the nature of RegionAllocator, you must specify on object creation
     * the approximate number of elements your set will have. Too large a
     * number will waste space and incur poor cache performance. Too low a
     * number will make this struct perform like a linked list. Generally,
     * if you're building a set from some other range, some fraction of the
     * size of that range is a good guess.*/
    this(size_t nElem, RegionAllocator alloc) {
        this.alloc = alloc;

        // Obviously, the caller can never mean zero, because this struct
        // can't work at all with nElem == 0, so assume it's a mistake and fix
        // it here.
        if(nElem == 0) nElem++;

        // Allocate the free list as the last element of rNext.
        rNext = alloc.uninitializedArray!(Node*[])(nElem + 1);
        freeList = &(rNext[$ - 1]);
        *freeList = null;
        rNext = rNext[0..$ - 1];

        foreach(ref root; rNext) {
            root = usedSentinel;
        }

        rKeys = alloc.uninitializedArray!(Unqual!(K)[])(nElem);
        foreach(ref root; rKeys) {
            root = K.init;
        }
    }

    ///
    void insert(K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            rKeys[hash] = key;
            rNext[hash] = null;
            _length++;
            return;
        } else if(rKeys[hash] == key) {
            return;
        } else {  // Collision. Start chaining.
            Node** next = &(rNext[hash]);
            while(*next !is null) {
                if((**next).key == key) {
                    return;
                }
                next = &((**next).next);
            }
            *next = newNode(key);
            _length++;
            return;
        }
    }

    /**Returns a forward range of the elements of this struct. The range's
     * lifetime must not exceed the lifetime of this object.*/
    auto elems() {
        return HashRange!(K, typeof(this))(&this);
    }

    ///
    bool opBinaryRight(string op : "in")(K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            return false;
        } else if(rKeys[hash] == key) {
            return true;
        } else {  // Collision. Start chaining.
            Node* next = rNext[hash];
            while(next !is null) {
                if(next.key == key) {
                    return true;
                }
                next = next.next;
            }
            return false;
        }
    }

    ///
    void remove(K key) {
        hash_t hash = getHash(key);

        Node** next = &(rNext[hash]);
        if(rNext[hash] == usedSentinel) {
            return;
        } else if(rKeys[hash] == key) {
            _length--;
            if(rNext[hash] is null) {
                rKeys[hash] = K.init;
                rNext[hash] = usedSentinel;
                return;
            } else {
                Node* toPush = *next;

                rKeys[hash] = (**next).key;
                rNext[hash] = (**next).next;

                pushFreeList(toPush);
                return;
            }
        } else {  // Collision. Start chaining.
            while(*next !is null) {
                if((**next).key == key) {
                    _length--;
                    Node* toPush = *next;

                    *next = (**next).next;
                    pushFreeList(toPush);
                    break;
                }
                next = &((**next).next);
            }
            return;
        }
    }

    ///
    @property size_t length() {
        return _length;
    }
}
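/*
Illustrative sketch: a set intersection built only from the insert/elems/in
primitives defined above:
---
auto alloc = newRegionAllocator();
auto a = StackSet!(uint)(10, alloc);
auto b = StackSet!(uint)(10, alloc);
foreach(i; 0..10) a.insert(i);
foreach(i; 5..15) b.insert(i);

auto both = StackSet!(uint)(10, alloc);
foreach(e; a.elems) {
    if(e in b) both.insert(e);
}
assert(both.length == 5);  // Elements 5 through 9.
---
*/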
unittest {
    {  // "Normal" unittesting.
        auto alloc = newRegionAllocator();
        alias StackSet!(uint) mySS;
        mySS set = mySS(12, alloc);
        foreach(i; 0..20) {
            set.insert(i);
        }
        assert(array(set.elems).qsort == seq(0U, 20U));

        for(uint i = 0; i < 20; i += 2) {
            set.remove(i);
        }

        foreach(i; 0..20) {
            if(i & 1) {
                assert(i in set);
            } else {
                assert(!(i in set));
            }
        }
        uint[] contents;

        foreach(elem; set.elems) {
            contents ~= elem;
        }
        assert(contents.qsort == [1U,3,5,7,9,11,13,15,17,19]);
    }

    {  // Monte Carlo unittesting against builtin hash table.
        auto alloc = newRegionAllocator();
        bool[uint] builtin;
        auto monteSh = StackSet!uint(20_000, alloc);

        foreach(i; 0..1_000_000) {
            auto index = uniform(0, 100_000);
            if(index in builtin) {
                assert(index in monteSh);
                builtin.remove(index);
                monteSh.remove(index);
            } else {
                assert(!(index in monteSh));
                builtin[index] = 1;
                monteSh.insert(index);
            }
        }

        assert(builtin.length == monteSh.length);
        foreach(k, v; builtin) {
            assert(k in monteSh);
        }

        foreach(k; monteSh.elems) {
            builtin.remove(k);
        }
        assert(builtin.length == 0);
    }
}
private int height(T)(const T node) nothrow {
    return (node is null) ? 0 : node.height;
}

struct AVLNodeRealHeight(T) {
    T payload;
    typeof(this)* left;
    typeof(this)* right;
    int height;

    int balance() const nothrow @property {
        return .height(left) - .height(right);
    }

    void fixHeight() nothrow {
        auto leftHeight = .height(left);
        auto rightHeight = .height(right);

        height = ((leftHeight > rightHeight) ? leftHeight : rightHeight) + 1;
    }

    bool isLeaf() nothrow @property {
        return left is null && right is null;
    }
}

/* Store the height in the low order bits of the pointers to save space,
 * since RegionAllocator allocates 16-byte aligned memory anyhow, but only if
 * this would be smaller after considering alignment.
 */
struct AVLNodeBitwise(T) {
    T payload;
    size_t _left;
    size_t _right;

    enum size_t mask = 0b1111;
    enum size_t notMask = ~mask;

    typeof(this)* left() nothrow @property {
        return cast(typeof(return)) (_left & notMask);
    }

    const(typeof(this))* left() const nothrow @property {
        return cast(typeof(return)) (_left & notMask);
    }

    void left(typeof(this)* newLeft) nothrow @property
    in {
        assert((cast(size_t) newLeft & mask) == 0);
    } do {
        _left &= mask;
        _left |= cast(size_t) newLeft;
        assert(left is newLeft);
    }

    typeof(this)* right() nothrow @property {
        return cast(typeof(return)) (_right & notMask);
    }

    const(typeof(this))* right() const nothrow @property {
        return cast(typeof(return)) (_right & notMask);
    }

    void right(typeof(this)* newRight) nothrow @property
    in {
        assert((cast(size_t) newRight & mask) == 0);
    } do {
        _right &= mask;
        _right |= cast(size_t) newRight;
        assert(right is newRight);
    }

    int height() const nothrow @property {
        return (((_left & mask) << 4) |
                 (_right & mask));
    }

    void height(int newHeight) nothrow @property {
        _right &= notMask;
        _right |= (newHeight & mask);
        newHeight >>= 4;
        _left &= notMask;
        _left |= (newHeight & mask);
    }

    int balance() const nothrow @property {
        return .height(left) - .height(right);
    }

    void fixHeight() nothrow {
        auto leftHeight = .height(left);
        auto rightHeight = .height(right);

        height = ((leftHeight > rightHeight) ? leftHeight : rightHeight) + 1;
    }

    bool isLeaf() const nothrow @property {
        return left is null && right is null;
    }
}
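/*
Illustrative note on the bit packing above: because node addresses are
16-byte aligned, the low 4 bits of _left and _right are always zero for real
pointers and can carry data. The 8-bit height is split into two nibbles:
the high nibble lives in _left, the low nibble in _right. A worked example:
---
// Suppose height = 83 = 0b0101_0011.
// The setter stores: _left ends in 0b0101 (5), _right ends in 0b0011 (3).
// The getter returns ((_left & 0b1111) << 4) | (_right & 0b1111)
//                  = (5 << 4) | 3 = 83.
---
An 8-bit height saturates at 255; an AVL tree that tall would need far more
nodes than fit in memory, so the packing is lossless in practice.
*/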
private template GetAligned(uint size) {
    static if(size % RegionAllocator.alignBytes(size) == 0) {
        enum GetAligned = size;
    } else {
        enum GetAligned =
            size - size % RegionAllocator.alignBytes(size) +
            RegionAllocator.alignBytes(size);
    }
}

/**An AVL tree implementation on top of RegionAllocator. If elements are
 * removed, they are stored on an internal free list and recycled when new
 * elements are added to the tree.
 *
 * Template parameters:
 *
 * T = The type to be stored in the tree.
 *
 * key = Function to access the key on which the stored elements are
 * compared.
 *
 * compFun = The function for comparing keys.
 *
 * Examples:
 * ---
 * struct StringNum {
 *     string someString;
 *     uint num;
 * }
 *
 * // Create a StackTree of StringNums, sorted in descending order, using
 * // someString for comparison.
 * auto alloc = newRegionAllocator();
 * auto myTree = StackTree!(StringNum, "a.someString", "a > b")(alloc);
 *
 * // Add some elements.
 * myTree.insert( StringNum("foo", 1));
 * myTree.insert( StringNum("bar", 2));
 * myTree.insert( StringNum("foo", 3));
 *
 * assert(myTree.find("foo") == StringNum("foo", 3));
 * assert(myTree.find("bar") == StringNum("bar", 2));
 * ---
 *
 * Note: This tree supports a compile-time interface similar to StackSet
 * and can be used as a finite set implementation.
 *
 * Warning:
 * This implementation places removed nodes on an internal free list and
 * recycles them, since there is no way to delete RegionAllocator-allocated
 * data in a non-LIFO order. Therefore, you may not retain the address of a
 * variable stored in a StackTree after deleting it from the StackTree.
 * For example, DO NOT do this:
 * ---
 * SomeType* myPtr = "foo" in myTree;
 * myTree.remove("foo");
 * *myPtr = someValue;
 * ---
 */
struct StackTree(T, alias key = "a", alias compFun = "a < b") {
private:

    alias AVLNodeBitwise!(T) BitwiseNode;
    alias AVLNodeRealHeight!(T) RealNode;

    enum size_t bitSize = GetAligned!(BitwiseNode.sizeof);
    enum size_t realHeightSize = GetAligned!(RealNode.sizeof);

    static if(bitSize < realHeightSize) {
        alias AVLNodeBitwise!(T) Node;
    } else {
        alias AVLNodeRealHeight!(T) Node;
    }

    alias binaryFun!(compFun) comp;
    alias unaryFun!(key) getKey;

    Node* head;
    Node** freeList;
    size_t _length;
    RegionAllocator alloc;

    static bool insertComp(T lhs, T rhs) {
        return comp( getKey(lhs), getKey(rhs));
    }

    static Node* rotateRight(Node* node)
    in {
        assert(node.left !is null);
        assert( abs(node.balance) <= 2);
    } do {
        Node* newHead = node.left;
        node.left = newHead.right;
        newHead.right = node;

        node.fixHeight();
        newHead.fixHeight();

        assert( abs(node.balance) < 2);
        return newHead;
    }

    static Node* rotateLeft(Node* node)
    in {
        assert(node.right !is null);
        assert( abs(node.balance) <= 2);
    } do {
        Node* newHead = node.right;
        node.right = newHead.left;
        newHead.left = node;

        node.fixHeight();
        newHead.fixHeight();

        assert( abs(node.balance) < 2);
        return newHead;
    }
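    /*
    Illustrative diagram of the rotations above (standard AVL rotations).
    For rotateRight, with x = node.left:

            node                 x
            /  \               /   \
           x    C    ==>      A    node
          / \                      /  \
         A   B                    B    C

    rotateLeft is the mirror image. Both preserve the BST ordering
    A < x < B < node < C and shift one level of height from one side to
    the other.
    */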
    static Node* rebalance(Node* node)
    in {
        assert(node is null || abs(node.balance) <= 2);
    } out(ret) {
        assert(ret is null || abs(ret.balance) < 2);
    } do {
        if(node is null) {
            return null;
        }

        immutable balance = node.balance;
        if(abs(balance) <= 1) {
            return node;
        }

        if(balance == -2) {
            // It should be impossible to have a balance factor of -2 if
            // node.right is null.
            assert(node.right !is null);
            immutable rightBalance = node.right.balance;
            assert( abs(rightBalance) < 2);

            if(rightBalance == 1) {
                node.right = rotateRight(node.right);
                node.fixHeight();
            }

            assert(node.balance == -2);
            return rotateLeft(node);

        } else if(balance == 2) {
            // It should be impossible to have a balance factor of 2 if
            // node.left is null.
            assert(node.left !is null);
            immutable leftBalance = node.left.balance;
            assert( abs(leftBalance) < 2);

            if(leftBalance == -1) {
                node.left = rotateLeft(node.left);
                node.fixHeight();
            }

            assert(node.balance == 2);
            return rotateRight(node);
        }

        // The AVL tree invariant is that abs(balance) <= 2 even during
        // insertion/deletion, so this point should be unreachable.
        assert(0);
    }

    void pushFreeList(Node* node) {
        node.left = null;
        node.right = *freeList;
        *freeList = node;
    }

    Node* popFreeList()
    in {
        assert(freeList);
        assert(*freeList);
    } do {
        auto ret = *freeList;
        *freeList = ret.right;
        return ret;
    }

    Node* newNode(T payload)
    in {
        assert(freeList, "Uninitialized StackTree!(" ~ T.stringof ~ ")");
    } do {
        Node* ret;
        if(*freeList !is null) {
            ret = popFreeList();
        } else {
            ret = cast(Node*) alloc.allocate(Node.sizeof);
        }

        ret.payload = payload;
        ret.left = null;
        ret.right = null;
        ret.height = 1;
        return ret;
    }

public:
    ///
    this(RegionAllocator alloc) {
        this.alloc = alloc;
        this.freeList = alloc.uninitializedArray!(Node*[])(1).ptr;
        *(this.freeList) = null;
    }

    /**Insert an element.*/
    void insert(T toInsert) {
        if(head is null) {
            head = newNode(toInsert);
            _length++;
        } else {
            head = insertImpl(toInsert, head);
        }
    }

    Node* insertImpl(T toInsert, Node* insertInto) {
        if( insertComp(toInsert, insertInto.payload) ) {
            if(insertInto.left is null) {
                insertInto.left = newNode(toInsert);
                _length++;
            } else {
                insertInto.left = insertImpl(toInsert, insertInto.left);
            }
        } else if( insertComp(insertInto.payload, toInsert) ) {
            if(insertInto.right is null) {
                insertInto.right = newNode(toInsert);
                _length++;
            } else {
                insertInto.right = insertImpl(toInsert, insertInto.right);
            }
        } else {
            // This is correct: If the comparison key is only part of the
            // payload, the old payload may not be equal to the new payload,
            // even if the comparison keys are equal.
            insertInto.payload = toInsert;
            return insertInto;
        }

        insertInto.fixHeight();
        return rebalance(insertInto);
    }
    /**Remove an element from this tree. The type of U is expected to be the
     * type of the key that this tree is sorted on.
     */
    void remove(U)(U whatToRemove) {
        Node* removedNode;
        Node* leftMost;

        Node* removeLeftMost(Node* node) {
            if(node.left is null) {
                auto ret = node.right;
                node.right = null;
                leftMost = node;
                return ret;
            }

            node.left = removeLeftMost(node.left);
            node.fixHeight();
            return rebalance(node);
        }

        Node* removeSuccessor(Node* node) {
            if(node.right is null) {
                assert(node.left.isLeaf);
                leftMost = node.left;

                node.left = null;
                return node;
            }

            node.right = removeLeftMost(node.right);
            node.fixHeight();
            return node;
        }

        Node* removeImpl(U whatToRemove, Node* whereToRemove) {
            static bool findComp(V, W)(V lhs, W rhs) {
                static if(is(V == T)) {
                    static assert(is(W == U));
                    return comp( getKey(lhs), rhs);
                } else {
                    static assert(is(V == U));
                    static assert(is(W == T));
                    return comp(lhs, getKey(rhs) );
                }
            }

            if(whereToRemove is null) {
                return null;
            }

            if( findComp(whatToRemove, whereToRemove.payload) ) {
                whereToRemove.left = removeImpl(whatToRemove, whereToRemove.left);
                whereToRemove.fixHeight();
                return rebalance(whereToRemove);
            } else if( findComp(whereToRemove.payload, whatToRemove) ) {
                whereToRemove.right = removeImpl(whatToRemove, whereToRemove.right);
                whereToRemove.fixHeight();
                return rebalance(whereToRemove);
            } else {
                // We've found it.
                _length--;
                removedNode = whereToRemove;
                if(whereToRemove.isLeaf) {
                    return null;
                }

                whereToRemove = removeSuccessor(whereToRemove);
                if(leftMost is null) {
                    return null;
                }

                leftMost.left = whereToRemove.left;
                leftMost.right = whereToRemove.right;
                leftMost.fixHeight();
                return rebalance(leftMost);
            }
        }

        head = removeImpl(whatToRemove, head);

        debug(EXPENSIVE) assertAvl(head);

        if(removedNode !is null) {
            pushFreeList(removedNode);
        }
    }
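    /*
    Illustrative sketch: remove() is templated on the key type U, so a tree
    whose payload is a struct can be manipulated by key alone. Reusing the
    StringNum struct from the documentation example above:
    ---
    auto alloc = newRegionAllocator();
    auto myTree = StackTree!(StringNum, "a.someString", "a > b")(alloc);
    myTree.insert( StringNum("foo", 1));
    myTree.insert( StringNum("bar", 2));

    myTree.remove("foo");  // U == string, compared against a.someString.
    assert(("foo" in myTree) is null);
    assert(("bar" in myTree) !is null);
    ---
    */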
    /**Find an element and return it. Throw an exception if it is not
     * present. U is expected to be the type of the key that this tree is
     * sorted on.*/
    T find(U)(U whatToFind) {
        T* ptr = dstatsEnforce(whatToFind in this,
            "Item not found: " ~ to!string(whatToFind));
        return *ptr;
    }

    /**Find an element and return a pointer to it, or null if not present.*/
    T* opBinaryRight(string op : "in", U)(U whatToFind) {
        auto ret = findImpl!(U)(whatToFind, head);
        if(ret is null) {
            return null;
        }
        return &(ret.payload);
    }

    Node* findImpl(U)(U whatToFind, Node* whereToFind) {
        static bool findComp(V, W)(V lhs, W rhs) {
            static if(is(V == T)) {
                static assert(is(W == U));
                return comp( getKey(lhs), rhs );
            } else {
                static assert(is(V == U));
                static assert(is(W == T));
                return comp( lhs, getKey(rhs) );
            }
        }

        if(whereToFind is null) {
            return null;
        }

        if( findComp(whatToFind, whereToFind.payload) ) {
            return findImpl!(U)(whatToFind, whereToFind.left);
        } else if( findComp(whereToFind.payload, whatToFind) ) {
            return findImpl!(U)(whatToFind, whereToFind.right);
        } else {
            // We've found it.
            return whereToFind;
        }

        assert(0);
    }

    /**Iterate over the elements of this tree in sorted order.*/
    int opApply( int delegate(ref T) dg) {
        int res;
        int opApplyImpl(Node* node) {
            if(node is null) {
                return 0;
            }
            res = opApplyImpl(node.left);
            if(res) {
                return res;
            }
            res = dg(node.payload);
            if(res) {
                return res;
            }
            res = opApplyImpl(node.right);
            return res;
        }

        return opApplyImpl(head);
    }

    /**Number of elements in the tree.*/
    @property size_t length() const pure nothrow {
        return _length;
    }
}

private int assertAvl(T)(T node) {
    if(node is null) {
        return 0;
    }

    int leftHeight = assertAvl(node.left);
    int rightHeight = assertAvl(node.right);
    assert(node.height == max(leftHeight, rightHeight) + 1);
    assert( abs(node.balance) < 2,
        text( height(node.left), '\t', height(node.right)));

    if(node.left) {
        assert(node.left.payload < node.payload);
    }

    if(node.right) {
        assert(node.right.payload > node.payload,
            text(node.payload, ' ', node.right.payload));
    }

    return node.height;
}


unittest {
    // Test against StackSet on random data.
    auto alloc = newRegionAllocator();
    StackTree!(uint) myTree = StackTree!(uint)(alloc);
    StackSet!(uint) ss = StackSet!(uint)(500, alloc);
    foreach(i; 0..1_000_000) {
        uint num = uniform(0, 1_000);
        if(num in ss) {
            assert(num in myTree);
            assert(*(num in myTree) == num);
            ss.remove(num);
            myTree.remove(num);
        } else {
            assert(!(num in myTree));
            ss.insert(num);
            myTree.insert(num);
        }
    }
    assertAvl(myTree.head);
}
/**Struct that iterates over keys or values of a StackTreeAA.
 *
 * Bugs: Uses opApply instead of the more flexible ranges, because I
 * haven't figured out how to iterate efficiently and in sorted order over a
 * tree without control of the stack.
 */
struct TreeAaIter(T, alias mapFun) {
    alias unaryFun!(mapFun) mFun;
    T tree;
    alias typeof(*(tree.head)) Node;

//    TreeRange!(T, mapFun) asRange() {
//        dstatsEnforce(0, "Not implemented yet.");
//    }

    alias typeof( mFun( tree.head.payload ) ) IterType;

    ///
    int opApply( int delegate(ref IterType) dg) {
        int res;
        int opApplyImpl(Node* node) {
            if(node is null) {
                return 0;
            }
            res = opApplyImpl(node.left);
            if(res) {
                return res;
            }

            static if(__traits(compiles, dg(mFun(node.payload)))) {
                res = dg(mFun(node.payload));
            } else {
                auto toDg = mFun(node.payload);
                res = dg(toDg);
            }

            if(res) {
                return res;
            }
            res = opApplyImpl(node.right);
            return res;
        }

        return opApplyImpl(tree.head);
    }

    ///
    @property size_t length() const pure nothrow {
        return tree.length;
    }
}

private struct StackTreeAANode(K, V) {
    Unqual!(K) key;
    Unqual!(V) value;
}

/**An associative array implementation based on StackTree. Lookups and
 * insertions are O(log N). This is significantly slower in both theory and
 * practice than StackHash, but you may want to use it if:
 *
 * 1. You don't know the approximate size of the table you will be creating
 *    in advance. Unlike StackHash, this AA implementation does not need
 *    to pre-allocate anything.
 *
 * 2. You care more about worst-case performance than average-case
 *    performance.
 *
 * 3. You have a good comparison function for your type, but not a good hash
 *    function.
 *
 */
struct StackTreeAA(K, V) {
    alias StackTreeAANode!(K, V) Node;
    StackTree!(Node, "a.key") tree;

    ///
    this(RegionAllocator alloc) {
        this.tree = typeof(tree)(alloc);
    }

    /**Looks up key in the table, returns it by reference. If it does not
     * exist, it will be created and initialized to V.init. This is handy,
     * for example, when counting things with integer types.
     */
    ref V opIndex(K key) {
        Node* result = key in tree;
        if(result is null) {
            tree.insert( Node(key, V.init));
            result = key in tree;
        }

        return result.value;
    }

    ///
    V opIndexAssign(V val, K key) {
        tree.insert( Node(key, val));
        return val;
    }

    ///
    V* opBinaryRight(string op : "in")(K key) {
        auto nodePtr = key in tree;
        if(nodePtr is null) {
            return null;
        }

        return &(nodePtr.value);
    }

    ///
    void remove(K key) {
        tree.remove(key);
    }

    ///
    @property size_t length() const pure nothrow {
        return tree.length;
    }

    ///
    TreeAaIter!( typeof(tree), "a.key") keys() @property {
        typeof(return) ret;
        ret.tree = tree;
        return ret;
    }

    private static ref Unqual!(V) getVal(ref Node node) {
        return node.value;
    }

    ///
    TreeAaIter!( typeof(tree), getVal) values() @property {
        return typeof(return)(tree);
    }

    /**Iterate over both the keys and values of this associative array.*/
    int opApply( int delegate(ref Unqual!(K), ref Unqual!(V)) dg) {
        alias typeof(*(tree.head)) TreeNode;
        int res;
        int opApplyImpl(TreeNode* node) {
            if(node is null) {
                return 0;
            }
            res = opApplyImpl(node.left);
            if(res) {
                return res;
            }
            res = dg(node.payload.key, node.payload.value);
            if(res) {
                return res;
            }
            res = opApplyImpl(node.right);
            return res;
        }

        return opApplyImpl(tree.head);
    }
}
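/*
Illustrative sketch: StackTreeAA offers the same basic interface as
StackHash but needs no size hint up front, at the cost of O(log N)
operations:
---
auto alloc = newRegionAllocator();
auto counts = StackTreeAA!(string, uint)(alloc);
foreach(w; ["foo", "bar", "foo"]) counts[w]++;
assert(counts["foo"] == 2);
assert(counts.length == 2);
---
*/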
unittest {
    // Test against builtin AA on random data.
    {
        auto alloc = newRegionAllocator();
        alias StackTreeAA!(string, uint) mySh;
        auto data = mySh(alloc);
        data["foo"] = 1;
        data["bar"] = 2;
        data["baz"] = 3;
        data["waldo"] = 4;
        assert(!("foobar" in data));
        assert(*("foo" in data) == 1);
        assert(*("bar" in data) == 2);
        assert(*("baz" in data) == 3);
        assert(*("waldo" in data) == 4);
        assert(data["foo"] == 1);
        assert(data["bar"] == 2);
        assert(data["baz"] == 3);
        assert(data["waldo"] == 4);

        assert(data.length == 4);
        auto myKeys = array(data.keys);
        qsort(myKeys);
        assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]);
        auto myValues = array(data.values);
        qsort(myValues);
        assert(myValues == [1U, 2, 3, 4]);

        foreach(v; data.values) {
            assert(v > 0 && v < 5);
        }
    }

    alias StackTreeAA!(uint, uint) mySh2;
    {  // Test remove.
        auto alloc = newRegionAllocator();

        auto foo = mySh2(alloc);
        for(uint i = 0; i < 200; i++) {
            foo[i] = i;
        }
        assert(foo.length == 200);
        for(uint i = 0; i < 200; i += 2) {
            foo.remove(i);
        }
        foreach(i; 20..200) {
            foo.remove(i);
        }
        for(uint i = 0; i < 20; i++) {
            if(i & 1) {
                assert(i in foo);
                assert(*(i in foo) == i);
            } else {
                assert(!(i in foo));
            }
        }
        auto vals = array(foo.values);
        assert(foo.length == 10);
        assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]);
    }

    {  // Monte Carlo unittesting against builtin hash table.
        auto alloc = newRegionAllocator();
        uint[uint] builtin;
        auto monteSh = mySh2(alloc);
        uint[] nums = alloc.uninitializedArray!(uint[])(100_000);
        foreach(ref num; nums) {
            num = uniform(0U, uint.max);
        }

        foreach(i; 0..10_000) {
            auto index = uniform(0, cast(uint) nums.length);
            if(index in builtin) {
                assert(index in monteSh);
                assert(builtin[index] == nums[index]);
                assert(monteSh[index] == nums[index]);
                builtin.remove(index);
                monteSh.remove(index);
            } else {
                assert(!(index in monteSh));
                builtin[index] = nums[index];
                monteSh[index] = nums[index];
            }
        }

        assert(builtin.length == monteSh.length);
        foreach(k, v; builtin) {
            assert(k in monteSh);
            assert(*(k in builtin) == *(k in monteSh));
            assert(monteSh[k] == v);
        }

        // Make sure nothing is missed in iteration. Since both keys and
        // values use the same struct, just with a few static if statements,
        // if it works for keys and simple tests work for values, it works.
        foreach(k; monteSh.keys) {
            builtin.remove(k);
        }
        assert(builtin.length == 0);
    }
}

version(scid) {
    public import scid.internal.regionallocator;
} else {
    version = noscid;
}

version(noscid):

import std.traits, core.memory, std.range, core.exception, std.conv,
    std.algorithm, std.typetuple, std.exception, std.typecons;

static import core.stdc.stdlib;

// This is just for convenience/code readability/saving typing.
private enum ptrSize = (void*).sizeof;

// This was accidentally assumed in a few places and I'm too lazy to fix it
// until I see proof that it needs to be fixed.
static assert(bool.sizeof == 1);

enum size_t defaultSegmentSize = 4 * 1_024 * 1_024;

/**
The exception that is thrown on invalid use of $(D RegionAllocator) and
$(D RegionAllocatorStack). This exception is not thrown on out of memory.
An $(D OutOfMemoryError) is thrown instead.
*/
class RegionAllocatorException : Exception {
    this(string msg, string file, int line) @safe {
        super(msg, file, line);
    }
}
/**
This flag determines whether a given $(D RegionAllocatorStack) is scanned for
pointers by the garbage collector (GC). If yes, the entire stack is scanned,
not just the part currently in use, since there is currently no efficient way
to modify the bounds of a GC region. The stack is scanned conservatively,
meaning that any bit pattern that would point to GC-allocated memory if
interpreted as a pointer is considered to be a pointer. This can result in
GC-allocated memory being retained when it should be freed. Due to these
caveats, it is recommended that any stack scanned by the GC be small and/or
short-lived.
*/
enum GCScan : bool {
    ///
    no = false,

    ///
    yes = true
}

/**
This object represents a segmented stack. Memory can be allocated from this
stack using a $(XREF regionallocator, RegionAllocator) object. Multiple
$(D RegionAllocator) objects may be created per
$(D RegionAllocatorStack) but each $(D RegionAllocator) uses a single
$(D RegionAllocatorStack).

For most use cases it's convenient to use the default thread-local
instance of $(D RegionAllocatorStack), which is lazily instantiated on
the first call to the global function
$(XREF regionallocator, newRegionAllocator). Occasionally it may be useful
to have multiple independent stacks in one thread, in which case a
$(D RegionAllocatorStack) can be created manually.

$(D RegionAllocatorStack) is reference counted and has reference semantics.
When the last copy of a given instance goes out of scope, the memory
held by the $(D RegionAllocatorStack) instance is released back to the
heap. This cannot happen before memory allocated to a $(D RegionAllocator)
instance is released back to the stack, because a $(D RegionAllocator)
holds a copy of the $(D RegionAllocatorStack) instance it uses.

Examples:
---
import std.regionallocator;

void main() {
    fun1();
}

void fun1() {
    auto stack = RegionAllocatorStack(1_048_576, GCScan.no);
    fun2(stack);

    // At the end of fun1, the last copy of the RegionAllocatorStack
    // instance pointed to by stack goes out of scope. The memory
    // held by stack is released back to the heap.
}

void fun2(RegionAllocatorStack stack) {
    auto alloc = stack.newRegionAllocator();
    auto arr = alloc.newArray!(double[])(1_024);

    // At the end of fun2, the last copy of the RegionAllocator instance
    // pointed to by alloc goes out of scope. The memory used by arr
    // is released back to stack.
}
---
*/
struct RegionAllocatorStack {
private:
    RefCounted!(RegionAllocatorStackImpl, RefCountedAutoInitialize.no) impl;
    bool initialized;
    bool _gcScanned;

public:
    /**
    Create a new $(D RegionAllocatorStack) with a given segment size in bytes.
    */
    this(size_t segmentSize, GCScan shouldScan) {
        this._gcScanned = shouldScan;
        if(segmentSize == 0) {
            throw new RegionAllocatorException(
                "Cannot create a RegionAllocatorStack with segment size of 0.",
                __FILE__, __LINE__
            );
        }

        impl = typeof(impl)(segmentSize, shouldScan);
        initialized = true;
    }

    /**
    Creates a new $(D RegionAllocator) region using this stack.
    */
    RegionAllocator newRegionAllocator() {
        if(!initialized) {
            throw new RegionAllocatorException(
                "Cannot create a RegionAllocator from an " ~
                "uninitialized RegionAllocatorStack. Did you call " ~
                "RegionAllocatorStack's constructor?",
                __FILE__,
                __LINE__
            );
        }

        return RegionAllocator(this);
    }

    /**
    Whether this stack is scanned by the garbage collector.
    */
    bool gcScanned() @property const pure nothrow @safe {
        return _gcScanned;
    }
}
private struct RegionAllocatorStackImpl {

    this(size_t segmentSize, GCScan shouldScan) {
        this.segmentSize = segmentSize;
        space = alignedMalloc(segmentSize, shouldScan);

        // We don't need 16-byte alignment for the bookkeeping array.
        immutable nBookKeep = segmentSize / alignBytes;
        bookkeep = (cast(SizetPtr*) core.stdc.stdlib.malloc(nBookKeep))
                   [0..nBookKeep / SizetPtr.sizeof];

        if(!bookkeep.ptr) {
            outOfMemory();
        }

        nBlocks++;
    }

    size_t segmentSize;  // The size of each segment.

    size_t used;
    void* space;
    size_t bookkeepIndex;
    SizetPtr[] bookkeep;
    uint nBlocks;
    uint nFree;
    size_t regionIndex = size_t.max;

    // inUse holds info for all blocks except the one currently being
    // allocated from. freeList holds space ptrs for all free blocks.

    static struct Block {
        size_t used = 0;
        void* space = null;
    }

    SimpleStack!(Block) inUse;
    SimpleStack!(void*) freeList;

    void doubleSize(ref SizetPtr[] bookkeep) {
        size_t newSize = bookkeep.length * 2;
        auto ptr = cast(SizetPtr*) core.stdc.stdlib.realloc(
            bookkeep.ptr, newSize * SizetPtr.sizeof);

        if(!ptr) {
            outOfMemory();
        }

        bookkeep = ptr[0..newSize];
    }

    // Add an element to bookkeep, checking length first.
    void putLast(void* last) {
        if(bookkeepIndex == bookkeep.length) doubleSize(bookkeep);
        bookkeep[bookkeepIndex].p = last;
        bookkeepIndex++;
    }

    void putLast(size_t num) {
        return putLast(cast(void*) num);
    }

    // The number of objects allocated on the C heap instead of here.
    ref size_t nLargeObjects() @property pure nothrow {
        return bookkeep[regionIndex - 4].i;
    }

    // The number of extra segments that have been allocated in the current
    // region beyond the base segment of the region.
    ref size_t nExtraSegments() @property pure nothrow {
        return bookkeep[regionIndex - 3].i;
    }

    // Pushes a segment to the internal free list and frees segments to the
    // heap if there are more than 2x as many segments on the free list as
    // in use.
    void freeSegment() {
        nExtraSegments--;
        freeList.push(space);
        auto newHead = inUse.pop();
        space = newHead.space;
        used = newHead.used;
        nBlocks--;
        nFree++;

        if(nFree >= nBlocks * 2) {
            foreach(i; 0..nFree / 2) {
                alignedFree(freeList.pop);
                nFree--;
            }
        }
    }

    void destroy() {
        if(space) {
            alignedFree(space);
            space = null;
        }

        if(bookkeep) {
            core.stdc.stdlib.free(bookkeep.ptr);
            bookkeep = null;
        }

        while(inUse.index > 0) {
            auto toFree = inUse.pop();
            alignedFree(toFree.space);
        }

        inUse.destroy();

        while(freeList.index > 0) {
            auto toFree = freeList.pop();
            alignedFree(toFree);
        }

        freeList.destroy();
    }

    ~this() {
        destroy();
    }
}
/**
These properties get and set the segment size of the default thread-local
$(D RegionAllocatorStack) instance. The default size is 4 megabytes.
The setter is only effective before the global function
$(D newRegionAllocator) has been called for the first time in the current
thread. Attempts to set this property after the first call to this
function from the current thread throw a $(D RegionAllocatorException).
*/
size_t threadLocalSegmentSize() @property nothrow @safe {
    return _threadLocalSegmentSize;
}

/// Ditto
size_t threadLocalSegmentSize(size_t newSize) @property @safe {
    if(threadLocalInitialized) {
        throw new RegionAllocatorException(
            "Cannot set threadLocalSegmentSize after the thread-local " ~
            "RegionAllocatorStack has been used for the first time.",
            __FILE__,
            __LINE__
        );
    }

    return _threadLocalSegmentSize = newSize;
}

/**
These properties determine whether the default thread-local
$(D RegionAllocatorStack) instance is scanned by the garbage collector.
The default is no. In most cases, scanning a stack this long-lived is not
recommended, as it will cause too many false pointers. (See $(XREF
regionallocator, GCScan) for details.)

The setter is only effective before the global function
$(D newRegionAllocator) has been called for the first time in the current
thread. Attempts to set this property after the first call to this
function from the current thread throw a $(D RegionAllocatorException).
*/
bool scanThreadLocalStack() @property nothrow @safe {
    return _scanThreadLocalStack;
}

/// Ditto
bool scanThreadLocalStack(bool shouldScan) @property @safe {
    if(threadLocalInitialized) {
        throw new RegionAllocatorException(
            "Cannot set scanThreadLocalStack after the thread-local " ~
            "RegionAllocatorStack has been used for the first time.",
            __FILE__,
            __LINE__
        );
    }

    return _scanThreadLocalStack = shouldScan;
}

private size_t _threadLocalSegmentSize = defaultSegmentSize;
private RegionAllocatorStack threadLocalStack;
private bool threadLocalInitialized;
private bool _scanThreadLocalStack = false;

// Ensures the thread-local stack is initialized, then returns it.
private ref RegionAllocatorStack getThreadLocal() {
    if(!threadLocalInitialized) {
        threadLocalInitialized = true;
        threadLocalStack = RegionAllocatorStack(
            threadLocalSegmentSize, cast(GCScan) scanThreadLocalStack
        );
    }

    return threadLocalStack;
}

static ~this() {
    if(threadLocalInitialized) {
        threadLocalStack.impl.refCountedPayload.destroy();
    }
}
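/*
Illustrative sketch of configuring the thread-local stack. Both setters
above must run before the first call to newRegionAllocator() in the thread;
afterwards they throw a RegionAllocatorException:
---
void main() {
    threadLocalSegmentSize = 1_048_576;  // Use 1 MiB segments.
    scanThreadLocalStack = false;        // The default; shown for clarity.
    auto alloc = newRegionAllocator();   // First use locks in the settings.
}
---
*/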
/**
This struct provides an interface to the $(D RegionAllocator) functionality
and enforces scoped deletion. A new instance using the thread-local
$(D RegionAllocatorStack) instance is created using the global
$(D newRegionAllocator) function. A new instance using
an explicitly created $(D RegionAllocatorStack) is created using
$(D RegionAllocatorStack.newRegionAllocator).

Each instance has reference semantics in that any copy will allocate from the
same memory. When the last copy of an instance goes out of scope, all memory
allocated via that instance is freed. Only the most recently created
still-existing $(D RegionAllocator) using a given $(D RegionAllocatorStack)
may be used to allocate and free memory at any given time. Deviations
from this model result in a $(D RegionAllocatorException) being thrown.

An uninitialized $(D RegionAllocator) (for example $(D RegionAllocator.init))
has semantics similar to a null pointer. It may be assigned to or passed to
a function. However, any attempt to call a method will result in a
$(D RegionAllocatorException) being thrown.

Examples:
---
void foo() {
    auto alloc = newRegionAllocator();
    auto ptr1 = bar(alloc);
    auto ptr2 = alloc.allocate(42);

    // The last copy of the RegionAllocator object used to allocate ptr1
    // and ptr2 is going out of scope here. The memory pointed to by
    // both ptr1 and ptr2 will be freed.
}

void* bar(RegionAllocator alloc) {
    auto ret = alloc.allocate(42);

    auto alloc2 = newRegionAllocator();
    auto ptr3 = alloc2.allocate(42);

    // ptr3 was allocated using alloc2, which is going out of scope.
    // Its memory will therefore be freed. ret was allocated using alloc.
    // A copy of this RegionAllocator is still alive in foo() after
    // bar() executes. Therefore, ret will not be freed on returning and
    // is still valid after bar() returns.

    return ret;
}

void* thisIsSafe() {
    // This is safe because the two RegionAllocator objects being used
    // are using two different RegionAllocatorStack objects.
    auto alloc = newRegionAllocator();
    auto ptr1 = alloc.allocate(42);

    auto stack = RegionAllocatorStack(1_048_576, GCScan.no);
    auto alloc2 = stack.newRegionAllocator();

    auto ptr2 = alloc2.allocate(42);
    auto ptr3 = alloc.allocate(42);
}

void* dontDoThis() {
    auto alloc = newRegionAllocator();
    auto ptr1 = alloc.allocate(42);
    auto alloc2 = newRegionAllocator();

    // Error: Allocating from a RegionAllocator instance other than the
    // most recently created one that's still alive from a given stack.
    auto ptr = alloc.allocate(42);
}

void uninitialized() {
    RegionAllocator alloc;
    auto ptr = alloc.allocate(42);  // Error: alloc is not initialized.
    auto alloc2 = alloc;  // Ok. Both alloc, alloc2 are uninitialized.

    alloc2 = newRegionAllocator();
    auto ptr2 = alloc2.allocate(42);  // Ok.
    auto ptr3 = alloc.allocate(42);  // Error: alloc is still uninitialized.

    alloc = alloc2;
    auto ptr4 = alloc.allocate(42);  // Ok.
}
---

Note: Allocations larger than $(D this.segmentSize) are handled as a special
case and fall back to allocating directly from the C heap. These large
allocations are freed as if they were allocated on a $(D RegionAllocatorStack)
when $(D free) or $(D freeLast) is called or the last copy of a
$(D RegionAllocator) instance goes out of scope. However, due to the extra
bookkeeping required, destroying a region (as happens when the last copy of
a $(D RegionAllocator) instance goes out of scope) will require time linear
instead of constant in the number of allocations for regions where these
large allocations are present.
*/
struct RegionAllocator {
private:
    RegionAllocatorStack stack;

    // The region index that should be current anytime this instance is
    // being used. This is checked for in allocate() and free().
    size_t correctRegionIndex = size_t.max;
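    /*
    Illustrative note on the bookkeeping layout set up by the constructor
    below. Each region pushes a four-slot header onto the stack's bookkeep
    array, then records its allocations after it:

        bookkeep[regionIndex - 4]   nLargeObjects for this region
        bookkeep[regionIndex - 3]   nExtraSegments for this region
        bookkeep[regionIndex - 2]   regionIndex of the enclosing region
        bookkeep[regionIndex - 1]   ref count of this RegionAllocator
        bookkeep[regionIndex ...]   pointers returned by allocate()

    decrementRefCount() unwinds exactly this layout when the last copy of a
    region goes out of scope.
    */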
*/
struct RegionAllocator {
private:
    RegionAllocatorStack stack;

    // The region index that should be current anytime this instance is
    // being used. This is checked for in allocate() and free().
    size_t correctRegionIndex = size_t.max;

    this(ref RegionAllocatorStack stack) {
        assert(stack.initialized);
        auto impl = &(stack.impl.refCountedPayload());
        this.stack = stack;

        with(*impl) {
            putLast(0); // nLargeObjects.
            putLast(0); // nExtraSegments.
            putLast(regionIndex); // Old regionIndex.
            putLast(1); // Ref count of current RegionAllocator.
            regionIndex = bookkeepIndex;
            correctRegionIndex = regionIndex;
        }
    }

    // CTFE function, for static assertions. Can't use bsr/bsf because they
    // have to be usable at compile time.
    static bool isPowerOf2(size_t num) pure nothrow @safe {
        return num && !(num & (num - 1));
    }

    alias RegionAllocatorStackImpl Impl; // Save typing.

    // This is written as a mixin instead of a function because it's
    // performance critical and it wouldn't be inlinable because it throws.
    // By using a mixin, initialized can be checked all the time instead of
    // just in debug mode, for negligible performance cost.
    enum string getStackImplMixin = q{
        if(!initialized) {
            throw new RegionAllocatorException(
                "RegionAllocator instance not initialized. Please use " ~
                "newRegionAllocator() to create a RegionAllocator object.",
                __FILE__,
                __LINE__
            );
        }

        auto impl = &(stack.impl.refCountedPayload());
    };

    void incrementRefCount() {
        mixin(getStackImplMixin);
        impl.bookkeep[correctRegionIndex - 1].i++;
    }

    void decrementRefCount() {
        mixin(getStackImplMixin);
        impl.bookkeep[correctRegionIndex - 1].i--;

        if(impl.bookkeep[correctRegionIndex - 1].i == 0) {
            if(impl.regionIndex != correctRegionIndex) {
                throw new RegionAllocatorException(
                    "Cannot free RegionAllocator regions in non-last-in, " ~
                    "first-out order. Did you return a RegionAllocator " ~
                    "from a function?",
                    __FILE__,
                    __LINE__
                );
            }

            with(*impl) {
                // Free allocations one at a time until we don't have any
                // more large objects.
                while(nLargeObjects > 0 && bookkeepIndex > regionIndex) {
                    assert(bookkeepIndex > regionIndex);
                    freeLast();
                }

                // Free any extra segments that were used by this region.
                while(nExtraSegments > 0) {
                    freeSegment();
                }

                if(bookkeepIndex > regionIndex) {
                    // Then there's something left to free.
                    used = bookkeep[regionIndex].i - cast(size_t) space;
                }

                bookkeepIndex = regionIndex - 4;
                regionIndex = bookkeep[regionIndex - 2].i;
            }
        }
    }

    bool initialized() @property const pure nothrow @safe {
        return correctRegionIndex < size_t.max;
    }
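
    /* Bookkeeping layout, as implied by the constructor and
     * decrementRefCount() above: each region pushes four slots onto
     * bookkeep[], so for a region whose regionIndex is R:
     *
     *     bookkeep[R - 4].i  nLargeObjects
     *     bookkeep[R - 3].i  nExtraSegments
     *     bookkeep[R - 2].i  regionIndex of the enclosing region
     *     bookkeep[R - 1].i  reference count of this RegionAllocator
     *
     * This is why destroying a region rewinds bookkeepIndex by four and
     * restores the old regionIndex from two slots back.
     */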
    Unqual!(ElementType!(R))[] arrayImplStack(R)(R range) {
        alias ElementType!(R) E;
        alias Unqual!(E) U;
        static if(hasLength!(R)) {
            U[] ret = uninitializedArray!(U[])(range.length);
            copy(range, ret);
            return ret;
        } else {
            mixin(getStackImplMixin);
            auto startPtr = allocate(0);
            size_t bytesCopied = 0;

            while(!range.empty) {
                auto elem = range.front;
                if(impl.used + U.sizeof <= segmentSize) {
                    range.popFront;
                    *(cast(U*) (startPtr + bytesCopied)) = elem;
                    bytesCopied += U.sizeof;
                    impl.used += U.sizeof;
                } else {
                    if(bytesCopied + U.sizeof >= segmentSize / 2) {
                        // Then just heap-allocate.
                        U[] result = (cast(U*)
                            alignedMalloc(bytesCopied * 2, gcScanned))
                            [0..bytesCopied / U.sizeof * 2];

                        immutable elemsCopied = bytesCopied / U.sizeof;
                        result[0..elemsCopied] = (cast(U*) startPtr)
                            [0..elemsCopied];
                        finishCopy(result, range, elemsCopied);
                        freeLast();
                        impl.putLast(result.ptr);
                        impl.nLargeObjects++;
                        return result;
                    } else {
                        // Force allocation of a new segment.
                        U[] oldData = (cast(U*) startPtr)
                            [0..bytesCopied / U.sizeof];
                        impl.used -= bytesCopied;
                        impl.bookkeepIndex--;
                        U[] arr = uninitializedArray!(U[])
                            (bytesCopied / U.sizeof + 1);
                        arr[0..oldData.length] = oldData[];
                        startPtr = impl.space;
                        arr[$ - 1] = elem;
                        bytesCopied += U.sizeof;
                        range.popFront;
                    }
                }
            }
            auto rem = bytesCopied % .alignBytes;
            if(rem != 0) {
                auto toAdd = .alignBytes - rem;
                if(impl.used + toAdd < RegionAllocator.segmentSize) {
                    impl.used += toAdd;
                } else {
                    impl.used = RegionAllocator.segmentSize;
                }
            }
            return (cast(U*) startPtr)[0..bytesCopied / U.sizeof];
        }
    }

    Unqual!(ElementType!(R))[] arrayImplHeap(R)(R range) {
        // Initial guess of how much space to allocate. It's relatively large
        // b/c the object will be short lived, so speed is more important than
        // space efficiency.
        enum initialGuess = 128;

        mixin(getStackImplMixin);
        alias Unqual!(ElementType!R) E;
        auto arr = (cast(E*) alignedMalloc(E.sizeof * initialGuess, true))
            [0..initialGuess];

        finishCopy(arr, range, 0);
        impl.putLast(arr.ptr);
        impl.nLargeObjects++;
        return arr;
    }

public:

    this(this) {
        if(initialized) incrementRefCount();
    }

    ~this() {
        if(initialized) decrementRefCount();
    }

    void opAssign(RegionAllocator rhs) {
        if(initialized) decrementRefCount();
        this.stack = rhs.stack;
        this.correctRegionIndex = rhs.correctRegionIndex;
        if(initialized) incrementRefCount();
    }

    /**
    Allocates $(D nBytes) bytes on the $(D RegionAllocatorStack) used by this
    $(D RegionAllocator) instance. The last block allocated from this
    $(D RegionAllocator) instance can be freed by calling
    $(D RegionAllocator.free) or $(D RegionAllocator.freeLast) or will be
    automatically freed when the last copy of this $(D RegionAllocator)
    instance goes out of scope.

    Allocation requests larger than $(D segmentSize) are
    allocated directly on the C heap, are scanned by the GC iff
    the $(D RegionAllocatorStack) instance that this object uses is scanned by
    the GC, and are freed according to the same rules as described above.
    */
    void* allocate(size_t nBytes) {
        mixin(getStackImplMixin);
        if(impl.regionIndex != this.correctRegionIndex) {
            throw new RegionAllocatorException(
                "Cannot allocate memory from a RegionAllocator that is not " ~
                "currently at the top of the stack.",
                __FILE__,
                __LINE__
            );
        }

        nBytes = allocSize(nBytes);
        with(*impl) {
            void* ret;
            if(segmentSize - used >= nBytes) {
                ret = space + used;
                used += nBytes;
            } else if(nBytes > segmentSize) {
                ret = alignedMalloc(nBytes, gcScanned);
                impl.nLargeObjects++;
            } else if(nFree > 0) {
                nExtraSegments++;
                inUse.push(Block(used, space));
                space = freeList.pop;
                used = nBytes;
                nFree--;
                nBlocks++;
                ret = space;
            } else { // Allocate more space.
                nExtraSegments++;
                inUse.push(Block(used, space));
                space = alignedMalloc(segmentSize, gcScanned);
                nBlocks++;
                used = nBytes;
                ret = space;
            }
            putLast(ret);
            return ret;
        }
    }
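
    // A minimal sketch (not one of the original tests) of the LIFO
    // discipline described above.
    unittest {
        auto alloc = newRegionAllocator();
        auto a = alloc.allocate(32); // First block.
        auto b = alloc.allocate(64); // Second block.
        alloc.freeLast();            // Frees b, the last block allocated.
        alloc.free(a);               // Checked free; a is now the last block.
    }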
    /**
    Frees the last block of memory allocated by the current
    $(D RegionAllocator). Throws a $(D RegionAllocatorException) if
    this $(D RegionAllocator) is not the most recently created still-existing
    $(D RegionAllocator) using its $(D RegionAllocatorStack) instance.
    */
    void freeLast() {
        mixin(getStackImplMixin);
        if(impl.regionIndex != this.correctRegionIndex) {
            throw new RegionAllocatorException(
                "Cannot free memory to a RegionAllocator that is not " ~
                "currently at the top of the stack, or memory that has not " ~
                "been allocated with this instance.",
                __FILE__,
                __LINE__
            );
        }

        with(*impl) {
            auto lastPos = bookkeep[--bookkeepIndex].p;

            // Handle large blocks.
            if(lastPos > space + segmentSize || lastPos < space) {
                alignedFree(lastPos);
                impl.nLargeObjects--;
                return;
            }

            used = (cast(size_t) lastPos) - (cast(size_t) space);
            if(nBlocks > 1 && used == 0) {
                impl.freeSegment();
            }
        }
    }

    /**
    Checks that $(D ptr) is a pointer to the block that would be freed by
    $(D freeLast), then calls $(D freeLast). Throws a
    $(D RegionAllocatorException) if the pointer does not point to the
    block that would be freed by $(D freeLast).
    */
    void free(void* ptr) {
        mixin(getStackImplMixin);
        auto lastPos = impl.bookkeep[impl.bookkeepIndex - 1].p;
        if(ptr !is lastPos) {
            throw new RegionAllocatorException(
                "Cannot free RegionAllocator memory in non-LIFO order.",
                __FILE__,
                __LINE__
            );
        }

        freeLast();
    }

    /**
    Attempts to resize a previously allocated block of memory in place.
    This is possible only if $(D ptr) points to the beginning of the last
    block allocated by this $(D RegionAllocator) instance and, in the
    case where $(D newSize) is greater than the old size, there is
    additional space in the segment that $(D ptr) was allocated from.
    Additionally, blocks larger than this $(D RegionAllocator)'s segment size
    cannot be grown or shrunk.

    Returns: True if the block was successfully resized, false otherwise.
    */
    bool resize(const(void)* ptr, size_t newSize) {
        mixin(getStackImplMixin);

        // This works since we always allocate in increments of alignBytes
        // no matter what the allocation size.
        newSize = allocSize(newSize);

        with(*impl) {
            auto lastPos = bookkeep[bookkeepIndex - 1].p;
            if(ptr !is lastPos) {
                return false;
            }

            // If it's a large block directly allocated on the heap,
            // then we definitely can't resize it in place.
            if(lastPos > space + segmentSize || lastPos < space) {
                return false;
            }

            immutable blockSize = used - ((cast(size_t) lastPos) -
                cast(size_t) space);
            immutable sizediff_t diff = newSize - blockSize;

            if(cast(sizediff_t) (impl.segmentSize - used) >= diff) {
                used += diff;
                return true;
            }
        }

        return false;
    }
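
    // A sketch (not one of the original tests) of the in-place resize
    // contract above: only the most recently allocated block can be resized.
    unittest {
        auto alloc = newRegionAllocator();
        auto a = alloc.allocate(16);
        auto b = alloc.allocate(16);
        assert(!alloc.resize(a, 32)); // a is not the last block.
        assert(alloc.resize(b, 32));  // b is, and its segment has slack.
        alloc.freeLast();
        alloc.freeLast();
    }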
    /**
    Returns whether the $(D RegionAllocatorStack) used by this
    $(D RegionAllocator) instance is scanned by the garbage collector.
    */
    bool gcScanned() @property const pure nothrow @safe {
        return stack.gcScanned;
    }

    /**
    Allocates an array of type $(D T). $(D T) may be a multidimensional
    array. In this case sizes may be specified for any number of dimensions
    from 1 to the number in $(D T).

    Examples:
    ---
    auto alloc = newRegionAllocator();
    double[] arr = alloc.newArray!(double[])(100);
    assert(arr.length == 100);

    double[][] matrix = alloc.newArray!(double[][])(42, 31);
    assert(matrix.length == 42);
    assert(matrix[0].length == 31);
    ---
    */
    auto newArray(T, I...)(I sizes)
    if(allSatisfy!(isIntegral, I)) {

        static void initialize(R)(R toInitialize) {
            static if(isArray!(ElementType!R)) {
                foreach(elem; toInitialize) {
                    initialize(elem);
                }
            } else {
                toInitialize[] = ElementType!(R).init;
            }
        }

        auto ret = uninitializedArray!(T, I)(sizes);
        initialize(ret);
        return ret;
    }

    /**
    Same as $(D newArray), except skips initialization of elements for
    performance reasons.
    */
    auto uninitializedArray(T, I...)(I sizes)
    if(allSatisfy!(isIntegral, I)) {
        static assert(sizes.length >= 1,
            "Cannot allocate an array without the size of at least the " ~
            "first dimension.");
        static assert(sizes.length <= nDims!T,
            to!string(sizes.length) ~ " dimensions specified for a " ~
            to!string(nDims!T) ~ "-dimensional array.");

        alias typeof(T.init[0]) E;

        auto ptr = cast(E*) allocate(sizes[0] * E.sizeof);
        auto ret = ptr[0..sizes[0]];

        static if(sizes.length > 1) {
            foreach(ref elem; ret) {
                elem = uninitializedArray!(E)(sizes[1..$]);
            }
        }

        return ret;
    }

    /**
    Returns the number of bytes to which an allocation of size $(D nBytes)
    is guaranteed to be aligned.
    */
    static size_t alignBytes(size_t nBytes) {
        return .alignBytes;
    }

    /**
    Returns the number of bytes used to satisfy an allocation request
    of $(D nBytes). Will return a value greater than or equal to
    $(D nBytes) to account for alignment overhead.
    */
    static size_t allocSize(size_t nBytes) pure nothrow {
        static assert(isPowerOf2(.alignBytes));
        return (nBytes + (.alignBytes - 1)) & (~(.alignBytes - 1));
    }
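
    // Quick sanity check of the rounding rule above, given this module's
    // 16-byte alignBytes.
    unittest {
        assert(allocSize(1) == 16);
        assert(allocSize(16) == 16);
        assert(allocSize(17) == 32);
    }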
    /**
    False because memory allocated by this allocator is not automatically
    reclaimed by the garbage collector.
    */
    enum isAutomatic = false;

    /**
    True because, when the last copy of a $(D RegionAllocator) instance
    goes out of scope, the memory it references is automatically freed.
    */
    enum isScoped = true;

    /**
    True because if memory is freed via $(D free()) instead of $(D freeLast())
    then the pointer is checked for validity.
    */
    enum freeIsChecked = true;

    /**
    Returns the segment size of the $(D RegionAllocatorStack) used by this
    $(D RegionAllocator).
    */
    size_t segmentSize() @property {
        mixin(getStackImplMixin);
        return impl.segmentSize;
    }

    /**
    Returns the maximum number of bytes that may be allocated in the
    current segment.
    */
    size_t segmentSlack() @property {
        mixin(getStackImplMixin);
        return impl.segmentSize - impl.used;
    }

    /**
    Copies $(D range) to an array. The array will be located on the
    $(D RegionAllocator) stack if any of the following conditions apply:

    1. $(D std.traits.hasIndirections!(ElementType!R)) is false.

    2. $(D R) is a builtin array. In this case $(D range) maintains pointers
       to all elements at least until $(D array) returns, preventing the
       elements from being freed by the garbage collector. A similar
       assumption cannot be made for ranges other than builtin arrays.

    3. The $(D RegionAllocatorStack) instance used by this
       $(D RegionAllocator) is scanned by the garbage collector.

    If none of these conditions is met, the array is returned on the C heap
    and $(D GC.addRange) is called. In either case, $(D RegionAllocator.free),
    $(D RegionAllocator.freeLast), or the last copy of this
    $(D RegionAllocator) instance going out of scope will free the array as
    if it had been allocated on the $(D RegionAllocator) stack.

    Rationale: The most common reason to call $(D array) on a builtin array
    is to modify its contents inside a function without affecting the
    caller's view. In this case $(D range) is not modified, and it
    prevents the elements from being freed by the garbage collector.
    Furthermore, if the copy returned does need to be scanned, the
    client can call $(D GC.addRange) before modifying the original
    array.

    Examples:
    ---
    auto alloc = newRegionAllocator();
    auto arr = alloc.array(iota(5));
    assert(arr == [0, 1, 2, 3, 4]);
    ---
    */
    Unqual!(ElementType!(R))[] array(R)(R range) if(isInputRange!R) {
        alias Unqual!(ElementType!(R)) E;
        if(gcScanned || !hasIndirections!E || isArray!R) {
            return arrayImplStack(range);
        } else {
            return arrayImplHeap(range);
        }
    }
}

/**
Returns a new $(D RegionAllocator) that uses the default thread-local
$(D RegionAllocatorStack) instance.
*/
RegionAllocator newRegionAllocator() {
    return RegionAllocator(getThreadLocal());
}

// Finishes copying a range to a C heap allocated array. Assumes the first
// half of the input array is stuff already copied and the second half is
// free space.
private void finishCopy(T, U)(ref T[] result, U range, size_t alreadyCopied) {
    void doRealloc() {
        auto newPtr = cast(T*) alignedRealloc(
            result.ptr, result.length * T.sizeof * 2, result.length * T.sizeof
        );
        result = newPtr[0..result.length * 2];
    }

    auto index = alreadyCopied;
    foreach(elem; range) {
        if(index == result.length) doRealloc();
        result[index++] = elem;
    }

    result = result[0..index];
}

unittest {
    auto alloc = newRegionAllocator();
    auto arr = alloc.array(iota(5));
    assert(arr == [0, 1, 2, 3, 4]);

    // Create a quick and dirty finite but lengthless range.
    static struct Count {
        uint num;
        uint upTo;
        @property size_t front() {
            return num;
        }
        void popFront() {
            num++;
        }
        @property bool empty() {
            return num >= upTo;
        }
    }

    alloc.allocate(1024 * 1024 * 3);
    Count count;
    count.upTo = 1024 * 1025;
    auto asArray = alloc.array(count);
    foreach(i, elem; asArray) {
        assert(i == elem, to!(string)(i) ~ "\t" ~ to!(string)(elem));
    }
    assert(asArray.length == 1024 * 1025);
    alloc.freeLast();
    alloc.freeLast();

    while(alloc.stack.impl.refCountedPayload.freeList.index > 0) {
        alignedFree(alloc.stack.impl.refCountedPayload.freeList.pop());
    }
}

unittest {
    auto alloc = newRegionAllocator();
    double[] arr = alloc.uninitializedArray!(double[])(100);
    assert(arr.length == 100);

    double[][] matrix = alloc.uninitializedArray!(double[][])(42, 31);
    assert(matrix.length == 42);
    assert(matrix[0].length == 31);

    double[][] mat2 = alloc.newArray!(double[][])(3, 1);
    assert(mat2.length == 3);
    assert(mat2[0].length == 1);

    import std.math;
    assert(isNaN(mat2[0][0]));
}

unittest {
    /* Not a particularly good unittest in that it depends on knowing the
     * internals of RegionAllocator, but it's the best I could come up with.
     * This is really more of a stress test/sanity check than a normal
     * unittest.*/

    // Make sure state is completely reset.
    destroy(threadLocalStack.impl);
    threadLocalStack = RegionAllocatorStack.init;
    threadLocalInitialized = false;

    // First, test to make sure a large number of allocations does what it's
    // supposed to in terms of reallocing bookkeep[], etc.
    enum nIter = defaultSegmentSize * 5 / alignBytes;

    {
        auto alloc = newRegionAllocator();
        foreach(i; 0..nIter) {
            alloc.allocate(alignBytes);
        }
        assert(alloc.stack.impl.refCountedPayload.nBlocks == 5,
            to!string(alloc.stack.impl.refCountedPayload.nBlocks));
        assert(alloc.stack.impl.refCountedPayload.nFree == 0);
        foreach(i; 0..nIter) {
            alloc.freeLast();
        }
        assert(alloc.stack.impl.refCountedPayload.nBlocks == 1);
        assert(alloc.stack.impl.refCountedPayload.nFree == 2);

        // Make sure the logic for freeing excess blocks works. If it
        // doesn't, this test will run out of memory.
        enum allocSize = defaultSegmentSize / 2;
        foreach(i; 0..50) {
            foreach(j; 0..50) {
                alloc.allocate(allocSize);
            }
            foreach(j; 0..50) {
                alloc.freeLast();
            }
        }

        // Make sure data is stored properly.
        foreach(i; 0..10) {
            alloc.allocate(allocSize);
        }
        foreach(i; 0..5) {
            alloc.freeLast();
        }
        void* space = alloc.stack.impl.refCountedPayload.space;
        size_t used = alloc.stack.impl.refCountedPayload.used;

        {
            auto alloc2 = newRegionAllocator();
            auto arrays = new uint[][10];

            foreach(i; 0..10) {
                uint[] data = alloc2.uninitializedArray!(uint[])(250_000);
                foreach(j, ref e; data) {
                    e = cast(uint) (j * (i + 1));
                }
                arrays[i] = data;
            }

            // Make stuff get overwritten if blocks are getting GC'd when
            // they're not supposed to.
            GC.minimize; // Free up all excess pools.
            uint[][] foo;
            foreach(i; 0..40) {
                foo ~= new uint[1_048_576];
            }
            foo = null;

            for(size_t i = 9; i != size_t.max; i--) {
                foreach(j, e; arrays[i]) {
                    assert(e == j * (i + 1));
                }
            }
        }

        assert(space == alloc.stack.impl.refCountedPayload.space,
            text(space, '\t', alloc.stack.impl.refCountedPayload.space));
        assert(used == alloc.stack.impl.refCountedPayload.used,
            text(used, '\t', alloc.stack.impl.refCountedPayload.used));
        while(alloc.stack.impl.refCountedPayload.nBlocks > 1 ||
              alloc.stack.impl.refCountedPayload.used > 0) {
            alloc.freeLast();
        }
    }

    // Test that everything is really getting destroyed properly when
    // destroy() is called. If not, then this test will run out of memory.
    foreach(i; 0..1000) {
        destroy(threadLocalStack.impl);
        threadLocalInitialized = false;

        auto alloc = newRegionAllocator();
        foreach(j; 0..1_000) {
            auto ptr = alloc.allocate(20_000);
            assert((cast(size_t) ptr) % alignBytes == 0);
        }

        foreach(j; 0..500) {
            alloc.freeLast();
        }
    }
}

unittest {
    // Make sure the basics of using explicit stacks work.
    auto stack = RegionAllocatorStack(4 * 1024 * 1024, GCScan.no);
    auto alloc = stack.newRegionAllocator();
    auto arr = alloc.array(iota(5));
    assert(arr == [0, 1, 2, 3, 4]);
    auto ptr = alloc.allocate(5);

    auto alloc2 = newRegionAllocator();
    auto ptr2 = alloc2.allocate(5);
    auto ptr3 = alloc.allocate(5);
}

unittest {
    // Make sure the stacks get freed properly when they go out of scope.
    // If they don't, then this will run out of memory.
    foreach(i; 0..100_000) {
        auto stack = RegionAllocatorStack(4 * 1024 * 1024, GCScan.no);
    }
}

unittest {
    // Make sure resize works properly.
    auto alloc = newRegionAllocator();
    auto arr1 = alloc.array(iota(4));
    auto res = alloc.resize(arr1.ptr, 8 * int.sizeof);
    assert(res);
    arr1 = arr1.ptr[0..8];
    copy(iota(4, 8), arr1[4..$]);

    auto arr2 = alloc.newArray!(int[])(8);

    // If resize() had resized to something too small, this would have been
    // overwritten:
    assert(arr1 == [0, 1, 2, 3, 4, 5, 6, 7], text(arr1));

    alloc.free(arr2.ptr);
    auto res2 = alloc.resize(arr1.ptr, 4 * int.sizeof);
    assert(res2);
    arr2 = alloc.newArray!(int[])(8);

    // This time the memory in arr1 should have been overwritten.
    assert(arr1 == [0, 1, 2, 3, 0, 0, 0, 0]);
}

unittest {
    // Make sure that default thread-local stacks get freed properly at the
    // termination of a thread. If they don't, then this will run out of
    // memory.

    import core.thread;
    foreach(i; 0..100) {
        auto fun = {
            threadLocalSegmentSize = 100 * 1024 * 1024;
            newRegionAllocator();
        };

        auto t = new Thread(fun);
        t.start();
        t.join();
    }
}

unittest {
    // Make sure assignment works as advertised.
    RegionAllocator alloc;
    auto alloc2 = newRegionAllocator();
    auto ptr = alloc2.allocate(8);
    alloc = alloc2;
    alloc.freeLast();
    auto ptr2 = alloc2.allocate(8);
    assert(ptr is ptr2);
}

// Simple, fast stack w/o error checking.
static struct SimpleStack(T) {
    private size_t capacity;
    private size_t index;
    private T* data;
    private enum sz = T.sizeof;

    private static size_t max(size_t lhs, size_t rhs) pure nothrow {
        return (rhs > lhs) ? rhs : lhs;
    }

    void push(T elem) {
        if(capacity == index) {
            capacity = max(16, capacity * 2);
            data = cast(T*) core.stdc.stdlib.realloc(data, capacity * sz);
        }
        data[index++] = elem;
    }

    T pop() {
        index--;
        auto ret = data[index];
        return ret;
    }

    void destroy() {
        if(data) {
            core.stdc.stdlib.free(data);
            data = null;
        }
    }
}

private void outOfMemory() {
    // core.exception.OutOfMemoryError's constructor takes file/line
    // arguments rather than a message string; its message is fixed.
    throw new OutOfMemoryError();
}

// Memory allocation routines. These wrap malloc(), free() and realloc(),
// and guarantee alignment.
private enum size_t alignBytes = 16;

private void* alignedMalloc(size_t size, bool shouldAddRange = false) {
    // We need (alignBytes - 1) extra bytes to guarantee alignment, 1 byte
    // to store the shouldAddRange flag, and ptrSize bytes to store
    // the pointer to the beginning of the block.
    void* toFree = core.stdc.stdlib.malloc(
        alignBytes + ptrSize + size
    );

    if(toFree is null) outOfMemory();

    // Add the offset for the flag and the base pointer.
    auto intPtr = cast(size_t) toFree + ptrSize + 1;

    // Align it.
    intPtr = (intPtr + alignBytes - 1) & (~(alignBytes - 1));
    auto ret = cast(void**) intPtr;

    // Store the base pointer.
    (cast(void**) ret)[-1] = toFree;

    // Store the flag.
    (cast(bool*) ret)[-1 - ptrSize] = shouldAddRange;

    if(shouldAddRange) {
        GC.addRange(ret, size);
    }

    return ret;
}

private void alignedFree(void* ptr) {
    // If it was allocated with alignedMalloc() then the pointer to the
    // beginning is at ptr[-1].
    auto addedRange = (cast(bool*) ptr)[-1 - ptrSize];

    if(addedRange) {
        GC.removeRange(ptr);
    }

    core.stdc.stdlib.free((cast(void**) ptr)[-1]);
}

// This is used by RegionAllocator, but its interface may still change, so
// it's kept private and undocumented for now.
private void* alignedRealloc(void* ptr, size_t newLen, size_t oldLen) {
    auto storedRange = (cast(bool*) ptr)[-1 - ptrSize];
    auto newPtr = alignedMalloc(newLen, storedRange);
    memcpy(newPtr, ptr, oldLen);

    alignedFree(ptr);
    return newPtr;
}

private union SizetPtr {
    size_t i;
    void* p;
}
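
// A minimal sanity sketch (not one of the original tests) of the guarantees
// documented above: alignedMalloc returns a 16-byte-aligned pointer whose
// base pointer and GC flag alignedFree can recover.
unittest {
    auto p = alignedMalloc(100, false);
    assert((cast(size_t) p) % alignBytes == 0);
    alignedFree(p);
}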