/**
Stuff having to do with memory management.  Mostly a copy of RegionAllocator
for now until it gets into Phobos, as well as some RegionAllocator-specific
data structures.

Author:  David Simcha*/
 /*
 * License:
 * Boost Software License - Version 1.0 - August 17th, 2003
 *
 * Permission is hereby granted, free of charge, to any person or organization
 * obtaining a copy of the software and accompanying documentation covered by
 * this license (the "Software") to use, reproduce, display, distribute,
 * execute, and transmit the Software, and to prepare derivative works of the
 * Software, and to permit third-parties to whom the Software is furnished to
 * do so, all subject to the following:
 *
 * The copyright notices in the Software and this entire statement, including
 * the above license grant, this restriction and the following disclaimer,
 * must be included in all copies of the Software, in whole or in part, and
 * all derivative works of the Software, unless such copies or derivative
 * works are solely in the form of machine-executable object code generated by
 * a source language processor.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

module dstats.alloc;

import std.traits, core.memory, std.array, std.range, core.exception,
    std.functional, std.math, std.algorithm : max;

static import core.stdc.stdlib;
import core.stdc.string : memcpy;

import dstats.base;

version(unittest) {
    import std.stdio, std.conv, std.random, dstats.sort;
}

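// Appends!(T, U) is true iff a U can be appended to a T[] with ~=.  For
// example, Appends!(uint, int) is true, while Appends!(uint, string) is
// false.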
private template Appends(T, U) {
    enum bool Appends = AppendsImpl!(T, U).ret;
}

private template AppendsImpl(T, U) {
    T[] a;
    U b;
    enum bool ret = is(typeof(a ~= b));
}

///Appends to an array, deleting the old array if it has to be realloced.
void appendDelOld(T, U)(ref T[] to, U from)
if(Appends!(T, U)) {
    auto old = to;
    to ~= from;
    if (old.ptr !is to.ptr && old.ptr !is null)
    {
        import core.memory;
        static if (__traits(isPOD, T))
            GC.free(old.ptr);
        else
            __delete(old);
    }
}

unittest {
    uint[] foo;
    foo.appendDelOld(5);
    foo.appendDelOld(4);
    foo.appendDelOld(3);
    foo.appendDelOld(2);
    foo.appendDelOld(1);
    assert(foo == cast(uint[]) [5,4,3,2,1]);
}

private struct SHNode(K, V) {
    alias SHNode!(K, V) SomeType;
    SomeType* next;
    Unqual!(K) key;
    Unqual!(V) val;
}

/**Forward range struct for iterating over the keys or values of a
 * StackHash or StackSet.  The lifetime of this object must not exceed that
 * of the underlying StackHash or StackSet.*/
struct HashRange(K, S, bool vals = false) {
private:
    S* set;
    size_t index;
    S.Node* next;
    K* frontElem;
    size_t _length;

    this(S* set) {
        this.set = set;
        if(set.rNext[0] == set.usedSentinel) {
            this.popFront;
        } else {
            static if(vals) {
                frontElem = set.rVals.ptr;
            } else {
                frontElem = set.rKeys.ptr;
            }
            next = set.rNext[0];
        }
        this._length = set.length;
    }

public:
    ///
    void popFront()
    in {
        assert(!empty);
    } do {
        this._length--;
        if(next is null) {
            do {
                index++;
                if(index >= set.rNext.length) {
                    index = size_t.max;  // Sentinel for empty.
                    return;
                }
                next = set.rNext[index];
            } while(set.rNext[index] == set.usedSentinel);
            static if(vals) {
                frontElem = &(set.rVals[index]);
            } else {
                frontElem = &(set.rKeys[index]);
            }
        } else {
            static if(vals) {
                frontElem = &(next.val);
            } else {
                frontElem = &(next.key);
            }
            next = next.next;
        }
    }

    ///
    static if(vals) {
        @property ref Unqual!(K) front()
        in {
            assert(!empty);
        } do {
            return *frontElem;
        }
    } else {
        @property Unqual!(K) front()
        in {
            assert(!empty);
        } do {
            return *frontElem;
        }
    }

    ///
    @property bool empty() {
        return index == size_t.max;
    }

    ///
    @property size_t length() {
        return _length;
    }

    ///
    @property typeof(this) save() {
        return this;
    }
}

/**A hash table that allocates its memory on RegionAllocator.  Good for
 * building temporary hash tables that will not escape the current scope.
 *
 * Examples:
 * ---
 * auto alloc = newRegionAllocator();
 * auto ss = StackHash!(uint, uint)(5, alloc);
 * foreach(i; 0..5) {
 *     ss[i]++;
 * }
 * assert(ss[3] == 1);
 * ---
 *
 * Warning:
 * This implementation places removed nodes on an internal free list and
 * recycles them, since there is no way to delete RegionAllocator-allocated data
 * in a non-LIFO order.  Therefore, you may not retain the address of a
 * variable stored in a StackHash after deleting it from the StackHash.
 * For example, DO NOT do this:
 * ---
 * SomeType* myPtr = &(myStackHash["foo"]);
 * myStackHash.remove("foo");
 * *myPtr = someValue;
 * ---
 */
struct StackHash(K, V) {
private:
    alias SHNode!(K, V) Node;

    // Using parallel arrays instead of structs to save on alignment overhead:
    Unqual!(K)[] rKeys;
    Unqual!(V)[] rVals;
    Unqual!(Node*)[] rNext;

    // Holds nodes that were deleted by remove().
    Node** freeList;

    RegionAllocator alloc;
    size_t _length;

    // Tries to allocate off the free list.  Otherwise allocates off
    // RegionAllocator.
    Node* allocNode() {
        if(*freeList is null) {
            return cast(Node*) alloc.allocate(Node.sizeof);
        }
        auto ret = *freeList;
        *freeList = (*freeList).next;
        return ret;
    }

    // Add a removed node to the free list.
    void pushFreeList(Node* node) {
        if(*freeList is null) {
            node.next = null;  // Sentinel
            *freeList = node;
        } else {
            node.next = *freeList;
            *freeList = node;
        }
    }

    // rNext.ptr is stored in elements of rNext as a sentinel to indicate
    // that the corresponding slot is unused.
    Node* usedSentinel() @property {
        return cast(Node*) rNext.ptr;
    }

    Node* newNode(K key) {
        Node* ret = allocNode();
        ret.key = key;
        ret.val = V.init;
        ret.next = null;
        return ret;
    }

    Node* newNode(K key, V val) {
        Node* ret = allocNode();
        ret.key = key;
        ret.val = val;
        ret.next = null;
        return ret;
    }

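    // Choose the cheapest hash available:  integral keys that fit in a
    // hash_t hash to themselves, types that define toHash() use it, and
    // everything else falls back to the TypeInfo hash.  The result is
    // reduced modulo the table size.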
    hash_t getHash(K key) {
        static if(is(K : long) && K.sizeof <= hash_t.sizeof) {
            hash_t hash = cast(hash_t) key;
        } else static if(__traits(compiles, key.toHash())) {
            hash_t hash = key.toHash();
        } else {
            hash_t hash = typeid(K).getHash(&key);
        }
        hash %= rNext.length;
        return hash;
    }

public:
    /**Due to the nature of RegionAllocator, you must specify on object creation
     * the approximate number of elements your table will have.  Too large a
     * number will waste space and incur poor cache performance.  Too low a
     * number will make this struct perform like a linked list.  Generally,
     * if you're building a table from some other range, some fraction of the
     * size of that range is a good guess.*/
    this(size_t nElem, RegionAllocator alloc) {
        // Obviously, the caller can never mean zero, because this struct
        // can't work at all with nElem == 0, so assume it's a mistake and fix
        // it here.
        this.alloc = alloc;

        if(nElem == 0) nElem++;
        rKeys = alloc.uninitializedArray!(K[])(nElem);
        rVals = alloc.uninitializedArray!(V[])(nElem);

        // Allocate free list in same block with Node ptrs.  That's what the
        // + 1 is for.
        rNext = alloc.uninitializedArray!(Node*[])(nElem + 1);
        freeList = &(rNext[$ - 1]);
        *freeList = null;
        rNext = rNext[0..$ - 1];

        foreach(ref rKey; rKeys) {
            rKey = K.init;
        }
        foreach(ref rVal; rVals) {
            rVal = V.init;
        }
        foreach(ref r; rNext) {
            r = usedSentinel;
        }
    }

    /**Index an element of the range.  If it does not exist, it will be created
     * and initialized to V.init.*/
    ref V opIndex(K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            rKeys[hash] = key;
            rNext[hash] = null;
            _length++;
            return rVals[hash];
        } else if(rKeys[hash] == key) {
            return rVals[hash];
        } else {  // Collision.  Start chaining.
            Node** next = &(rNext[hash]);
            while(*next !is null) {
                if((**next).key == key) {
                    return (**next).val;
                }
                next = &((**next).next);
            }
            *next = newNode(key);
            _length++;
            return (**next).val;
        }
    }

    ///
    V opIndexAssign(V val, K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            rKeys[hash] = key;
            rVals[hash] = val;
            rNext[hash] = null;
            _length++;
            return val;
        } else if(rKeys[hash] == key) {
            rVals[hash] = val;
            return val;
        } else {  // Collision.  Start chaining.
            Node** next = &(rNext[hash]);
            while(*next !is null) {
                if((**next).key == key) {
                    (**next).val = val;
                    return val;
                }
                next = &((**next).next);
            }
            _length++;
            *next = newNode(key, val);
            return val;
        }
    }

    ///
    V* opBinaryRight(string op : "in")(K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            return null;
        } else if(rKeys[hash] == key) {
            return &(rVals[hash]);
        } else {  // Collision.  Start chaining.
            Node* next = rNext[hash];
            while(next !is null) {
                if(next.key == key) {
                    return &(next.val);
                }
                next = next.next;
            }
            return null;
        }
    }

    ///
    void remove(K key) {
        hash_t hash = getHash(key);

        Node** next = &(rNext[hash]);
        if(rNext[hash] == usedSentinel) {
            return;
        } else if(rKeys[hash] == key) {
            _length--;
            if(rNext[hash] is null) {
                rKeys[hash] = K.init;
                rVals[hash] = V.init;
                rNext[hash] = usedSentinel;
                return;
            } else {
                Node* toPush = *next;

                rKeys[hash] = (**next).key;
                rVals[hash] = (**next).val;
                rNext[hash] = (**next).next;

                pushFreeList(toPush);
                return;
            }
        } else {  // Collision.  Start chaining.
            while(*next !is null) {
                if((**next).key == key) {
                    _length--;

                    Node* toPush = *next;
                    *next = (**next).next;

                    pushFreeList(toPush);
                    break;
                }
                next = &((**next).next);
            }
            return;
        }
    }

    /**Returns a forward range to iterate over the keys of this table.
     * The lifetime of this range must not exceed the lifetime of this
     * StackHash.*/
    auto keys() {
        return HashRange!(K, StackHash!(K, V))(&this);
    }

    /**Returns a forward range to iterate over the values of this table.
     * The lifetime of this range must not exceed the lifetime of this
     * StackHash.*/
    auto values() {
        return HashRange!(V, StackHash!(K, V), true)(&this);
    }

    ///
    @property size_t length() const {
        return _length;
    }

    /**
    Attempt to look up a key and return a default value if the key is not
    present.
    */
    V get(K key, lazy V defaultValue) {
        auto ptr = key in this;
        if(ptr) return *ptr;
        return defaultValue;
    }

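    ///Iterate over the keys and values of this table.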
    int opApply(int delegate(ref K, ref V) dg) {
        auto k = this.keys;
        auto v = this.values;
        int res;

        while(!k.empty) {
            auto kFront = k.front;
            res = dg(kFront, v.front);
            k.popFront;
            v.popFront;
            if(res) {
                break;
            }
        }

        return res;
    }

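    // Fraction of bucket slots in use:  a rough measure of how well the
    // table size was chosen.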
    real efficiency() {
        uint used = 0;
        foreach(root; rNext) {
            if(root != usedSentinel) {
                used++;
            }
        }
        return cast(real) used / rNext.length;
    }
}

unittest {
    alias StackHash!(string, uint) mySh;

    {  // Basic sanity checks.
        auto alloc = newRegionAllocator();
        auto data = mySh(2, alloc);  // Make sure we get some collisions.
        data["foo"] = 1;
        data["bar"] = 2;
        data["baz"] = 3;
        data["waldo"] = 4;
        assert(!("foobar" in data));
        assert(*("foo" in data) == 1);
        assert(*("bar" in data) == 2);
        assert(*("baz" in data) == 3);
        assert(*("waldo" in data) == 4);
        assert(data["foo"] == 1);
        assert(data["bar"] == 2);
        assert(data["baz"] == 3);
        assert(data["waldo"] == 4);
        auto myKeys = array(data.keys);
        qsort(myKeys);
        assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]);
        auto myValues = array(data.values);
        qsort(myValues);
        assert(myValues == [1U, 2, 3, 4]);
        {
            auto k = data.keys;
            auto v = data.values;
            while(!k.empty) {
                assert(data[k.front] == v.front);
                k.popFront;
                v.popFront;
            }
        }
        foreach(v; data.values) {
            assert(v > 0 && v < 5);
        }
    }

    alias StackHash!(uint, uint) mySh2;
    {   // Test remove.
        auto alloc = newRegionAllocator();

        auto foo = mySh2(7, alloc);
        for(uint i = 0; i < 200; i++) {
            foo[i] = i;
        }
        assert(foo.length == 200);
        for(uint i = 0; i < 200; i += 2) {
            foo.remove(i);
        }
        foreach(i; 20..200) {
            foo.remove(i);
        }
        for(uint i = 0; i < 20; i++) {
            if(i & 1) {
                assert(i in foo);
                assert(*(i in foo) == i);
            } else {
                assert(!(i in foo));
            }
        }
        auto vals = array(foo.values);
        assert(foo.length == 10);
        assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]);
    }

    { // Monte Carlo unittesting against builtin hash table.
        auto alloc = newRegionAllocator();
        uint[uint] builtin;
        auto monteSh = mySh2(20_000, alloc);
        uint[] nums = alloc.uninitializedArray!(uint[])(100_000);
        foreach(ref num; nums) {
            num = uniform(0U, uint.max);
        }

        foreach(i; 0..1_000_000) {
            auto index = uniform(0, cast(uint) nums.length);
            if(index in builtin) {
                assert(index in monteSh);
                assert(builtin[index] == nums[index]);
                assert(monteSh[index] == nums[index]);
                builtin.remove(index);
                monteSh.remove(index);
            } else {
                assert(!(index in monteSh));
                builtin[index] = nums[index];
                monteSh[index] = nums[index];
            }
        }

        assert(builtin.length == monteSh.length);
        foreach(k, v; builtin) {
            assert(k in monteSh);
            assert(*(k in builtin) == *(k in monteSh));
            assert(monteSh[k] == v);
        }

        // Make sure nothing is missed in iteration.  Since both keys and
        // values use the same struct, just with a few static if statements,
        // if it works for keys and simple tests work for values, it works.
        foreach(k; monteSh.keys) {
            builtin.remove(k);
        }
        assert(builtin.length == 0);
    }
}

/**A hash set that allocates its memory on RegionAllocator.  Good for building a
 * temporary set that will not escape the current scope.
 *
 * Examples:
 * ---
 * auto alloc = newRegionAllocator();
 * auto ss = StackSet!(uint)(5, alloc);
 * foreach(i; 0..5) {
 *     ss.insert(i);
 * }
 * assert(3 in ss);
 * ---
 */
struct StackSet(K) {
private:
    // Choose smallest representation of the data.
    struct Node1 {
        Node1* next;
        K key;
    }

    struct Node2 {
        K key;
        Node2* next;
    }

    static if(Node1.sizeof < Node2.sizeof) {
        alias Node1 Node;
    } else {
        alias Node2 Node;
    }

    Unqual!(K)[] rKeys;
    Node*[] rNext;

    Node** freeList;
    size_t _length;
    RegionAllocator alloc;

    Node* usedSentinel() {
        return cast(Node*) rNext.ptr;
    }

    // Tries to allocate off the free list.  Otherwise allocates off
    // RegionAllocator.
    Node* allocNode() {
        if(*freeList is null) {
            return cast(Node*) alloc.allocate(Node.sizeof);
        }
        auto ret = *freeList;
        *freeList = (*freeList).next;
        return ret;
    }

    // Add a removed node to the free list.
    void pushFreeList(Node* node) {
        if(*freeList is null) {
            node.next = null;  // Sentinel
            *freeList = node;
        } else {
            node.next = *freeList;
            *freeList = node;
        }
    }

    Node* newNode(K key) {
        Node* ret = allocNode();
        ret.key = key;
        ret.next = null;
        return ret;
    }

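    // Same hashing strategy as StackHash.getHash.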
    hash_t getHash(K key) {
        static if(is(K : long) && K.sizeof <= hash_t.sizeof) {
            hash_t hash = cast(hash_t) key;
        } else static if(__traits(compiles, key.toHash())) {
            hash_t hash = key.toHash();
        } else {
            hash_t hash = typeid(K).getHash(&key);
        }
        hash %= rNext.length;
        return hash;
    }

public:
    /**Due to the nature of RegionAllocator, you must specify on object creation
     * the approximate number of elements your set will have.  Too large a
     * number will waste space and incur poor cache performance.  Too low a
     * number will make this struct perform like a linked list.  Generally,
     * if you're building a set from some other range, some fraction of the
     * size of that range is a good guess.*/
    this(size_t nElem, RegionAllocator alloc) {
        this.alloc = alloc;

        // Obviously, the caller can never mean zero, because this struct
        // can't work at all with nElem == 0, so assume it's a mistake and fix
        // it here.
        if(nElem == 0) nElem++;

        // Allocate the free list as the last element of rNext.
        rNext = alloc.uninitializedArray!(Node*[])(nElem + 1);
        freeList = &(rNext[$ - 1]);
        *freeList = null;
        rNext = rNext[0..$ - 1];

        foreach(ref root; rNext) {
            root = usedSentinel;
        }

        rKeys = alloc.uninitializedArray!(Unqual!(K)[])(nElem);
        foreach(ref root; rKeys) {
            root = K.init;
        }
    }

    ///
    void insert(K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            rKeys[hash] = key;
            rNext[hash] = null;
            _length++;
            return;
        } else if(rKeys[hash] == key) {
            return;
        } else {  // Collision.  Start chaining.
            Node** next = &(rNext[hash]);
            while(*next !is null) {
                if((**next).key == key) {
                    return;
                }
                next = &((**next).next);
            }
            *next = newNode(key);
            _length++;
            return;
        }
    }

    /**Returns a forward range of the elements of this struct.  The range's
     * lifetime must not exceed the lifetime of this object.*/
    auto elems() {
        return HashRange!(K, typeof(this))(&this);
    }

    ///
    bool opBinaryRight(string op : "in")(K key) {
        hash_t hash = getHash(key);

        if(rNext[hash] == usedSentinel) {
            return false;
        } else if(rKeys[hash] == key) {
            return true;
        } else {  // Collision.  Start chaining.
            Node* next = rNext[hash];
            while(next !is null) {
                if(next.key == key) {
                    return true;
                }
                next = next.next;
            }
            return false;
        }
    }

    ///
    void remove(K key) {
        hash_t hash = getHash(key);

        Node** next = &(rNext[hash]);
        if(rNext[hash] == usedSentinel) {
            return;
        } else if(rKeys[hash] == key) {
            _length--;
            if(rNext[hash] is null) {
                rKeys[hash] = K.init;
                rNext[hash] = usedSentinel;
                return;
            } else {
                Node* toPush = *next;

                rKeys[hash] = (**next).key;
                rNext[hash] = (**next).next;

                pushFreeList(toPush);
                return;
            }
        } else {  // Collision.  Start chaining.
            while(*next !is null) {
                if((**next).key == key) {
                    _length--;
                    Node* toPush = *next;

                    *next = (**next).next;
                    pushFreeList(toPush);
                    break;
                }
                next = &((**next).next);
            }
            return;
        }
    }

    ///
    @property size_t length() {
        return _length;
    }
}

unittest {
    { // "Normal" unittesting.
        auto alloc = newRegionAllocator();
        alias StackSet!(uint) mySS;
        mySS set = mySS(12, alloc);
        foreach(i; 0..20) {
            set.insert(i);
        }
        assert(array(set.elems).qsort == seq(0U, 20U));

        for(uint i = 0; i < 20; i += 2) {
            set.remove(i);
        }

        foreach(i; 0..20) {
            if(i & 1) {
                assert(i in set);
            } else {
                assert(!(i in set));
            }
        }
        uint[] contents;

        foreach(elem; set.elems) {
            contents ~= elem;
        }
        assert(contents.qsort == [1U,3,5,7,9,11,13,15,17,19]);
    }

    { // Monte Carlo unittesting against builtin hash table.
        auto alloc = newRegionAllocator();
        bool[uint] builtin;
        auto monteSh = StackSet!uint(20_000, alloc);

        foreach(i; 0..1_000_000) {
            auto index = uniform(0, 100_000);
            if(index in builtin) {
                assert(index in monteSh);
                builtin.remove(index);
                monteSh.remove(index);
            } else {
                assert(!(index in monteSh));
                builtin[index] = 1;
                monteSh.insert(index);
            }
        }

        assert(builtin.length == monteSh.length);
        foreach(k, v; builtin) {
            assert(k in monteSh);
        }

        foreach(k; monteSh.elems) {
            builtin.remove(k);
        }
        assert(builtin.length == 0);
    }
}

private int height(T)(const T node) nothrow {
    return (node is null) ? 0 : node.height;
}

struct AVLNodeRealHeight(T) {
    T payload;
    typeof(this)* left;
    typeof(this)* right;
    int height;

    int balance() const nothrow @property {
        return .height(left) - .height(right);
    }

    void fixHeight() nothrow {
        auto leftHeight = .height(left);
        auto rightHeight = .height(right);

        height = ((leftHeight > rightHeight) ? leftHeight : rightHeight) + 1;
    }

    bool isLeaf() nothrow @property {
        return left is null && right is null;
    }
}

/* Store the height in the low order bits of the pointers to save space,
 * since RegionAllocator allocates 16-byte aligned memory anyhow, but only if
 * this would be smaller after considering alignment.
 */
struct AVLNodeBitwise(T) {
    T payload;
    size_t _left;
    size_t _right;

    enum size_t mask = 0b1111;
    enum size_t notMask = ~mask;

    typeof(this)* left() nothrow @property {
        return cast(typeof(return)) (_left & notMask);
    }

    const(typeof(this))* left() const nothrow @property {
        return cast(typeof(return)) (_left & notMask);
    }

    void left(typeof(this)* newLeft) nothrow @property
    in {
        assert((cast(size_t) newLeft & mask) == 0);
    } do {
        _left &= mask;
        _left |= cast(size_t) newLeft;
        assert(left is newLeft);
    }

    typeof(this)* right() nothrow @property {
        return cast(typeof(return)) (_right & notMask);
    }

    const(typeof(this))* right() const nothrow @property {
        return cast(typeof(return)) (_right & notMask);
    }

    void right(typeof(this)* newRight) nothrow @property
    in {
        assert((cast(size_t) newRight & mask) == 0);
    } do {
        _right &= mask;
        _right |= cast(size_t) newRight;
        assert(right is newRight);
    }

    int height() const nothrow @property {
        return (((_left & mask) << 4) |
                    (_right & mask));
    }

    void height(int newHeight) nothrow @property {
        _right &= notMask;
        _right |= (newHeight & mask);
        newHeight >>= 4;
        _left &= notMask;
        _left |= (newHeight & mask);
    }

    int balance() const nothrow @property {
        return .height(left) - .height(right);
    }

    void fixHeight() nothrow {
        auto leftHeight = .height(left);
        auto rightHeight = .height(right);

        height = ((leftHeight > rightHeight) ? leftHeight : rightHeight) + 1;
    }

    bool isLeaf() const nothrow @property {
        return left is null && right is null;
    }
}

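// A quick sanity check of the bit-packed height scheme above:  the two
// 4-bit fields split across _left and _right should round-trip any height
// that fits in 8 bits without disturbing the pointer bits.
unittest {
    AVLNodeBitwise!int node;
    node.height = 37;
    assert(node.height == 37);
    assert(node.left is null);
    assert(node.right is null);
}

// GetAligned rounds size up to the next multiple of
// RegionAllocator.alignBytes(size), evaluating to 0 when size is already
// an exact multiple.  StackTree uses it below to pick the smaller node
// representation after alignment is accounted for.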
private template GetAligned(uint size) {
    static if(size % RegionAllocator.alignBytes(size) == 0) {
        enum GetAligned = 0;
    } else {
        enum GetAligned =
            size - size % RegionAllocator.alignBytes(size) +
                RegionAllocator.alignBytes(size);
    }
}

/**An AVL tree implementation on top of RegionAllocator.  If elements are removed,
 * they are stored on an internal free list and recycled when new elements
 * are added to the tree.
 *
 * Template parameters:
 *
 * T = The type to be stored in the tree.
 *
 * key = Function to access the key on which elements of the tree are
 *       compared.
 *
 * compFun = The function for comparing keys.
 *
 * Examples:
 * ---
 * struct StringNum {
 *     string someString;
 *     uint num;
 * }
 *
 * // Create a StackTree of StringNums, sorted in descending order, using
 * // someString for comparison.
 * auto alloc = newRegionAllocator();
 * auto myTree = StackTree!(StringNum, "a.someString", "a > b")(alloc);
 *
 * // Add some elements.
 * myTree.insert( StringNum("foo", 1));
 * myTree.insert( StringNum("bar", 2));
 * myTree.insert( StringNum("foo", 3));
 *
 * assert(myTree.find("foo") == StringNum("foo", 3));
 * assert(myTree.find("bar") == StringNum("bar", 2));
 * ---
 *
 * Note:  This tree supports a compile-time interface similar to StackSet
 * and can be used as a finite set implementation.
 *
 * Warning:
 * This implementation places removed nodes on an internal free list and
 * recycles them, since there is no way to delete RegionAllocator-allocated data
 * in a non-LIFO order.  Therefore, you may not retain the address of a
 * variable stored in a StackTree after deleting it from the StackTree.
 * For example, DO NOT do this:
 * ---
 * SomeType* myPtr = "foo" in myTree;
 * myTree.remove("foo");
 * *myPtr = someValue;
 * ---
 */
struct StackTree(T, alias key = "a", alias compFun = "a < b") {
private:

    alias AVLNodeBitwise!(T) BitwiseNode;
    alias AVLNodeRealHeight!(T) RealNode;

    enum size_t bitSize = GetAligned!(BitwiseNode.sizeof);
    enum size_t realHeightSize = GetAligned!(RealNode.sizeof);

    static if(bitSize < realHeightSize) {
        alias AVLNodeBitwise!(T) Node;
    } else {
        alias AVLNodeRealHeight!(T) Node;
    }

    alias binaryFun!(compFun) comp;
    alias unaryFun!(key) getKey;

    Node* head;
    Node** freeList;
    size_t _length;
    RegionAllocator alloc;

    static bool insertComp(T lhs, T rhs) {
        return comp( getKey(lhs), getKey(rhs));
    }

    static Node* rotateRight(Node* node)
    in {
        assert(node.left !is null);
        assert( abs(node.balance) <= 2);
    } do {
        Node* newHead = node.left;
        node.left = newHead.right;
        newHead.right = node;

        node.fixHeight();
        newHead.fixHeight();

        assert( abs(node.balance) < 2);
        return newHead;
    }

    static Node* rotateLeft(Node* node)
    in {
        assert(node.right !is null);
        assert( abs(node.balance) <= 2);
    } do {
        Node* newHead = node.right;
        node.right = newHead.left;
        newHead.left = node;

        node.fixHeight();
        newHead.fixHeight();

        assert( abs(node.balance) < 2);
        return newHead;
    }

    static Node* rebalance(Node* node)
    in {
        assert(node is null || abs(node.balance) <= 2);
    } out(ret) {
        assert( abs(ret.balance) < 2);
    } do {
        if(node is null) {
            return null;
        }

        immutable balance = node.balance;
        if(abs(balance) <= 1) {
            return node;
        }

        if(balance == -2) {

            // It should be impossible to have a balance factor of -2 if
            // node.right is null.
            assert(node.right !is null);
            immutable rightBalance = node.right.balance;
            assert( abs(rightBalance) < 2);

            if(rightBalance == 1) {
                node.right = rotateRight(node.right);
                node.fixHeight();
            }

            assert(node.balance == -2);
            return rotateLeft(node);

        } else if(balance == 2) {
            // It should be impossible to have a balance factor of 2 if
            // node.left is null.
            assert(node.left !is null);
            immutable leftBalance = node.left.balance;
            assert( abs(leftBalance) < 2);

            if(leftBalance == -1) {
                node.left = rotateLeft(node.left);
                node.fixHeight();
            }

            assert(node.balance == 2);
            return rotateRight(node);
        }

        // AVL tree invariant is that abs(balance) <= 2 even during
        // insertion/deletion.
        assert(0);
    }

    void pushFreeList(Node* node) {
        node.left = null;
        node.right = *freeList;
        *freeList = node;
    }

    Node* popFreeList()
    in {
        assert(freeList);
        assert(*freeList);
    } do {
        auto ret = *freeList;
        *freeList = ret.right;
        return ret;
    }

    Node* newNode(T payload)
    in {
        assert(freeList, "Uninitialized StackTree!(" ~ T.stringof ~ ")");
    } do {
        Node* ret;
        if(*freeList !is null) {
            ret = popFreeList();
        } else {
            ret = cast(Node*) alloc.allocate(Node.sizeof);
        }

        ret.payload = payload;
        ret.left = null;
        ret.right = null;
        ret.height = 1;
        return ret;
    }

public:
    ///
    this(RegionAllocator alloc) {
        this.alloc = alloc;
        this.freeList = alloc.uninitializedArray!(Node*[])(1).ptr;
        *(this.freeList) = null;
    }

    /**Insert an element.*/
    void insert(T toInsert) {
        if(head is null) {
            head = newNode(toInsert);
            _length++;
        } else {
            head = insertImpl(toInsert, head);
        }
    }

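    // Recursive insertion:  descends to the insertion point, then fixes
    // heights and rebalances on the way back up.  An equal key overwrites
    // the existing payload.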
    Node* insertImpl(T toInsert, Node* insertInto) {
        if( insertComp(toInsert, insertInto.payload) ) {
            if(insertInto.left is null) {
                insertInto.left = newNode(toInsert);
                _length++;
            } else {
                insertInto.left = insertImpl(toInsert, insertInto.left);
            }
        } else if( insertComp(insertInto.payload, toInsert) ) {
            if(insertInto.right is null) {
                insertInto.right = newNode(toInsert);
                _length++;
            } else {
                insertInto.right = insertImpl(toInsert, insertInto.right);
            }
        } else {
            // This is correct:  If the comparison key is only part of the
            // payload, the old payload may not be equal to the new payload,
            // even if the comparison keys are equal.
            insertInto.payload = toInsert;
            return insertInto;
        }

        insertInto.fixHeight();
        return rebalance(insertInto);
    }

    /**Remove an element from this tree.  The type of U is expected to be the
     * type of the key that this tree is sorted on.
     */
    void remove(U)(U whatToRemove) {
        Node* removedNode;
        Node* leftMost;

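        // Unlinks the leftmost node of the subtree rooted at node, saves
        // it in leftMost, and returns the rebalanced remainder.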
        Node* removeLeftMost(Node* node) {
            if(node.left is null) {
                auto ret = node.right;
                node.right = null;
                leftMost = node;
                return ret;
            }

            node.left = removeLeftMost(node.left);
            node.fixHeight();
            return rebalance(node);
        }

        Node* removeSuccessor(Node* node) {
            if(node.right is null) {
                assert(node.left.isLeaf);
                leftMost = node.left;

                node.left = null;
                return node;
            }

            node.right = removeLeftMost(node.right);
            node.fixHeight();
            return node;
        }

        Node* removeImpl(U whatToRemove, Node* whereToRemove) {
            static bool findComp(V, W)(V lhs, W rhs) {
                static if(is(V == T)) {
                    static assert(is(W == U));
                    return comp( getKey(lhs), rhs);
                } else {
                    static assert(is(V == U));
                    static assert(is(W == T));
                    return comp(lhs, getKey(rhs) );
                }
            }

            if(whereToRemove is null) {
                return null;
            }

            if( findComp(whatToRemove, whereToRemove.payload) ){
                whereToRemove.left = removeImpl(whatToRemove, whereToRemove.left);
                whereToRemove.fixHeight();
                return rebalance(whereToRemove);
            } else if( findComp(whereToRemove.payload, whatToRemove) ) {
                whereToRemove.right = removeImpl(whatToRemove, whereToRemove.right);
                whereToRemove.fixHeight();
                return rebalance(whereToRemove);
            } else {
                // We've found it.
                _length--;
                removedNode = whereToRemove;
                if(whereToRemove.isLeaf) {
                    return null;
                }

                whereToRemove = removeSuccessor(whereToRemove);
                if(leftMost is null) {
                    return null;
                }

                leftMost.left = whereToRemove.left;
                leftMost.right = whereToRemove.right;
                leftMost.fixHeight();
                return rebalance(leftMost);
            }
        }

        head = removeImpl(whatToRemove, head);

        debug(EXPENSIVE) assertAvl(head);

        if(removedNode !is null) {
            pushFreeList(removedNode);
        }
    }

    /**Find an element and return it.  Throw an exception if it is not
     * present.  U is expected to be the type of the key that this tree is
     * sorted on.*/
    T find(U)(U whatToFind) {
        T* ptr = dstatsEnforce(whatToFind in this,
            "Item not found:  " ~ to!string(whatToFind));
        return *ptr;
    }

    /**Find an element and return a pointer to it, or null if not present.*/
    T* opBinaryRight(string op : "in", U)(U whatToFind) {
        auto ret = findImpl!(U)(whatToFind, head);
        if(ret is null) {
            return null;
        }
        return &(ret.payload);
    }

    Node* findImpl(U)(U whatToFind, Node* whereToFind) {
        static bool findComp(V, W)(V lhs, W rhs) {
            static if(is(V == T)) {
                static assert(is(W == U));
                return comp( getKey(lhs), rhs );
            } else {
                static assert(is(V == U));
                static assert(is(W == T));
                return comp( lhs, getKey(rhs) );
            }
        }

        if(whereToFind is null) {
            return null;
        }

        if( findComp(whatToFind, whereToFind.payload) ){
            return findImpl!(U)(whatToFind, whereToFind.left);
        } else if( findComp(whereToFind.payload, whatToFind) ) {
            return findImpl!(U)(whatToFind, whereToFind.right);
        } else {
            // We've found it.
            return whereToFind;
        }

        assert(0);
    }

    /**Iterate over the elements of this tree in sorted order.*/
    int opApply( int delegate(ref T) dg) {
        int res;
        int opApplyImpl(Node* node) {
            if(node is null) {
                return 0;
            }
            res = opApplyImpl(node.left);
            if(res) {
                return res;
            }
            res = dg(node.payload);
            if(res) {
                return res;
            }
            res = opApplyImpl(node.right);
            return res;
        }

        return opApplyImpl(head);
    }

    /**Number of elements in the tree.*/
    @property size_t length() const pure nothrow {
        return _length;
    }
}

private int assertAvl(T)(T node) {
    if(node is null) {
        return 0;
    }

    int leftHeight = assertAvl(node.left);
    int rightHeight = assertAvl(node.right);
    assert(node.height == max(leftHeight, rightHeight) + 1);
    assert( abs(node.balance) < 2,
        text( height(node.left), '\t', height(node.right)));

    if(node.left) {
        assert(node.left.payload < node.payload);
    }

    if(node.right) {
        assert(node.right.payload > node.payload,
            text(node.payload, ' ', node.right.payload));
    }

    return node.height;
}

unittest {
    // Test against StackSet on random data.
    auto alloc = newRegionAllocator();
    StackTree!(uint) myTree = StackTree!(uint)(alloc);
    StackSet!(uint) ss = StackSet!(uint)(500, alloc);
    foreach(i; 0..1_000_000) {
        uint num = uniform(0, 1_000);
        if(num in ss) {
            assert(num in myTree);
            assert(*(num in myTree) == num);
            ss.remove(num);
            myTree.remove(num);
        } else {
            assert(!(num in myTree));
            ss.insert(num);
            myTree.insert(num);
        }
    }
    assertAvl(myTree.head);
}

/**Struct that iterates over keys or values of a StackTreeAA.
 *
 * Bugs:  Uses opApply instead of the more flexible ranges, because I
 * haven't figured out how to iterate efficiently and in sorted order over a
 * tree without control of the stack.
 */
struct TreeAaIter(T, alias mapFun) {
    alias unaryFun!(mapFun) mFun;
    T tree;
    alias typeof(*(tree.head)) Node;

//    TreeRange!(T, mapFun) asRange() {
//        dstatsEnforce(0, "Not implemented yet.");
//    }

    alias typeof( mFun( tree.head.payload ) ) IterType;

    ///
    int opApply( int delegate(ref IterType) dg) {
        int res;
        int opApplyImpl(Node* node) {
            if(node is null) {
                return 0;
            }
            res = opApplyImpl(node.left);
            if(res) {
                return res;
            }

            static if(__traits(compiles, dg(mFun(node.payload)))) {
                res = dg(mFun(node.payload));
            } else {
                auto toDg = mFun(node.payload);
                res = dg(toDg);
            }

            if(res) {
                return res;
            }
            res = opApplyImpl(node.right);
            return res;
        }

        return opApplyImpl(tree.head);
    }

    ///
    @property size_t length() const pure nothrow {
        return tree.length;
    }
}

private struct StackTreeAANode(K, V) {
    Unqual!(K) key;
    Unqual!(V) value;
}

/**An associative array implementation based on StackTree.  Lookups and
 * insertions are O(log N).  This is significantly slower in both theory and
 * practice than StackHash, but you may want to use it if:
 *
 * 1.  You don't know the approximate size of the table you will be creating
 *     in advance.  Unlike StackHash, this AA implementation does not need
 *     to pre-allocate anything.
 *
 * 2.  You care more about worst-case performance than average-case
 *     performance.
 *
 * 3.  You have a good comparison function for your type, but not a good hash
 *     function.
 *
 */
struct StackTreeAA(K, V) {
    alias StackTreeAANode!(K, V) Node;
    StackTree!(Node, "a.key") tree;

    ///
    this(RegionAllocator alloc) {
        this.tree = typeof(tree)(alloc);
    }

    /**Looks up key in the table, returns it by reference.  If it does not
     * exist, it will be created and initialized to V.init.  This is handy,
     * for example, when counting things with integer types.
     */
    ref V opIndex(K key) {
        Node* result = key in tree;
        if(result is null) {
            tree.insert( Node(key, V.init));
            result = key in tree;
        }

        return result.value;
    }

    ///
    V opIndexAssign(V val, K key) {
        tree.insert( Node(key, val));
        return val;
    }

    ///
    V* opBinaryRight(string op : "in")(K key) {
        auto nodePtr = key in tree;
        if(nodePtr is null) {
            return null;
        }

        return &(nodePtr.value);
    }

    ///
    void remove(K key) {
        tree.remove(key);
    }

    ///
    @property size_t length() const pure nothrow {
        return tree.length;
    }

    ///
    TreeAaIter!( typeof(tree), "a.key") keys() @property {
        typeof(return) ret;
        ret.tree = tree;
        return ret;
    }

    private static ref Unqual!(V) getVal(ref Node node) {
        return node.value;
    }

    ///
    TreeAaIter!( typeof(tree), getVal) values() @property {
        return typeof(return)(tree);
    }

    /**Iterate over both the keys and values of this associative array.*/
    int opApply( int delegate(ref Unqual!(K), ref Unqual!(V)) dg) {
        alias typeof(*(tree.head)) TreeNode;
        int res;
        int opApplyImpl(TreeNode* node) {
            if(node is null) {
                return 0;
            }
            res = opApplyImpl(node.left);
            if(res) {
                return res;
            }
            res = dg(node.payload.key, node.payload.value);
            if(res) {
                return res;
            }
            res = opApplyImpl(node.right);
            return res;
        }

        return opApplyImpl(tree.head);
    }
}

unittest {
    // Test against builtin AA on random data.
    {
        auto alloc = newRegionAllocator();
        alias StackTreeAA!(string, uint) mySh;
        auto data = mySh(alloc);
        data["foo"] = 1;
        data["bar"] = 2;
        data["baz"] = 3;
        data["waldo"] = 4;
        assert(!("foobar" in data));
        assert(*("foo" in data) == 1);
        assert(*("bar" in data) == 2);
        assert(*("baz" in data) == 3);
        assert(*("waldo" in data) == 4);
        assert(data["foo"] == 1);
        assert(data["bar"] == 2);
        assert(data["baz"] == 3);
        assert(data["waldo"] == 4);

        assert(data.length == 4);
        auto myKeys = array(data.keys);
        qsort(myKeys);
        assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]);
        auto myValues = array(data.values);
        qsort(myValues);
        assert(myValues == [1U, 2, 3, 4]);

        foreach(v; data.values) {
            assert(v > 0 && v < 5);
        }
    }

    alias StackTreeAA!(uint, uint) mySh2;
    {   // Test remove.
        auto alloc = newRegionAllocator();

        auto foo = mySh2(alloc);
        for(uint i = 0; i < 200; i++) {
            foo[i] = i;
        }
        assert(foo.length == 200);
        for(uint i = 0; i < 200; i += 2) {
            foo.remove(i);
        }
        foreach(i; 20..200) {
            foo.remove(i);
        }
        for(uint i = 0; i < 20; i++) {
            if(i & 1) {
                assert(i in foo);
                assert(*(i in foo) == i);
            } else {
                assert(!(i in foo));
            }
        }
        auto vals = array(foo.values);
        assert(foo.length == 10);
        assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]);
    }

    { // Monte Carlo unittesting against builtin hash table.
        auto alloc = newRegionAllocator();
        uint[uint] builtin;
        auto monteSh = mySh2(alloc);
        uint[] nums = alloc.uninitializedArray!(uint[])(100_000);
        foreach(ref num; nums) {
            num = uniform(0U, uint.max);
        }

        foreach(i; 0..10_000) {
            auto index = uniform(0, cast(uint) nums.length);
            if(index in builtin) {
                assert(index in monteSh);
                assert(builtin[index] == nums[index]);
                assert(monteSh[index] == nums[index]);
                builtin.remove(index);
                monteSh.remove(index);
            } else {
                assert(!(index in monteSh));
                builtin[index] = nums[index];
                monteSh[index] = nums[index];
            }
        }

        assert(builtin.length == monteSh.length);
        foreach(k, v; builtin) {
            assert(k in monteSh);
            assert(*(k in builtin) == *(k in monteSh));
            assert(monteSh[k] == v);
        }

        // Make sure nothing is missed in iteration.  Since both keys and
        // values use the same struct, just with a few static if statements,
        // if it works for keys and simple tests work for values, it works.
        foreach(k; monteSh.keys) {
            builtin.remove(k);
        }
        assert(builtin.length == 0);
    }
}

version(scid) {
    public import scid.internal.regionallocator;
} else {
    version = noscid;
}

version(noscid):

import std.traits, core.memory, std.range, core.exception, std.conv,
    std.algorithm, std.typetuple, std.exception, std.typecons;

static import core.stdc.stdlib;

// This is just for convenience/code readability/saving typing.
private enum ptrSize = (void*).sizeof;

// This was accidentally assumed in a few places and I'm too lazy to fix it
// until I see proof that it needs to be fixed.
static assert(bool.sizeof == 1);

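// The default segment size is 4 MiB.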
enum size_t defaultSegmentSize = 4 * 1_024 * 1_024;

/**
The exception that is thrown on invalid use of $(D RegionAllocator) and
$(D RegionAllocatorStack).  This exception is not thrown on out of memory.
An $(D OutOfMemoryError) is thrown instead.
*/
class RegionAllocatorException : Exception {
    this(string msg, string file, int line) @safe {
        super(msg, file, line);
    }
}

/**
This flag determines whether a given $(D RegionAllocatorStack) is scanned for
pointers by the garbage collector (GC).  If yes, the entire stack is scanned,
not just the part currently in use, since there is currently no efficient way to
modify the bounds of a GC region.  The stack is scanned conservatively, meaning
that any bit pattern that would point to GC-allocated memory if interpreted as
a pointer is considered to be a pointer.  This can result in GC-allocated
memory being retained when it should be freed.  Due to these caveats,
it is recommended that any stack scanned by the GC be small and/or short-lived.
*/
enum GCScan : bool {
    ///
    no = false,

    ///
    yes = true
}

/**
This object represents a segmented stack.  Memory can be allocated from this
stack using a $(XREF regionallocator, RegionAllocator) object.  Multiple
$(D RegionAllocator) objects may be created per
$(D RegionAllocatorStack) but each $(D RegionAllocator) uses a single
$(D RegionAllocatorStack).

For most use cases it's convenient to use the default thread-local
instance of $(D RegionAllocatorStack), which is lazily instantiated on
the first call to the global function
$(XREF regionallocator, newRegionAllocator).  Occasionally it may be useful
to have multiple independent stacks in one thread, in which case a
$(D RegionAllocatorStack) can be created manually.

$(D RegionAllocatorStack) is reference counted and has reference semantics.
When the last copy of a given instance goes out of scope, the memory
held by the $(D RegionAllocatorStack) instance is released back to the
heap.  This cannot happen before memory allocated to a $(D RegionAllocator)
instance is released back to the stack, because a $(D RegionAllocator)
holds a copy of the $(D RegionAllocatorStack) instance it uses.

Examples:
---
import std.regionallocator;

void main() {
    fun1();
}

void fun1() {
    auto stack = RegionAllocatorStack(1_048_576, GCScan.no);
    fun2(stack);

    // At the end of fun1, the last copy of the RegionAllocatorStack
    // instance pointed to by stack goes out of scope.  The memory
    // held by stack is released back to the heap.
}

void fun2(RegionAllocatorStack stack) {
    auto alloc = stack.newRegionAllocator();
    auto arr = alloc.newArray!(double[])(1_024);

    // At the end of fun2, the last copy of the RegionAllocator instance
    // pointed to by alloc goes out of scope.  The memory used by arr
    // is released back to stack.
}
---
*/
1786 struct RegionAllocatorStack {
1787 private:
1788     RefCounted!(RegionAllocatorStackImpl, RefCountedAutoInitialize.no) impl;
1789     bool initialized;
1790     bool _gcScanned;
1791 
1792 public:
1793     /**
1794     Creates a new $(D RegionAllocatorStack) with the given segment size in bytes.
1795     */
1796     this(size_t segmentSize, GCScan shouldScan) {
1797         this._gcScanned = shouldScan;
1798         if(segmentSize == 0) {
1799             throw new RegionAllocatorException(
1800                 "Cannot create a RegionAllocatorStack with segment size of 0.",
1801                 __FILE__, __LINE__
1802             );
1803         }
1804 
1805         impl = typeof(impl)(segmentSize, shouldScan);
1806         initialized = true;
1807     }
1808 
1809     /**
1810     Creates a new $(D RegionAllocator) region using this stack.
1811     */
1812     RegionAllocator newRegionAllocator() {
1813         if(!initialized) {
1814             throw new RegionAllocatorException(
1815                 "Cannot create a RegionAllocator from an " ~
1816                 "uninitialized RegionAllocatorStack.  Did you call " ~
1817                 "RegionAllocatorStack's constructor?",
1818                 __FILE__,
1819                 __LINE__
1820             );
1821         }
1822 
1823         return RegionAllocator(this);
1824     }
1825 
1826     /**
1827     Whether this stack is scanned by the garbage collector.
1828     */
1829     bool gcScanned() @property const pure nothrow @safe {
1830         return _gcScanned;
1831     }
1832 }
1833 
1834 private struct RegionAllocatorStackImpl {
1835 
1836     this(size_t segmentSize, GCScan shouldScan) {
1837         this.segmentSize = segmentSize;
1838         space = alignedMalloc(segmentSize, shouldScan);
1839 
1840         // We don't need 16-byte alignment for the bookkeeping array.
1841         immutable nBookKeep = segmentSize / alignBytes;
1842         bookkeep = (cast(SizetPtr*) core.stdc.stdlib.malloc(nBookKeep))
1843                     [0..nBookKeep / SizetPtr.sizeof];
1844 
1845         if(!bookkeep.ptr) {
1846             outOfMemory();
1847         }
1848 
1849         nBlocks++;
1850     }
1851 
1852     size_t segmentSize;  // The size of each segment.
1853 
1854     size_t used;
1855     void* space;
1856     size_t bookkeepIndex;
1857     SizetPtr[] bookkeep;
1858     uint nBlocks;
1859     uint nFree;
1860     size_t regionIndex = size_t.max;
1861 
1862     // inUse holds info for all blocks except the one currently being
1863     // allocated from.  freeList holds space ptrs for all free blocks.
1864 
1865     static struct Block {
1866         size_t used = 0;
1867         void* space = null;
1868     }
1869 
1870     SimpleStack!(Block) inUse;
1871     SimpleStack!(void*) freeList;
1872 
1873     void doubleSize(ref SizetPtr[] bookkeep) {
1874         size_t newSize = bookkeep.length * 2;
1875         auto ptr = cast(SizetPtr*) core.stdc.stdlib.realloc(
1876             bookkeep.ptr, newSize * SizetPtr.sizeof);
1877 
1878         if(!ptr) {
1879             outOfMemory();
1880         }
1881 
1882         bookkeep = ptr[0..newSize];
1883     }
1884 
1885     // Add an element to bookkeep, checking length first.
1886     void putLast(void* last) {
1887         if (bookkeepIndex == bookkeep.length) doubleSize(bookkeep);
1888         bookkeep[bookkeepIndex].p = last;
1889         bookkeepIndex++;
1890     }
1891 
1892     void putLast(size_t num) {
1893         return putLast(cast(void*) num);
1894     }
1895 
1896     // The number of objects allocated on the C heap instead of here.
1897     ref size_t nLargeObjects() @property pure nothrow {
1898         return bookkeep[regionIndex - 4].i;
1899     }
1900 
1901     // The number of extra segments that have been allocated in the current
1902     // region beyond the base segment of the region.
1903     ref size_t nExtraSegments() @property pure nothrow {
1904         return bookkeep[regionIndex - 3].i;
1905     }
1906 
1907     // Pushes a segment to the internal free list and frees segments to the
1908     // heap if there are more than 2x as many segments on the free list as
1909     // in use.
1910     void freeSegment() {
1911         nExtraSegments--;
1912         freeList.push(space);
1913         auto newHead = inUse.pop();
1914         space = newHead.space;
1915         used = newHead.used;
1916         nBlocks--;
1917         nFree++;
1918 
1919         if (nFree >= nBlocks * 2) {
1920             foreach(i; 0..nFree / 2) {
1921                 alignedFree(freeList.pop);
1922                 nFree--;
1923             }
1924         }
1925     }
1926 
1927     void destroy() {
1928         if(space) {
1929             alignedFree(space);
1930             space = null;
1931         }
1932 
1933         if(bookkeep) {
1934             core.stdc.stdlib.free(bookkeep.ptr);
1935             bookkeep = null;
1936         }
1937 
1938         while(inUse.index > 0) {
1939             auto toFree = inUse.pop();
1940             alignedFree(toFree.space);
1941         }
1942 
1943         inUse.destroy();
1944 
1945         while(freeList.index > 0) {
1946             auto toFree = freeList.pop();
1947             alignedFree(toFree);
1948         }
1949 
1950         freeList.destroy();
1951     }
1952 
1953     ~this() {
1954         destroy();
1955     }
1956 }
1957 
1958 /**
1959 These properties get and set the segment size of the default thread-local
1960 $(D RegionAllocatorStack) instance.  The default size is 4 megabytes.
1961 The setter is only effective before the global function
1962 $(D newRegionAllocator) has been called for the first time in the current
1963 thread.  Attempts to set this property after the first call to this
1964 function from the current thread throw a $(D RegionAllocatorException).
1965 */
1966 size_t threadLocalSegmentSize() @property nothrow @safe {
1967     return _threadLocalSegmentSize;
1968 }
1969 
1970 /// Ditto
1971 size_t threadLocalSegmentSize(size_t newSize) @property @safe {
1972     if(threadLocalInitialized) {
1973         throw new RegionAllocatorException(
1974             "Cannot set threadLocalSegmentSize after the thread-local " ~
1975             "RegionAllocatorStack has been used for the first time.",
1976             __FILE__,
1977             __LINE__
1978         );
1979     }
1980 
1981     return _threadLocalSegmentSize = newSize;
1982 }
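/*
A sketch of the intended usage:  set the size once, early in the thread,
before anything there calls newRegionAllocator().  (The 8 MB figure below is
an arbitrary example.)

    threadLocalSegmentSize = 8 * 1_024 * 1_024;
    auto alloc = newRegionAllocator();  // Stack created with 8 MB segments.
*/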
1983 
1984 /**
1985 These properties determine whether the default thread-local
1986 $(D RegionAllocatorStack) instance is scanned by the garbage collector.
1987 The default is no.  In most cases, scanning a stack this long-lived is not
1988 recommended, as it will cause too many false pointers.  (See $(XREF
1989 regionallocator, GCScan) for details.)
1990 
1991 The setter is only effective before the global function
1992 $(D newRegionAllocator) has been called for the first time in the current
1993 thread.  Attempts to set this property after the first call to this
1994 function from the current thread throw a $(D RegionAllocatorException).
1995 */
1996 bool scanThreadLocalStack() @property nothrow @safe {
1997     return _scanThreadLocalStack;
1998 }
1999 
2000 /// Ditto
2001 bool scanThreadLocalStack(bool shouldScan) @property @safe {
2002     if(threadLocalInitialized) {
2003         throw new RegionAllocatorException(
2004             "Cannot set scanThreadLocalStack after the thread-local " ~
2005             "RegionAllocatorStack has been used for the first time.",
2006             __FILE__,
2007             __LINE__
2008         );
2009     }
2010 
2011     return _scanThreadLocalStack = shouldScan;
2012 }
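/*
Likewise, scanning must be requested before the thread-local stack is first
used; afterwards this setter throws.  A sketch:

    scanThreadLocalStack = true;
    auto alloc = newRegionAllocator();  // This thread's stack is GC-scanned.
*/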
2013 
2014 private size_t _threadLocalSegmentSize = defaultSegmentSize;
2015 private RegionAllocatorStack threadLocalStack;
2016 private bool threadLocalInitialized;
2017 private bool _scanThreadLocalStack = false;
2018 
2019 // Ensures the thread-local stack is initialized, then returns it.
2020 private ref RegionAllocatorStack getThreadLocal() {
2021     if(!threadLocalInitialized) {
2022         threadLocalInitialized = true;
2023         threadLocalStack = RegionAllocatorStack(
2024             threadLocalSegmentSize, cast(GCScan) scanThreadLocalStack
2025         );
2026     }
2027 
2028     return threadLocalStack;
2029 }
2030 
2031 static ~this() {
2032     if(threadLocalInitialized) {
2033         threadLocalStack.impl.refCountedPayload.destroy();
2034     }
2035 }
2036 
2037 /**
2038 This struct provides an interface to the $(D RegionAllocator) functionality
2039 and enforces scoped deletion.  A new instance using the thread-local
2040 $(D RegionAllocatorStack) instance is created using the global
2041 $(D newRegionAllocator) function.  A new instance using
2042 an explicitly created $(D RegionAllocatorStack) is created using
2043 $(D RegionAllocatorStack.newRegionAllocator).
2044 
2045 Each instance has reference semantics in that any copy will allocate from the
2046 same memory.  When the last copy of an instance goes out of scope, all memory
2047 allocated via that instance is freed.  Only the most recently created
2048 still-existing $(D RegionAllocator) using a given $(D RegionAllocatorStack)
2049 may be used to allocate and free memory at any given time.  Deviations
2050 from this model result in a $(D RegionAllocatorException) being thrown.
2051 
2052 An uninitialized $(D RegionAllocator) (for example $(D RegionAllocator.init))
2053 has semantics similar to a null pointer.  It may be assigned to or passed to
2054 a function.  However, any attempt to call a method will result in a
2055 $(D RegionAllocatorException) being thrown.
2056 
2057 Examples:
2058 ---
2059 void foo() {
2060     auto alloc = newRegionAllocator();
2061     auto ptr1 = bar(alloc);
2062     auto ptr2 = alloc.allocate(42);
2063 
2064     // The last copy of the RegionAllocator object used to allocate ptr1
2065     // and ptr2 is going out of scope here.  The memory pointed to by
2066     // both ptr1 and ptr2 will be freed.
2067 }
2068 
2069 void* bar(RegionAllocator alloc) {
2070     auto ret = alloc.allocate(42);
2071 
2072     auto alloc2 = newRegionAllocator();
2073     auto ptr3 = alloc2.allocate(42);
2074 
2075     // ptr3 was allocated using alloc2, which is going out of scope.
2076     // Its memory will therefore be freed.  ret was allocated using alloc.
2077     // A copy of this RegionAllocator is still alive in foo() after
2078     // bar() executes.  Therefore, ret will not be freed on returning and
2079     // is still valid after bar() returns.
2080 
2081     return ret;
2082 }
2083 
2084 void* thisIsSafe() {
2085     // This is safe because the two RegionAllocator objects being used
2086     // are using two different RegionAllocatorStack objects.
2087     auto alloc = newRegionAllocator();
2088     auto ptr1 = alloc.allocate(42);
2089 
2090     auto stack = RegionAllocatorStack(1_048_576, GCScan.no);
2091     auto alloc2 = stack.newRegionAllocator();
2092 
2093     auto ptr2 = alloc2.allocate(42);
2094     auto ptr3 = alloc.allocate(42);
2095 }
2096 
2097 void* dontDoThis() {
2098     auto alloc = newRegionAllocator();
2099     auto ptr1 = alloc.allocate(42);
2100     auto alloc2 = newRegionAllocator();
2101 
2102     // Error:  Allocating from a RegionAllocator instance other than the
2103     // most recently created one that's still alive from a given stack.
2104     auto ptr = alloc.allocate(42);
2105 }
2106 
2107 void uninitialized() {
2108     RegionAllocator alloc;
2109     auto ptr = alloc.allocate(42);  // Error:  alloc is not initialized.
2110     auto alloc2 = alloc;  // Ok.  Both alloc, alloc2 are uninitialized.
2111 
2112     alloc2 = newRegionAllocator();
2113     auto ptr2 = alloc2.allocate(42);  // Ok.
2114     auto ptr3 = alloc.allocate(42);  // Error:  alloc is still uninitialized.
2115 
2116     alloc = alloc2;
2117     auto ptr4 = alloc.allocate(42);  // Ok.
2118 }
2119 ---
2120 
2121 Note:  Allocations larger than $(D this.segmentSize) are handled as a special
2122 case and fall back to allocating directly from the C heap.  These large
2123 allocations are freed as if they were allocated on a $(D RegionAllocatorStack)
2124 when $(D free) or $(D freeLast) is called or the last copy of a
2125 $(D RegionAllocator) instance goes out of scope.  However, due to the extra
2126 bookkeeping required, destroying a region (as happens when the last copy of
2127 a $(D RegionAllocator) instance goes out of scope) takes time linear, rather
2128 than constant, in the number of allocations for regions that contain such
2129 large allocations.
2130 */
2131 struct RegionAllocator {
2132 private:
2133     RegionAllocatorStack stack;
2134 
2135     // The region index that should be current anytime this instance is
2136     // being used.  This is checked for in allocate() and free().
2137     size_t correctRegionIndex = size_t.max;
2138 
2139     this(ref RegionAllocatorStack stack) {
2140         assert(stack.initialized);
2141         auto impl = &(stack.impl.refCountedPayload());
2142         this.stack = stack;
2143 
2144         with(*impl) {
2145             putLast(0);            // nLargeObjects.
2146             putLast(0);            // nExtraSegments.
2147             putLast(regionIndex);  // Old regionIndex.
2148             putLast(1);            // Ref count of current RegionAllocator.
2149             regionIndex = bookkeepIndex;
2150             correctRegionIndex = regionIndex;
2151         }
2152     }
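    /*
    The bookkeep layout the constructor above sets up for a region, relative
    to regionIndex:

        bookkeep[regionIndex - 4]  nLargeObjects for this region
        bookkeep[regionIndex - 3]  nExtraSegments for this region
        bookkeep[regionIndex - 2]  regionIndex of the enclosing region
        bookkeep[regionIndex - 1]  reference count of this RegionAllocator
        bookkeep[regionIndex...]   one entry per allocation in this region

    This is what nLargeObjects()/nExtraSegments() index into and what
    decrementRefCount() unwinds when the last copy goes out of scope.
    */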
2153 
2154     // CTFE-usable function for static assertions.  Can't use bsr/bsf
2155     // because those aren't usable at compile time.
2156     static bool isPowerOf2(size_t num) pure nothrow @safe {
2157         return num && !(num & (num - 1));
2158     }
2159 
2160     alias RegionAllocatorStackImpl Impl;  // Save typing.
2161 
2162     // This is written as a mixin instead of a function because it's
2163     // performance critical and it wouldn't be inlinable because it throws.
2164     // By using a mixin, initialized can be checked all the time instead of
2165     // just in debug mode, for negligible performance cost.
2166     enum string getStackImplMixin = q{
2167         if(!initialized) {
2168             throw new RegionAllocatorException(
2169                 "RegionAllocator instance not initialized.  Please use " ~
2170                 "newRegionAllocator() to create a RegionAllocator object.",
2171                 __FILE__,
2172                 __LINE__
2173             );
2174         }
2175 
2176         auto impl = &(stack.impl.refCountedPayload());
2177     };
2178 
2179     void incrementRefCount() {
2180         mixin(getStackImplMixin);
2181         impl.bookkeep[correctRegionIndex - 1].i++;
2182     }
2183 
2184     void decrementRefCount() {
2185         mixin(getStackImplMixin);
2186         impl.bookkeep[correctRegionIndex - 1].i--;
2187 
2188         if(impl.bookkeep[correctRegionIndex - 1].i == 0) {
2189             if(impl.regionIndex != correctRegionIndex) {
2190                 throw new RegionAllocatorException(
2191                     "Cannot free RegionAllocator regions in non-last-in " ~
2192                     "first-out order.  Did you return a RegionAllocator " ~
2193                     "from a function?",
2194                     __FILE__,
2195                     __LINE__
2196                 );
2197             }
2198 
2199             with(*impl) {
2200                 // Free allocations one at a time until we don't have any
2201                 // more large objects.
2202                 while (nLargeObjects > 0 && bookkeepIndex > regionIndex) {
2203                     assert(bookkeepIndex > regionIndex);
2204                     freeLast();
2205                 }
2206 
2207                 // Free any extra segments that were used by this region.
2208                 while(nExtraSegments > 0) {
2209                     freeSegment();
2210                 }
2211 
2212                 if(bookkeepIndex > regionIndex) {
2213                     // Then there's something left to free.
2214                     used = bookkeep[regionIndex].i - cast(size_t) space;
2215                 }
2216 
2217                 bookkeepIndex = regionIndex - 4;
2218                 regionIndex = bookkeep[regionIndex - 2].i;
2219             }
2220         }
2221     }
2222 
2223     bool initialized() @property const pure nothrow @safe {
2224         return correctRegionIndex < size_t.max;
2225     }
2226 
2227     Unqual!(ElementType!(R))[] arrayImplStack(R)(R range) {
2228         alias ElementType!(R) E;
2229         alias Unqual!(E) U;
2230         static if(hasLength!(R)) {
2231             U[] ret = uninitializedArray!(U[])(range.length);
2232             copy(range, ret);
2233             return ret;
2234         } else {
2235             mixin(getStackImplMixin);
2236             auto startPtr = allocate(0);
2237             size_t bytesCopied = 0;
2238 
2239             while(!range.empty) {
2240                 auto elem = range.front;
2241                 if(impl.used + U.sizeof <= segmentSize) {
2242                     range.popFront;
2243                     *(cast(U*) (startPtr + bytesCopied)) = elem;
2244                     bytesCopied += U.sizeof;
2245                     impl.used += U.sizeof;
2246                 } else {
2247                     if(bytesCopied + U.sizeof >= segmentSize / 2) {
2248                         // Then just heap-allocate.
2249                         U[] result = (cast(U*)
2250                             alignedMalloc(bytesCopied * 2, gcScanned))
2251                             [0..bytesCopied / U.sizeof * 2];
2252 
2253                         immutable elemsCopied = bytesCopied / U.sizeof;
2254                         result[0..elemsCopied] = (cast(U*) startPtr)
2255                             [0..elemsCopied];
2256                         finishCopy(result, range, elemsCopied);
2257                         freeLast();
2258                         impl.putLast(result.ptr);
2259                         impl.nLargeObjects++;
2260                         return result;
2261                     } else {
2262                         // Force allocation of a new segment.
2263                         U[] oldData = (cast(U*) startPtr)
2264                             [0..bytesCopied / U.sizeof];
2265                         impl.used -= bytesCopied;
2266                         impl.bookkeepIndex--;
2267                         U[] arr = uninitializedArray!(U[])
2268                             (bytesCopied / U.sizeof + 1);
2269                         arr[0..oldData.length] = oldData[];
2270                         startPtr = impl.space;
2271                         arr[$ - 1] = elem;
2272                         bytesCopied += U.sizeof;
2273                         range.popFront;
2274                     }
2275                 }
2276             }
2277             auto rem = bytesCopied % .alignBytes;
2278             if(rem != 0) {
2279                 auto toAdd = .alignBytes - rem;
2280                 if(impl.used + toAdd < RegionAllocator.segmentSize) {
2281                     impl.used += toAdd;
2282                 } else {
2283                     impl.used = RegionAllocator.segmentSize;
2284                 }
2285             }
2286             return (cast(U*) startPtr)[0..bytesCopied / U.sizeof];
2287         }
2288     }
2289 
2290     Unqual!(ElementType!(R))[] arrayImplHeap(R)(R range) {
2291         // Initial guess of how much space to allocate.  It's relatively large
2292     // because the object will be short-lived, so speed is more important than
2293         // space efficiency.
2294         enum initialGuess = 128;
2295 
2296         mixin(getStackImplMixin);
2297         alias Unqual!(ElementType!R) E;
2298         auto arr = (cast(E*) alignedMalloc(E.sizeof * initialGuess, true))
2299             [0..initialGuess];
2300 
2301         finishCopy(arr, range, 0);
2302         impl.putLast(arr.ptr);
2303         impl.nLargeObjects++;
2304         return arr;
2305     }
2306 
2307 public:
2308 
2309     this(this) {
2310         if(initialized) incrementRefCount();
2311     }
2312 
2313     ~this() {
2314         if(initialized) decrementRefCount();
2315     }
2316 
2317     void opAssign(RegionAllocator rhs) {
2318         if(initialized) decrementRefCount();
2319         this.stack = rhs.stack;
2320         this.correctRegionIndex = rhs.correctRegionIndex;
2321         if(initialized) incrementRefCount();
2322     }
2323 
2324     /**
2325     Allocates $(D nBytes) bytes on the $(D RegionAllocatorStack) used by this
2326     $(D RegionAllocator) instance.  The last block allocated from this
2327     $(D RegionAllocator) instance can be freed by calling
2328     $(D RegionAllocator.free) or $(D RegionAllocator.freeLast) or will be
2329     automatically freed when the last copy of this $(D RegionAllocator)
2330     instance goes out of scope.
2331 
2332     Allocation requests larger than $(D segmentSize) are
2333     allocated directly on the C heap, are scanned by the GC iff
2334     the $(D RegionAllocatorStack) instance that this object uses is scanned by
2335     the GC, and are freed according to the same rules as described above.
2336     */
2337     void* allocate(size_t nBytes) {
2338         mixin(getStackImplMixin);
2339         if(impl.regionIndex != this.correctRegionIndex) {
2340             throw new RegionAllocatorException(
2341                 "Cannot allocate memory from a RegionAllocator that is not " ~
2342                 "currently at the top of the stack.",
2343                 __FILE__,
2344                 __LINE__
2345             );
2346         }
2347 
2348         nBytes = allocSize(nBytes);
2349         with(*impl) {
2350             void* ret;
2351             if (segmentSize - used >= nBytes) {
2352                 ret = space + used;
2353                 used += nBytes;
2354             } else if (nBytes > segmentSize) {
2355                 ret = alignedMalloc(nBytes, gcScanned);
2356                 impl.nLargeObjects++;
2357             } else if (nFree > 0) {
2358                 nExtraSegments++;
2359                 inUse.push(Block(used, space));
2360                 space = freeList.pop;
2361                 used = nBytes;
2362                 nFree--;
2363                 nBlocks++;
2364                 ret = space;
2365             } else { // Allocate more space.
2366                 nExtraSegments++;
2367                 inUse.push(Block(used, space));
2368                 space = alignedMalloc(segmentSize, gcScanned);
2369                 nBlocks++;
2370                 used = nBytes;
2371                 ret = space;
2372             }
2373             putLast(ret);
2374             return ret;
2375         }
2376     }
2377 
2378     /**
2379     Frees the last block of memory allocated by the current
2380     $(D RegionAllocator).  Throws a $(D RegionAllocatorException) if
2381     this $(D RegionAllocator) is not the most recently created still-existing
2382     $(D RegionAllocator) using its $(D RegionAllocatorStack) instance.
2383     */
2384     void freeLast() {
2385         mixin(getStackImplMixin);
2386         if(impl.regionIndex != this.correctRegionIndex) {
2387             throw new RegionAllocatorException(
2388                 "Cannot free memory to a RegionAllocator that is not " ~
2389                 "currently at the top of the stack, or memory that has not " ~
2390                 "been allocated with this instance.",
2391                 __FILE__,
2392                 __LINE__
2393             );
2394         }
2395 
2396         with(*impl) {
2397             auto lastPos = bookkeep[--bookkeepIndex].p;
2398 
2399             // Handle large blocks.
2400             if(lastPos > space + segmentSize || lastPos < space) {
2401                 alignedFree(lastPos);
2402                 impl.nLargeObjects--;
2403                 return;
2404             }
2405 
2406             used = (cast(size_t) lastPos) - (cast(size_t) space);
2407             if (nBlocks > 1 && used == 0) {
2408                 impl.freeSegment();
2409             }
2410         }
2411     }
2412 
2413     /**
2414     Checks that $(D ptr) is a pointer to the block that would be freed by
2415     $(D freeLast) then calls $(D freeLast).  Throws a
2416     $(D RegionAllocatorException) if the pointer does not point to the
2417     block that would be freed by $(D freeLast).
2418     */
2419     void free(void* ptr) {
2420         mixin(getStackImplMixin);
2421         auto lastPos = impl.bookkeep[impl.bookkeepIndex - 1].p;
2422         if(ptr !is lastPos) {
2423             throw new RegionAllocatorException(
2424                 "Cannot free RegionAllocator memory in non-LIFO order.",
2425                 __FILE__,
2426                 __LINE__
2427             );
2428         }
2429 
2430         freeLast();
2431     }
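    /*
    A minimal sketch of the LIFO discipline that free() and freeLast()
    enforce:

        auto alloc = newRegionAllocator();
        auto a = alloc.allocate(32);
        auto b = alloc.allocate(64);
        alloc.free(b);   // Most recently allocated block first...
        alloc.free(a);   // ...then the one before it.
        // Calling alloc.free(a) before alloc.free(b) would have thrown.
    */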
2432 
2433     /**
2434     Attempts to resize a previously allocated block of memory in place.
2435     This is possible only if $(D ptr) points to the beginning of the last
2436     block allocated by this $(D RegionAllocator) instance and, in the
2437     case where $(D newSize) is greater than the old size, there is
2438     additional space in the segment that $(D ptr) was allocated from.
2439     Additionally, blocks larger than this $(D RegionAllocator)'s segment size
2440     cannot be grown or shrunk.
2441 
2442     Returns:  True if the block was successfully resized, false otherwise.
2443     */
2444     bool resize(const(void)* ptr, size_t newSize) {
2445         mixin(getStackImplMixin);
2446 
2447         // This works since we always allocate in increments of alignBytes
2448         // no matter what the allocation size.
2449         newSize = allocSize(newSize);
2450 
2451         with(*impl) {
2452             auto lastPos = bookkeep[bookkeepIndex - 1].p;
2453             if(ptr !is lastPos) {
2454                 return false;
2455             }
2456 
2457             // If it's a large block directly allocated on the heap,
2458             // then we definitely can't resize it in place.
2459             if(lastPos > space + segmentSize || lastPos < space) {
2460                 return false;
2461             }
2462 
2463             immutable blockSize = used - ((cast(size_t) lastPos) -
2464                 cast(size_t) space);
2465             immutable sizediff_t diff = newSize - blockSize;
2466 
2467             if(cast(sizediff_t) (impl.segmentSize - used) >= diff) {
2468                 used += diff;
2469                 return true;
2470             }
2471         }
2472 
2473         return false;
2474     }
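    /*
    A sketch of in-place resizing:  only the most recent, segment-resident
    block can be resized, and growing additionally requires enough slack in
    the current segment.

        auto alloc = newRegionAllocator();
        auto p = alloc.allocate(16);
        if(alloc.resize(p, 32)) {
            // p now heads a 32-byte block; no data was copied or moved.
        }
    */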
2475 
2476     /**
2477     Returns whether the $(D RegionAllocatorStack) used by this
2478     $(D RegionAllocator) instance is scanned by the garbage collector.
2479     */
2480     bool gcScanned() @property const pure nothrow @safe {
2481         return stack.gcScanned;
2482     }
2483 
2484     /**Allocates an array of type $(D T).  $(D T) may be a multidimensional
2485     array.  In this case sizes may be specified for any number of dimensions
2486     from 1 to the number in $(D T).
2487 
2488     Examples:
2489     ---
2490     auto alloc = newRegionAllocator();
2491     double[] arr = alloc.newArray!(double[])(100);
2492     assert(arr.length == 100);
2493 
2494     double[][] matrix = alloc.newArray!(double[][])(42, 31);
2495     assert(matrix.length == 42);
2496     assert(matrix[0].length == 31);
2497     ---
2498     */
2499     auto newArray(T, I...)(I sizes)
2500     if(allSatisfy!(isIntegral, I)) {
2501 
2502         static void initialize(R)(R toInitialize) {
2503             static if(isArray!(ElementType!R)) {
2504                 foreach(elem; toInitialize) {
2505                     initialize(elem);
2506                 }
2507             } else {
2508                 toInitialize[] = ElementType!(R).init;
2509             }
2510         }
2511 
2512         auto ret = uninitializedArray!(T, I)(sizes);
2513         initialize(ret);
2514         return ret;
2515     }
2516 
2517     /**
2518     Same as $(D newArray), except skips initialization of elements for
2519     performance reasons.
2520     */
2521     auto uninitializedArray(T, I...)(I sizes)
2522     if(allSatisfy!(isIntegral, I)) {
2523         static assert(sizes.length >= 1,
2524             "Cannot allocate an array without specifying the size of at " ~
2525             "least the first dimension.");
2526         static assert(sizes.length <= nDims!T,
2527             to!string(sizes.length) ~ " dimensions specified for a " ~
2528             to!string(nDims!T) ~ " dimensional array.");
2529 
2530         alias typeof(T.init[0]) E;
2531 
2532         auto ptr = cast(E*) allocate(sizes[0] * E.sizeof);
2533         auto ret = ptr[0..sizes[0]];
2534 
2535         static if(sizes.length > 1) {
2536             foreach(ref elem; ret) {
2537                 elem = uninitializedArray!(E)(sizes[1..$]);
2538             }
2539         }
2540 
2541         return ret;
2542     }
2543 
2544     /**
2545     Returns the number of bytes to which an allocation of size nBytes is
2546     guaranteed to be aligned.
2547     */
2548     static size_t alignBytes(size_t nBytes) {
2549         return .alignBytes;
2550     }
2551 
2552     /**
2553     Returns the number of bytes used to satisfy an allocation request
2554     of $(D nBytes).  Will return a value greater than or equal to
2555     $(D nBytes) to account for alignment overhead.
2556     */
2557     static size_t allocSize(size_t nBytes) pure nothrow {
2558         static assert(isPowerOf2(.alignBytes));
2559         return (nBytes + (.alignBytes - 1)) & (~(.alignBytes - 1));
2560     }
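    // For instance, with the 16-byte alignment used here:  allocSize(1) == 16,
    // allocSize(16) == 16, and allocSize(17) == 32.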
2561 
2562     /**
2563     False because memory allocated by this allocator is not automatically
2564     reclaimed by the garbage collector.
2565     */
2566     enum isAutomatic = false;
2567 
2568     /**
2569     True because, when the last copy of a $(D RegionAllocator) instance
2570     goes out of scope, the memory it references is automatically freed.
2571     */
2572     enum isScoped = true;
2573 
2574     /**
2575     True because if memory is freed via $(D free()) instead of $(D freeLast())
2576     then the pointer is checked for validity.
2577     */
2578     enum freeIsChecked = true;
2579 
2580     /**
2581     Returns the segment size of the $(D RegionAllocatorStack) used by this
2582     $(D RegionAllocator).
2583     */
2584     size_t segmentSize() @property {
2585         mixin(getStackImplMixin);
2586         return impl.segmentSize;
2587     }
2588 
2589     /**
2590     Returns the maximum number of bytes that may be allocated in the
2591     current segment.
2592     */
2593     size_t segmentSlack() @property {
2594         mixin(getStackImplMixin);
2595         return impl.segmentSize - impl.used;
2596     }
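    /*
    A sketch of using segmentSlack() to predict whether an allocation will
    stay within the current segment:

        auto alloc = newRegionAllocator();
        if(alloc.segmentSlack >= alloc.allocSize(1_024)) {
            auto p = alloc.allocate(1_024);  // No new segment needed.
        }
    */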
2597 
2598     /**
2599     Copies $(D range) to an array.  The array will be located on the
2600     $(D RegionAllocator) stack if any of the following conditions apply:
2601 
2602     1.  $(D std.traits.hasIndirections!(ElementType!R)) is false.
2603 
2604     2.  $(D R) is a builtin array.  In this case $(D range) maintains pointers
2605         to all elements at least until $(D array) returns, preventing the
2606         elements from being freed by the garbage collector.  A similar
2607         assumption cannot be made for ranges other than builtin arrays.
2608 
2609     3.  The $(D RegionAllocatorStack) instance used by this
2610         $(D RegionAllocator) is scanned by the garbage collector.
2611 
2612     If none of these conditions is met, the array is returned on the C heap
2613     and $(D GC.addRange) is called.  In either case, $(D RegionAllocator.free),
2614     $(D RegionAllocator.freeLast), or the last copy of this $(D RegionAllocator)
2615     instance going out of scope will free the array as if it had been
2616     allocated on the $(D RegionAllocator) stack.
2617 
2618     Rationale:  The most common reason to call $(D array) on a builtin array is
2619                 to modify its contents inside a function without affecting the
2620                 caller's view.  In this case $(D range) is not modified and
2621                 prevents the elements from being freed by the garbage
2622                 collector.  Furthermore, if the copy returned does need
2623                 to be scanned, the client can call $(D GC.addRange) before
2624                 modifying the original array.
2625 
2626     Examples:
2627     ---
2628     auto alloc = newRegionAllocator();
2629     auto arr = alloc.array(iota(5));
2630     assert(arr == [0, 1, 2, 3, 4]);
2631     ---
2632     */
2633     Unqual!(ElementType!(R))[] array(R)(R range) if(isInputRange!R) {
2634         alias Unqual!(ElementType!(R)) E;
2635         if(gcScanned || !hasIndirections!E || isArray!R) {
2636             return arrayImplStack(range);
2637         } else {
2638             return arrayImplHeap(range);
2639         }
2640     }
2641 }
2642 
2643 /**
2644 Returns a new $(D RegionAllocator) that uses the default thread-local
2645 $(D RegionAllocatorStack) instance.
2646 */
2647 RegionAllocator newRegionAllocator() {
2648     return RegionAllocator(getThreadLocal());
2649 }
2650 
2651 // Finishes copying a range to a C heap-allocated array.  Assumes the first
2652 // alreadyCopied elements of result are already filled in and the rest is
2653 // free space.
2654 private void finishCopy(T, U)(ref T[] result, U range, size_t alreadyCopied) {
2655     void doRealloc() {
2656         auto newPtr = cast(T*) alignedRealloc(
2657             result.ptr, result.length * T.sizeof * 2, result.length * T.sizeof
2658         );
2659         result = newPtr[0..result.length * 2];
2660     }
2661 
2662     auto index = alreadyCopied;
2663     foreach(elem; range) {
2664         if(index == result.length) doRealloc();
2665         result[index++] = elem;
2666     }
2667 
2668     result = result[0..index];
2669 }
2670 
2671 unittest {
2672     auto alloc = newRegionAllocator();
2673     auto arr = alloc.array(iota(5));
2674     assert(arr == [0, 1, 2, 3, 4]);
2675 
2676     // Create a quick and dirty finite range with no length property.
2677     static struct Count {
2678         uint num;
2679         uint upTo;
2680         @property size_t front() {
2681             return num;
2682         }
2683         void popFront() {
2684             num++;
2685         }
2686         @property bool empty() {
2687             return num >= upTo;
2688         }
2689     }
2690 
2691     alloc.allocate(1024 * 1024 * 3);
2692     Count count;
2693     count.upTo = 1024 * 1025;
2694     auto asArray = alloc.array(count);
2695     foreach(i, elem; asArray) {
2696         assert(i == elem, to!(string)(i) ~ "\t" ~ to!(string)(elem));
2697     }
2698     assert(asArray.length == 1024 * 1025);
2699     alloc.freeLast();
2700     alloc.freeLast();
2701 
2702     while(alloc.stack.impl.refCountedPayload.freeList.index > 0) {
2703         alignedFree(alloc.stack.impl.refCountedPayload.freeList.pop());
2704     }
2705 }
2706 
2707 unittest {
2708     auto alloc = newRegionAllocator();
2709     double[] arr = alloc.uninitializedArray!(double[])(100);
2710     assert(arr.length == 100);
2711 
2712     double[][] matrix = alloc.uninitializedArray!(double[][])(42, 31);
2713     assert(matrix.length == 42);
2714     assert(matrix[0].length == 31);
2715 
2716     double[][] mat2 = alloc.newArray!(double[][])(3, 1);
2717     assert(mat2.length == 3);
2718     assert(mat2[0].length == 1);
2719 
2720     import std.math;
2721     assert(isNaN(mat2[0][0]));
2722 }
2723 
2724 unittest {
2725     /* Not a particularly good unittest in that it depends on knowing the
2726      * internals of RegionAllocator, but it's the best I could come up with.
2727      * This is really more of a stress test/sanity check than a normal unittest.*/
2728 
2729     // Make sure state is completely reset.
2730     destroy(threadLocalStack.impl);
2731     threadLocalStack = RegionAllocatorStack.init;
2732     threadLocalInitialized = false;
2733 
2734     // First test to make sure a large number of allocations does what it's
2735     // supposed to in terms of reallocing bookkeep[], etc.
2736     enum nIter = defaultSegmentSize * 5 / alignBytes;
2737 
2738     {
2739         auto alloc = newRegionAllocator();
2740         foreach(i; 0..nIter) {
2741             alloc.allocate(alignBytes);
2742         }
2743         assert(alloc.stack.impl.refCountedPayload.nBlocks == 5,
2744             to!string(alloc.stack.impl.refCountedPayload.nBlocks));
2745         assert(alloc.stack.impl.refCountedPayload.nFree == 0);
2746         foreach(i; 0..nIter) {
2747             alloc.freeLast();
2748         }
2749         assert(alloc.stack.impl.refCountedPayload.nBlocks == 1);
2750         assert(alloc.stack.impl.refCountedPayload.nFree == 2);
2751 
2752         // Make sure logic for freeing excess blocks works.  If it doesn't this
2753         // test will run out of memory.
2754         enum allocSize = defaultSegmentSize / 2;
2755         foreach(i; 0..50) {
2756             foreach(j; 0..50) {
2757                 alloc.allocate(allocSize);
2758             }
2759             foreach(j; 0..50) {
2760                 alloc.freeLast();
2761             }
2762         }
2763 
2764         // Make sure data is stored properly.
2765         foreach(i; 0..10) {
2766             alloc.allocate(allocSize);
2767         }
2768         foreach(i; 0..5) {
2769             alloc.freeLast();
2770         }
2771         void* space = alloc.stack.impl.refCountedPayload.space;
2772         size_t used = alloc.stack.impl.refCountedPayload.used;
2773 
2774         {
2775             auto alloc2 = newRegionAllocator();
2776             auto arrays = new uint[][10];
2777 
2778             foreach(i; 0..10) {
2779                 uint[] data = alloc2.uninitializedArray!(uint[])(250_000);
2780                 foreach(j, ref e; data) {
2781                     e = cast(uint) (j * (i + 1));
2782                 }
2783                 arrays[i] = data;
2784             }
2785 
2786             // Make stuff get overwritten if blocks are getting GC'd when
2787             // they're not supposed to be.
2788             GC.minimize;  // Free up all excess pools.
2789             uint[][] foo;
2790             foreach(i; 0..40) {
2791                 foo ~= new uint[1_048_576];
2792             }
2793             foo = null;
2794 
2795             for(size_t i = 9; i != size_t.max; i--) {
2796                 foreach(j, e; arrays[i]) {
2797                     assert(e == j * (i + 1));
2798                 }
2799             }
2800         }
2801 
2802         assert(space == alloc.stack.impl.refCountedPayload.space,
2803             text(space, '\t', alloc.stack.impl.refCountedPayload.space));
2804         assert(used == alloc.stack.impl.refCountedPayload.used,
2805             text(used, '\t', alloc.stack.impl.refCountedPayload.used));
2806         while(alloc.stack.impl.refCountedPayload.nBlocks > 1 ||
2807               alloc.stack.impl.refCountedPayload.used > 0) {
2808             alloc.freeLast();
2809         }
2810     }
2811 
2812     // Test that everything is really getting destroyed properly when
2813     // destroy() is called.  If not then this test will run out of memory.
2814     foreach(i; 0..1000) {
2815         destroy(threadLocalStack.impl);
2816         threadLocalInitialized = false;
2817 
2818         auto alloc = newRegionAllocator();
2819         foreach(j; 0..1_000) {
2820             auto ptr = alloc.allocate(20_000);
2821             assert((cast(size_t) ptr) % alignBytes == 0);
2822         }
2823 
2824         foreach(j; 0..500) {
2825             alloc.freeLast();
2826         }
2827     }
2828 }
2829 
2830 unittest {
2831     // Make sure the basics of using explicit stacks work.
2832     auto stack = RegionAllocatorStack(4 * 1024 * 1024, GCScan.no);
2833     auto alloc = stack.newRegionAllocator();
2834     auto arr = alloc.array(iota(5));
2835     assert(arr == [0, 1, 2, 3, 4]);
2836     auto ptr = alloc.allocate(5);
2837 
2838     auto alloc2 = newRegionAllocator();
2839     auto ptr2 = alloc2.allocate(5);
2840     auto ptr3 = alloc.allocate(5);
2841 }
2842 
2843 unittest {
2844     // Make sure the stacks get freed properly when they go out of scope.
2845     // If they don't then this will run out of memory.
2846     foreach(i; 0..100_000) {
2847         auto stack = RegionAllocatorStack(4 * 1024 * 1024, GCScan.no);
2848     }
2849 }
2850 
2851 unittest {
2852     // Make sure resize works properly.
2853     auto alloc = newRegionAllocator();
2854     auto arr1 = alloc.array(iota(4));
2855     auto res = alloc.resize(arr1.ptr, 8 * int.sizeof);
2856     assert(res);
2857     arr1 = arr1.ptr[0..8];
2858     copy(iota(4, 8), arr1[4..$]);
2859 
2860     auto arr2 = alloc.newArray!(int[])(8);
2861 
2862     // If resize() had shrunk the block too much, this would have been
2863     // overwritten:
2864     assert(arr1 == [0, 1, 2, 3, 4, 5, 6, 7], text(arr1));
2865 
2866     alloc.free(arr2.ptr);
2867     auto res2 = alloc.resize(arr1.ptr, 4 * int.sizeof);
2868     assert(res2);
2869     arr2 = alloc.newArray!(int[])(8);
2870 
2871     // This time the memory in arr1 should have been overwritten.
2872     assert(arr1 == [0, 1, 2, 3, 0, 0, 0, 0]);
2873 }
2874 
2875 unittest {
2876     // Make sure that default thread-local stacks get freed properly at the
2877     // termination of a thread.  If they don't then this will run out of
2878     // memory.
2879 
2880     import core.thread;
2881     foreach(i; 0..100) {
2882         auto fun = {
2883             threadLocalSegmentSize = 100 * 1024 * 1024;
2884             newRegionAllocator();
2885         };
2886 
2887         auto t = new Thread(fun);
2888         t.start();
2889         t.join();
2890     }
2891 }
2892 
2893 unittest {
2894     // Make sure assignment works as advertised.
2895     RegionAllocator alloc;
2896     auto alloc2 = newRegionAllocator();
2897     auto ptr = alloc2.allocate(8);
2898     alloc = alloc2;
2899     alloc.freeLast();
2900     auto ptr2 = alloc2.allocate(8);
2901     assert(ptr is ptr2);
2902 }
2903 
2904 // Simple, fast stack without error checking.
2905 static struct SimpleStack(T) {
2906     private size_t capacity;
2907     private size_t index;
2908     private T* data;
2909     private enum sz = T.sizeof;
2910 
2911     private static size_t max(size_t lhs, size_t rhs) pure nothrow {
2912         return (rhs > lhs) ? rhs : lhs;
2913     }
2914 
2915     void push(T elem) {
2916         if (capacity == index) {
2917             capacity = max(16, capacity * 2);
2918             data = cast(T*) core.stdc.stdlib.realloc(data, capacity * sz);
2919         }
2920         data[index++] = elem;
2921     }
2922 
2923     T pop() {
2924         index--;
2925         auto ret = data[index];
2926         return ret;
2927     }
2928 
2929     void destroy() {
2930         if(data) {
2931             core.stdc.stdlib.free(data);
2932             data = null;
2933         }
2934     }
2935 }
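// A minimal usage sketch of SimpleStack (illustrative only; it performs no
// bounds checking, so pop() calls must never outnumber push() calls).
unittest {
    SimpleStack!int s;
    s.push(1);
    s.push(2);
    assert(s.pop() == 2);
    assert(s.pop() == 1);
    s.destroy();
}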
2936 
2937 private void outOfMemory() {
2938     throw new OutOfMemoryError("Out of memory in RegionAllocator.");
2939 }
2940 
2941 // Memory allocation routines.  These wrap C's malloc(), free() and realloc()
2942 // and guarantee alignment.
2943 private enum size_t alignBytes = 16;
2944 
2945 private void* alignedMalloc(size_t size, bool shouldAddRange = false) {
2946     // We need (alignBytes - 1) extra bytes to guarantee alignment, 1 byte
2947     // to store the shouldAddRange flag, and ptrSize bytes to store
2948     // the pointer to the beginning of the block.
2949     void* toFree = core.stdc.stdlib.malloc(
2950         alignBytes + ptrSize + size
2951     );
2952 
2953     if(toFree is null) outOfMemory();
2954 
2955     // Add the offset for the flag and the base pointer.
2956     auto intPtr = cast(size_t) toFree + ptrSize + 1;
2957 
2958     // Align it.
2959     intPtr = (intPtr + alignBytes - 1) & (~(alignBytes - 1));
2960     auto ret = cast(void**) intPtr;
2961 
2962     // Store base pointer.
2963     (cast(void**) ret)[-1] = toFree;
2964 
2965     // Store flag.
2966     (cast(bool*) ret)[-1 - ptrSize] = shouldAddRange;
2967 
2968     if(shouldAddRange) {
2969         GC.addRange(ret, size);
2970     }
2971 
2972     return ret;
2973 }
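/*
Layout of a block returned by alignedMalloc, from low to high addresses:

    [ padding... | shouldAddRange flag (1 byte) | base pointer | user data ]
                                                                 ^ aligned

alignedFree() and alignedRealloc() recover the flag at ptr[-1 - ptrSize] and
the malloc'd base pointer at ptr[-1], as the code below reads them.
*/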
2974 
2975 private void alignedFree(void* ptr) {
2976     // If it was allocated with alignedMalloc() then the pointer to the
2977     // beginning is at ptr[-1].
2978     auto addedRange = (cast(bool*) ptr)[-1 - ptrSize];
2979 
2980     if(addedRange) {
2981         GC.removeRange(ptr);
2982     }
2983 
2984     core.stdc.stdlib.free( (cast(void**) ptr)[-1]);
2985 }
2986 
2987 // This is used by RegionAllocator, but its interface isn't stable enough yet
2988 // to make it public and document it.
2989 private void* alignedRealloc(void* ptr, size_t newLen, size_t oldLen) {
2990     auto storedRange = (cast(bool*) ptr)[-1 - ptrSize];
2991     auto newPtr = alignedMalloc(newLen, storedRange);
2992     memcpy(newPtr, ptr, oldLen);
2993 
2994     alignedFree(ptr);
2995     return newPtr;
2996 }
2997 
2998 private union SizetPtr {
2999     size_t i;
3000     void* p;
3001 }