1 /** 2 * Open-Addressed Hash Set 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 module containers.openhashset; 8 9 private import containers.internal.hash; 10 private import containers.internal.node : shouldAddGCRange; 11 private import std.experimental.allocator.common : stateSize; 12 private import std.experimental.allocator.mallocator : Mallocator; 13 14 /** 15 * Simple open-addressed hash set that uses linear probing to resolve sollisions. 16 * 17 * Params: 18 * T = the element type of the hash set 19 * Allocator = the allocator to use. Defaults to `Mallocator`. 20 * hashFunction = the hash function to use 21 * supportGC = if true, calls to GC.addRange and GC.removeRange will be used 22 * to ensure that the GC does not accidentally free memory owned by this 23 * container. 24 */ 25 struct OpenHashSet(T, Allocator = Mallocator, 26 alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T) 27 { 28 /** 29 * Disallow copy construction 30 */ 31 this(this) @disable; 32 33 static if (stateSize!Allocator != 0) 34 { 35 this() @disable; 36 37 /** 38 * Use the given `allocator` for allocations. 39 */ 40 this(Allocator allocator) 41 in 42 { 43 static if (is(typeof(allocator is null))) 44 assert(allocator !is null, "Allocator must not be null"); 45 } 46 do 47 { 48 this.allocator = allocator; 49 } 50 51 /** 52 * Initializes the hash set with the given initial capacity. 53 * 54 * Params: 55 * initialCapacity = the initial capacity for the hash set 56 * allocator = allocator to use for allocations 57 */ 58 this(size_t initialCapacity, Allocator allocator) 59 in 60 { 61 static if (is(typeof(allocator is null))) 62 assert(allocator !is null, "Allocator must not be null"); 63 assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2"); 64 } 65 do 66 { 67 this.allocator = allocator; 68 initialize(initialCapacity); 69 } 70 } 71 else 72 { 73 /** 74 * Initializes the hash set with the given initial capacity. 75 * 76 * Params: 77 * initialCapacity = the initial capacity for the hash set 78 */ 79 this(size_t initialCapacity) 80 in 81 { 82 assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2"); 83 } 84 do 85 { 86 initialize(initialCapacity); 87 } 88 } 89 90 ~this() nothrow 91 { 92 static if (useGC) 93 GC.removeRange(nodes.ptr); 94 allocator.deallocate(nodes); 95 } 96 97 /** 98 * Removes all items from the hash set. 99 */ 100 void clear() 101 { 102 if (empty) 103 return; 104 foreach (ref node; nodes) 105 { 106 typeid(typeof(node)).destroy(&node); 107 node.used = false; 108 } 109 _length = 0; 110 } 111 112 /// 113 bool empty() const pure nothrow @nogc @safe @property 114 { 115 return _length == 0; 116 } 117 118 /// 119 size_t length() const pure nothrow @nogc @safe @property 120 { 121 return _length; 122 } 123 124 /** 125 * Returns: 126 * $(B true) if the hash set contains the given item, false otherwise. 127 */ 128 bool contains(T item) const 129 { 130 if (empty) 131 return false; 132 immutable size_t hash = hashFunction(item); 133 size_t index = toIndex(nodes, item, hash); 134 if (index == size_t.max) 135 return false; 136 return nodes[index].hash == hash && nodes[index].data == item; 137 } 138 139 /// ditto 140 bool opBinaryRight(string op)(T item) inout if (op == "in") 141 { 142 return contains(item); 143 } 144 145 /** 146 * Inserts the given item into the set. 147 * 148 * Returns: 149 * $(B true) if the item was inserted, false if it was already present. 150 */ 151 bool insert(T item) 152 { 153 if (nodes.length == 0) 154 initialize(DEFAULT_BUCKET_COUNT); 155 immutable size_t hash = hashFunction(item); 156 size_t index = toIndex(nodes, item, hash); 157 if (index == size_t.max) 158 { 159 grow(); 160 index = toIndex(nodes, item, hash); 161 } 162 else if (nodes[index].used && nodes[index].hash == hash && nodes[index].data == item) 163 return false; 164 nodes[index].used = true; 165 nodes[index].hash = hash; 166 nodes[index].data = item; 167 _length++; 168 return true; 169 } 170 171 /// ditto 172 alias put = insert; 173 174 /// ditto 175 alias insertAnywhere = insert; 176 177 /// ditto 178 bool opOpAssign(string op)(T item) if (op == "~") 179 { 180 return insert(item); 181 } 182 183 /** 184 * Params: 185 * item = the item to remove 186 * Returns: 187 * $(B true) if the item was removed, $(B false) if it was not present 188 */ 189 bool remove(T item) 190 { 191 if (empty) 192 return false; 193 immutable size_t hash = hashFunction(item); 194 size_t index = toIndex(nodes, item, hash); 195 if (index == size_t.max) 196 return false; 197 nodes[index].used = false; 198 destroy(nodes[index].data); 199 _length--; 200 return true; 201 } 202 203 /** 204 * Returns: 205 * A range over the set. 206 */ 207 auto opSlice(this This)() nothrow pure @nogc @safe 208 { 209 return Range!(This)(nodes); 210 } 211 212 mixin AllocatorState!Allocator; 213 214 private: 215 216 import containers.internal.element_type : ContainerElementType; 217 import containers.internal.mixins : AllocatorState; 218 import containers.internal.storage_type : ContainerStorageType; 219 import core.memory : GC; 220 221 enum bool useGC = supportGC && shouldAddGCRange!T; 222 223 static struct Range(ThisT) 224 { 225 ET front() 226 { 227 return cast(typeof(return)) nodes[index].data; 228 } 229 230 bool empty() const pure nothrow @safe @nogc @property 231 { 232 return index >= nodes.length; 233 } 234 235 void popFront() pure nothrow @safe @nogc 236 { 237 index++; 238 while (index < nodes.length && !nodes[index].used) 239 index++; 240 } 241 242 private: 243 244 alias ET = ContainerElementType!(ThisT, T); 245 246 this(const Node[] nodes) 247 { 248 this.nodes = nodes; 249 while (true) 250 { 251 if (index >= nodes.length || nodes[index].used) 252 break; 253 index++; 254 } 255 } 256 257 size_t index; 258 const Node[] nodes; 259 } 260 261 void grow() 262 { 263 immutable size_t newCapacity = nodes.length << 1; 264 Node[] newNodes = (cast (Node*) allocator.allocate(newCapacity * Node.sizeof)) 265 [0 .. newCapacity]; 266 newNodes[] = Node.init; 267 static if (useGC) 268 GC.addRange(newNodes.ptr, newNodes.length * Node.sizeof, typeid(typeof(nodes))); 269 foreach (ref node; nodes) 270 { 271 immutable size_t newIndex = toIndex(newNodes, node.data, node.hash); 272 newNodes[newIndex] = node; 273 } 274 static if (useGC) 275 GC.removeRange(nodes.ptr); 276 allocator.deallocate(nodes); 277 nodes = newNodes; 278 } 279 280 void initialize(size_t nodeCount) 281 { 282 nodes = (cast (Node*) allocator.allocate(nodeCount * Node.sizeof))[0 .. nodeCount]; 283 static if (useGC) 284 GC.addRange(nodes.ptr, nodes.length * Node.sizeof, typeid(typeof(nodes))); 285 nodes[] = Node.init; 286 _length = 0; 287 } 288 289 // Returns: size_t.max if the item was not found 290 static size_t toIndex(const Node[] n, T item, size_t hash) 291 { 292 assert (n.length > 0, "Empty node"); 293 immutable size_t index = hashToIndex(hash, n.length); 294 size_t i = index; 295 immutable bucketMask = n.length - 1; 296 while (n[i].used && n[i].data != item) 297 { 298 i = (i + 1) & bucketMask; 299 if (i == index) 300 return size_t.max; 301 } 302 return i; 303 } 304 305 Node[] nodes; 306 size_t _length; 307 308 static struct Node 309 { 310 ContainerStorageType!T data; 311 bool used; 312 size_t hash; 313 } 314 } 315 316 version(emsi_containers_unittest) unittest 317 { 318 import std.string : format; 319 import std.algorithm : equal, sort; 320 import std.range : iota; 321 import std.array : array; 322 OpenHashSet!int ints; 323 assert (ints.empty); 324 assert (equal(ints[], cast(int[]) [])); 325 ints.clear(); 326 ints.insert(10); 327 assert (!ints.empty); 328 assert (ints.length == 1); 329 assert (equal(ints[], [10])); 330 assert (ints.contains(10)); 331 ints.clear(); 332 assert (ints.length == 0); 333 assert (ints.empty); 334 ints ~= 0; 335 assert (!ints.empty); 336 assert (ints.length == 1); 337 assert (equal(ints[], [0])); 338 ints.clear(); 339 assert (ints.length == 0); 340 assert (ints.empty); 341 foreach (i; 0 .. 100) 342 ints ~= i; 343 assert (ints.length == 100, "%d".format(ints.length)); 344 assert (!ints.empty); 345 foreach (i; 0 .. 100) 346 assert (i in ints); 347 assert (equal(ints[].array().sort(), iota(0, 100))); 348 assert (ints.insert(10) == false); 349 auto ohs = OpenHashSet!int(8); 350 assert (!ohs.remove(1000)); 351 assert (ohs.contains(99) == false); 352 assert (ohs.insert(10) == true); 353 assert (ohs.insert(10) == false); 354 foreach (i; 0 .. 7) 355 ohs.insert(i); 356 assert (ohs.contains(6)); 357 assert (!ohs.contains(100)); 358 assert (!ohs.remove(9999)); 359 assert (ohs.remove(0)); 360 assert (ohs.remove(1)); 361 } 362 363 version(emsi_containers_unittest) unittest 364 { 365 static class Foo 366 { 367 string name; 368 369 override bool opEquals(Object other) const @safe pure nothrow @nogc 370 { 371 Foo f = cast(Foo)other; 372 return f !is null && f.name == this.name; 373 } 374 } 375 376 hash_t stringToHash(string str) @safe pure nothrow @nogc 377 { 378 hash_t hash = 5381; 379 return hash; 380 } 381 382 hash_t FooToHash(Foo e) pure @safe nothrow @nogc 383 { 384 return stringToHash(e.name); 385 } 386 387 OpenHashSet!(Foo, Mallocator, FooToHash) hs; 388 auto f = new Foo; 389 hs.insert(f); 390 assert(f in hs); 391 auto r = hs[]; 392 }