// Written in the D programming language.
/**
`AlignedBlockList` represents a wrapper around a chain of allocators, allowing for fast deallocations
and preserving a low degree of fragmentation by means of aligned allocations.

Source: $(PHOBOSSRC std/experimental/allocator/building_blocks/aligned_block_list.d)
*/
module std.experimental.allocator.building_blocks.aligned_block_list;

import std.experimental.allocator.common;
import std.experimental.allocator.building_blocks.null_allocator;

// Common function implementation for thread-local and shared AlignedBlockList
private mixin template AlignedBlockListImpl(bool isShared)
{
    import std.traits : hasMember;
    import std.typecons : Ternary;

    static if (isShared)
    import core.internal.spinlock : SpinLock;

private:
    // Doubly linked list of 'AlignedBlockNode'
    // Each node contains an `Allocator` followed by its payload
    static struct AlignedBlockNode
    {
        AlignedBlockNode* next, prev;
        Allocator bAlloc;

        static if (isShared)
        {
            shared(size_t) bytesUsed;
            // Since the lock is not taken when allocating, this acts like a refcount
            // keeping the node alive
            uint keepAlive;
        }
        else
        {
            size_t bytesUsed;
        }
    }

    // Root of the internal doubly linked list
    AlignedBlockNode* root;

    // Number of active nodes
    uint numNodes;

    // If `numNodes` exceeds this limit, we will start deallocating empty nodes
    enum uint maxNodes = 64;

    // This lock is always taken when changing the list
    // To improve performance, the lock is not taken when the allocation logic is called
    static if (isShared)
    SpinLock lock = SpinLock(SpinLock.Contention.brief);

    // Moves a node to the front of the list, allowing for quick allocations
    void moveToFront(AlignedBlockNode* tmp)
    {
        auto localRoot = cast(AlignedBlockNode*) root;
        if (tmp == localRoot)
            return;

        if (tmp.prev) tmp.prev.next = tmp.next;
        if (tmp.next) tmp.next.prev = tmp.prev;
        if (localRoot) localRoot.prev = tmp;
        tmp.next = localRoot;
        tmp.prev = null;

        root = cast(typeof(root)) tmp;
    }

    // Removes a node from the list, including its payload
    // The payload is deallocated by calling 'parent.deallocate'
    void removeNode(AlignedBlockNode* tmp)
    {
        auto next = tmp.next;
        if (tmp.prev) tmp.prev.next = tmp.next;
        if (tmp.next) tmp.next.prev = tmp.prev;
        parent.deallocate((cast(void*) tmp)[0 .. theAlignment]);

        if (tmp == cast(AlignedBlockNode*) root)
            root = cast(typeof(root)) next;

        static if (isShared)
        {
            import core.atomic : atomicOp;
            atomicOp!"-="(numNodes, 1);
        }
        else
        {
            numNodes--;
        }
    }

    // If the existing nodes do not have available space, a new node is created
    // by drawing memory from the parent allocator with an aligned allocation.
    // The new node is inserted at the front of the list
    bool insertNewNode()
    {
        void[] buf = parent.alignedAllocate(theAlignment, theAlignment);
        if (buf is null)
            return false;

        auto localRoot = cast(AlignedBlockNode*) root;
        auto newNode = cast(AlignedBlockNode*) buf;

        // The first part of the allocation represents the node contents,
        // followed by the actual payload
        ubyte[] payload = cast(ubyte[]) buf[AlignedBlockNode.sizeof .. $];
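        // Layout of the freshly carved aligned allocation (not to scale):
        //
        //   |<-------------------- theAlignment bytes -------------------->|
        //   +------------------------+--------------------------------------+
        //   | AlignedBlockNode       | payload handed to the `Allocator`    |
        //   | (next, prev, bAlloc,   | constructor as `ubyte[]`             |
        //   |  bytesUsed, ...)       |                                      |
        //   +------------------------+--------------------------------------+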
        newNode.bAlloc = Allocator(payload);

        newNode.next = localRoot;
        newNode.prev = null;
        if (localRoot)
            localRoot.prev = newNode;
        root = cast(typeof(root)) newNode;

        static if (isShared)
        {
            import core.atomic : atomicOp;
            atomicOp!"+="(numNodes, 1);
        }
        else
        {
            numNodes++;
        }

        return true;
    }

public:
    static if (stateSize!ParentAllocator) ParentAllocator parent;
    else alias parent = ParentAllocator.instance;

    enum ulong alignment = Allocator.alignment;

    // Since all memory is drawn from ParentAllocator, we can
    // forward this to the parent
    static if (hasMember!(ParentAllocator, "owns"))
    Ternary owns(void[] b)
    {
        return parent.owns(b);
    }

    // Use `theAlignment` to find the node which allocated this block
    bool deallocate(void[] b)
    {
        if (b is null)
            return true;

        // Round the buffer address down to the nearest `theAlignment` multiple
        // to quickly find the owning `AlignedBlockNode`. For example, with
        // `theAlignment = 1 << 21`, a block at address 0x10423450 belongs to
        // the node at 0x10400000
        enum ulong mask = ~(theAlignment - 1);
        ulong ptr = ((cast(ulong) b.ptr) & mask);
        AlignedBlockNode* node = cast(AlignedBlockNode*) ptr;
        if (node.bAlloc.deallocate(b))
        {
            static if (isShared)
            {
                import core.atomic : atomicOp;
                atomicOp!"-="(node.bytesUsed, b.length);
            }
            else
            {
                node.bytesUsed -= b.length;
            }
            return true;
        }
        return false;
    }

    // Allocate works only if memory can be provided via `alignedAllocate` from the parent
    static if (hasMember!(ParentAllocator, "alignedAllocate"))
    void[] allocate(size_t n)
    {
        static if (isShared)
        import core.atomic : atomicOp, atomicLoad;

        if (n == 0 || n > theAlignment)
            return null;

        static if (isShared)
        {
            lock.lock();
            scope(exit) lock.unlock();
        }

        auto tmp = cast(AlignedBlockNode*) root;

        // Iterate through the list and find the first node which has memory available
        while (tmp)
        {
            auto next = tmp.next;
            static if (isShared)
            {
                // Allocations can happen outside the lock
                // Make sure nobody deletes this node while using it
                tmp.keepAlive++;
                if (next) next.keepAlive++;
                lock.unlock();
            }

            auto result = tmp.bAlloc.allocate(n);
            if (result.length == n)
            {
                // Success
                static if (isShared)
                {
                    atomicOp!"+="(tmp.bytesUsed, n);
                    lock.lock();
                }
                else
                {
                    tmp.bytesUsed += n;
                }

                // Most likely this node has memory for more allocations
                // Move it to the front
                moveToFront(tmp);

                static if (isShared)
                {
                    tmp.keepAlive--;
                    if (next) next.keepAlive--;
                }

                return result;
            }

            // This node can now be removed if necessary
            static if (isShared)
            {
                lock.lock();
                tmp.keepAlive--;
                if (next) next.keepAlive--;
            }

            if (!next)
                break;

            tmp = next;
            next = tmp.next;

            // If there are too many nodes, free memory by removing empty nodes
            static if (isShared)
            {
                if (atomicLoad(numNodes) > maxNodes &&
                    atomicLoad(tmp.bytesUsed) == 0 &&
                    tmp.keepAlive == 0)
                {
                    removeNode(tmp);
                }
            }
            else
            {
                if (numNodes > maxNodes && tmp.bytesUsed == 0)
                {
                    removeNode(tmp);
                }
            }

            tmp = next;
        }
        // Cannot create a new AlignedBlockNode; most likely the
        // ParentAllocator ran out of resources
        if (!insertNewNode())
            return null;

        tmp = cast(typeof(tmp)) root;
        void[] result = tmp.bAlloc.allocate(n);

        static if (isShared)
        {
            atomicOp!"+="(root.bytesUsed, result.length);
        }
        else
        {
            root.bytesUsed += result.length;
        }

        return result;
    }

    // `goodAllocSize` does not depend on the allocator's state,
    // so it can be called on a null-initialized `Allocator`
    size_t goodAllocSize(const size_t n)
    {
        Allocator a = null;
        return a.goodAllocSize(n);
    }
}

/**
`AlignedBlockList` represents a wrapper around a chain of allocators, allowing for fast deallocations
and preserving a low degree of fragmentation.
The allocator internally holds a doubly linked list of `Allocator` objects, which serve allocations
in a most-recently-used fashion. Allocators most recently used for `allocate` calls are
moved to the front of the list.

Although allocations are in theory served in linear search time, `deallocate` calls take
$(BIGOH 1) time by using aligned allocations. `ParentAllocator` must implement `alignedAllocate`
and it must be able to allocate `theAlignment` bytes at the same alignment. Each aligned allocation
done by `ParentAllocator` will contain metadata for an `Allocator`, followed by its payload.

Params:
    Allocator = the allocator which is used to manage each node; it must have a constructor which receives
        `ubyte[]` and it must not have any parent allocators, except for the `NullAllocator`
    ParentAllocator = each node draws memory from the parent allocator; it must support `alignedAllocate`
    theAlignment = alignment of each block and at the same time length of each node
*/
struct AlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
{
    version (StdDdoc)
    {
        import std.typecons : Ternary;
        import std.traits : hasMember;

        /**
        Returns a chunk of memory of size `n`.
        It finds the first node in the `AlignedBlockNode` list which has available memory,
        and moves it to the front of the list.

        All empty nodes which cannot return new memory are removed from the list.

        Params:
            n = bytes to allocate
        Returns:
            A chunk of memory of the required length or `null` on failure
        */
        static if (hasMember!(ParentAllocator, "alignedAllocate"))
        void[] allocate(size_t n);

        /**
        Deallocates the buffer `b` given as parameter. Deallocations take place in constant
        time, regardless of the number of nodes in the list. `b.ptr` is rounded down
        to the nearest multiple of the `alignment` to quickly find the corresponding
        `AlignedBlockNode`.

        Params:
            b = buffer candidate for deallocation
        Returns:
            `true` on success and `false` on failure
        */
        bool deallocate(void[] b);

        /**
        Returns `Ternary.yes` if the buffer belongs to the parent allocator and
        `Ternary.no` otherwise.

        Params:
            b = buffer tested if owned by this allocator
        Returns:
            `Ternary.yes` if owned by this allocator and `Ternary.no` otherwise
        */
        static if (hasMember!(ParentAllocator, "owns"))
        Ternary owns(void[] b);
    }
    else
    {
        import std.math.traits : isPowerOf2;
        static assert(isPowerOf2(alignment));
        mixin AlignedBlockListImpl!false;
    }
}

///
@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.segregator : Segregator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
    import std.typecons : Ternary;

    /*
    In this example we use 'AlignedBlockList' in conjunction with other allocators
    in order to create a more complex allocator.

    The 'SuperAllocator' uses a 'Segregator' to distribute allocations to sub-allocators,
    based on the requested size.

    Each sub-allocator is represented by an 'AlignedBlockList' of 'BitmappedBlocks'.
    Each 'AlignedBlockList' draws memory from a root allocator, which in this case is an
    'AscendingPageAllocator'.

    Such an allocator not only provides good performance, but also a low degree of memory fragmentation.
    */
    alias SuperAllocator = Segregator!(
        32,
        AlignedBlockList!(BitmappedBlock!32, AscendingPageAllocator*, 1 << 12),
        Segregator!(

        64,
        AlignedBlockList!(BitmappedBlock!64, AscendingPageAllocator*, 1 << 12),
        Segregator!(

        128,
        AlignedBlockList!(BitmappedBlock!128, AscendingPageAllocator*, 1 << 12),
        AscendingPageAllocator*
    )));

    SuperAllocator a;
    auto pageAlloc = AscendingPageAllocator(128 * 4096);

    // Set the parent allocator for all the sub-allocators
    a.allocatorForSize!256 = &pageAlloc;
    a.allocatorForSize!128.parent = &pageAlloc;
    a.allocatorForSize!64.parent = &pageAlloc;
    a.allocatorForSize!32.parent = &pageAlloc;

    enum testNum = 10;
    void[][testNum] buf;

    // Allocations of size 32 will go to the first 'AlignedBlockList'
    foreach (j; 0 .. testNum)
    {
        buf[j] = a.allocate(32);
        assert(buf[j].length == 32);

        // This is owned by the first 'AlignedBlockList'
        assert(a.allocatorForSize!32.owns(buf[j]) == Ternary.yes);
    }

    // Free the memory
    foreach (j; 0 .. testNum)
        assert(a.deallocate(buf[j]));

    // Allocations of size 64 will go to the second 'AlignedBlockList'
    foreach (j; 0 .. testNum)
    {
        buf[j] = a.allocate(64);
        assert(buf[j].length == 64);

        // This is owned by the second 'AlignedBlockList'
        assert(a.allocatorForSize!64.owns(buf[j]) == Ternary.yes);
    }

    // Free the memory
    foreach (j; 0 .. testNum)
        assert(a.deallocate(buf[j]));

    // Allocations of size 128 will go to the third 'AlignedBlockList'
    foreach (j; 0 .. testNum)
    {
        buf[j] = a.allocate(128);
        assert(buf[j].length == 128);

        // This is owned by the third 'AlignedBlockList'
        assert(a.allocatorForSize!128.owns(buf[j]) == Ternary.yes);
    }

    // Free the memory
    foreach (j; 0 .. testNum)
        assert(a.deallocate(buf[j]));

    // Allocations which exceed 128 bytes will go to the 'AscendingPageAllocator*'
    void[] b = a.allocate(256);
    assert(b.length == 256);
    a.deallocate(b);
}
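
// A minimal sketch of using `AlignedBlockList` on its own, without a `Segregator`
// on top: a single list of `BitmappedBlock!64` nodes, each node one 4 KB aligned
// block. The node length, block size and parent capacity below are illustrative
// choices, not requirements.
@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;

    AlignedBlockList!(BitmappedBlock!64, AscendingPageAllocator*, 1 << 12) list;
    auto pageAlloc = AscendingPageAllocator(64 * 4096);
    list.parent = &pageAlloc;

    void[] b = list.allocate(64);
    assert(b.length == 64);

    // `deallocate` finds the owning node in constant time: with
    // `theAlignment = 1 << 12`, a block at e.g. address 0x10423450 is mapped
    // to the node metadata at 0x10423000 by masking off the low 12 bits
    assert(list.deallocate(b));
}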

/**
`SharedAlignedBlockList` is the thread-safe version of `AlignedBlockList`.
The `Allocator` template parameter must refer to a shared allocator.
Also, `ParentAllocator` must be a shared allocator, supporting `alignedAllocate`.

Params:
    Allocator = the shared allocator which is used to manage each node; it must have a constructor which receives
        `ubyte[]` and it must not have any parent allocators, except for the `NullAllocator`
    ParentAllocator = each node draws memory from the parent allocator; it must be shared and support `alignedAllocate`
    theAlignment = alignment of each block and at the same time length of each node
*/
shared struct SharedAlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
{
    version (StdDdoc)
    {
        import std.typecons : Ternary;
        import std.traits : hasMember;

        /**
        Returns a chunk of memory of size `n`.
        It finds the first node in the `AlignedBlockNode` list which has available memory,
        and moves it to the front of the list.

        All empty nodes which cannot return new memory are removed from the list.

        Params:
            n = bytes to allocate
        Returns:
            A chunk of memory of the required length or `null` on failure
        */
        static if (hasMember!(ParentAllocator, "alignedAllocate"))
        void[] allocate(size_t n);

        /**
        Deallocates the buffer `b` given as parameter. Deallocations take place in constant
        time, regardless of the number of nodes in the list. `b.ptr` is rounded down
        to the nearest multiple of the `alignment` to quickly find the corresponding
        `AlignedBlockNode`.

        Params:
            b = buffer candidate for deallocation
        Returns:
            `true` on success and `false` on failure
        */
        bool deallocate(void[] b);

        /**
        Returns `Ternary.yes` if the buffer belongs to the parent allocator and
        `Ternary.no` otherwise.

        Params:
            b = buffer tested if owned by this allocator
        Returns:
            `Ternary.yes` if owned by this allocator and `Ternary.no` otherwise
        */
        static if (hasMember!(ParentAllocator, "owns"))
        Ternary owns(void[] b);
    }
    else
    {
        import std.math.traits : isPowerOf2;
        static assert(isPowerOf2(alignment));
        mixin AlignedBlockListImpl!true;
    }
}

///
@system unittest
{
    import std.experimental.allocator.building_blocks.region : SharedBorrowedRegion;
    import std.experimental.allocator.building_blocks.ascending_page_allocator : SharedAscendingPageAllocator;
    import std.experimental.allocator.building_blocks.null_allocator : NullAllocator;
    import core.thread : ThreadGroup;

    enum numThreads = 8;
    enum size = 2048;
    enum maxIter = 10;

    /*
    In this example we use 'SharedAlignedBlockList' together with
    'SharedBorrowedRegion', in order to create a fast, thread-safe allocator.
    */
    alias SuperAllocator = SharedAlignedBlockList!(
        SharedBorrowedRegion!(1),
        SharedAscendingPageAllocator,
        4096);

    SuperAllocator a;
    // The 'SuperAllocator' will draw memory from a 'SharedAscendingPageAllocator'
    a.parent = SharedAscendingPageAllocator(4096 * 1024);

    // Launch 'numThreads' threads, each performing allocations
    void fun()
    {
        foreach (i; 0 .. maxIter)
        {
            void[] b = a.allocate(size);
            assert(b.length == size);
        }
    }

    auto tg = new ThreadGroup;
    foreach (i; 0 .. numThreads)
    {
        tg.create(&fun);
    }
    tg.joinAll();
}
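
// A minimal single-threaded sketch of the same `SharedAlignedBlockList`
// configuration as above; the 4096-byte node length and the parent capacity are
// illustrative assumptions. A single block is allocated and then freed, which
// exercises the pointer-masking `deallocate` path.
@system unittest
{
    import std.experimental.allocator.building_blocks.region : SharedBorrowedRegion;
    import std.experimental.allocator.building_blocks.ascending_page_allocator : SharedAscendingPageAllocator;

    SharedAlignedBlockList!(SharedBorrowedRegion!(1), SharedAscendingPageAllocator, 4096) a;
    a.parent = SharedAscendingPageAllocator(4096 * 1024);

    void[] b = a.allocate(100);
    assert(b.length == 100);

    // Freeing the most recent allocation succeeds, since each node is a region
    assert(a.deallocate(b));
}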

version (StdUnittest)
{
    static void testrw(void[] b)
    {
        ubyte* buf = cast(ubyte*) b.ptr;
        size_t len = (b.length).roundUpToMultipleOf(4096);
        // Touch one byte per page to verify the memory is readable and writable
        for (int i = 0; i < len; i += 4096)
        {
            buf[i] = (cast(ubyte) i % 256);
            assert(buf[i] == (cast(ubyte) i % 256));
        }
    }
}

@system unittest
{
    import std.experimental.allocator.building_blocks.region;
    import std.experimental.allocator.building_blocks.ascending_page_allocator;
    import std.random;
    import std.algorithm.sorting : sort;
    import core.thread : ThreadGroup;
    import core.internal.spinlock : SpinLock;

    enum pageSize = 4096;
    enum numThreads = 10;
    enum maxIter = 20;
    enum totalAllocs = maxIter * numThreads;
    size_t count = 0;
    SpinLock lock = SpinLock(SpinLock.Contention.brief);

    alias SuperAllocator = SharedAlignedBlockList!(
        SharedBorrowedRegion!(1),
        SharedAscendingPageAllocator,
        1 << 16);
    void[][totalAllocs] buf;

    SuperAllocator a;
    a.parent = SharedAscendingPageAllocator(4096 * 1024);

    void fun()
    {
        auto rnd = Random(1000);

        foreach (i; 0 .. maxIter)
        {
            auto size = uniform(1, pageSize + 1, rnd);
            void[] b = a.allocate(size);
            assert(b.length == size);
            testrw(b);

            lock.lock();
            buf[count++] = b;
            lock.unlock();
        }
    }
    auto tg = new ThreadGroup;
    foreach (i; 0 .. numThreads)
    {
        tg.create(&fun);
    }
    tg.joinAll();

    // Allocations must not overlap: sorted by address, each block must end
    // before the next one begins
    sort!((a, b) => a.ptr < b.ptr)(buf[0 .. totalAllocs]);
    foreach (i; 0 .. totalAllocs - 1)
    {
        assert(buf[i].ptr + a.goodAllocSize(buf[i].length) <= buf[i + 1].ptr);
    }

    foreach (i; 0 .. totalAllocs)
    {
        assert(a.deallocate(buf[totalAllocs - 1 - i]));
    }
}

@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.segregator : Segregator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
    import std.random;

    alias SuperAllocator = Segregator!(
        256,
        AlignedBlockList!(BitmappedBlock!256, AscendingPageAllocator*, 1 << 16),
        Segregator!(

        512,
        AlignedBlockList!(BitmappedBlock!512, AscendingPageAllocator*, 1 << 16),
        Segregator!(

        1024,
        AlignedBlockList!(BitmappedBlock!1024, AscendingPageAllocator*, 1 << 16),
        Segregator!(

        2048,
        AlignedBlockList!(BitmappedBlock!2048, AscendingPageAllocator*, 1 << 16),
        AscendingPageAllocator*
    ))));

    SuperAllocator a;
    auto pageAlloc = AscendingPageAllocator(4096 * 4096);
    a.allocatorForSize!4096 = &pageAlloc;
    a.allocatorForSize!2048.parent = &pageAlloc;
    a.allocatorForSize!1024.parent = &pageAlloc;
    a.allocatorForSize!512.parent = &pageAlloc;
    a.allocatorForSize!256.parent = &pageAlloc;

    auto rnd = Random(1000);

    size_t maxIter = 10;
    enum testNum = 10;
    void[][testNum] buf;
    int maxSize = 8192;
    foreach (i; 0 .. maxIter)
    {
        foreach (j; 0 .. testNum)
        {
            auto size = uniform(1, maxSize + 1, rnd);
            buf[j] = a.allocate(size);
            assert(buf[j].length == size);
            testrw(buf[j]);
        }

        randomShuffle(buf[]);

        foreach (j; 0 .. testNum)
        {
            assert(a.deallocate(buf[j]));
        }
    }
}