1 /** 2 * This module contains a collection of bit-level operations. 3 * 4 * Copyright: Copyright Don Clugston 2005 - 2013. 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 * Authors: Don Clugston, Sean Kelly, Walter Bright, Alex Rønne Petersen, Thomas Stuart Bockman 7 * Source: $(DRUNTIMESRC core/_bitop.d) 8 * 9 * Some of the LDC-specific parts came »From GDC ... public domain!« 10 */ 11 12 module core.bitop; 13 14 nothrow: 15 @safe: 16 @nogc: 17 18 version (LDC) 19 { 20 import ldc.intrinsics; 21 // Do not use the DMD inline assembler. 22 } 23 else version (D_InlineAsm_X86_64) 24 version = AsmX86; 25 else version (D_InlineAsm_X86) 26 version = AsmX86; 27 28 version (X86_64) 29 version = AnyX86; 30 else version (X86) 31 version = AnyX86; 32 33 // Use to implement 64-bit bitops on 32-bit arch. 34 private union Split64 35 { 36 ulong u64; 37 struct 38 { 39 version (LittleEndian) 40 { 41 uint lo; 42 uint hi; 43 } 44 else 45 { 46 uint hi; 47 uint lo; 48 } 49 } 50 51 pragma(inline, true) 52 this(ulong u64) @safe pure nothrow @nogc 53 { 54 if (__ctfe) 55 { 56 lo = cast(uint) u64; 57 hi = cast(uint) (u64 >>> 32); 58 } 59 else 60 this.u64 = u64; 61 } 62 } 63 64 unittest 65 { 66 const rt = Split64(1); 67 assert((rt.lo == 1) && (rt.hi == 0)); 68 69 enum ct = Split64(1); 70 assert((ct.lo == rt.lo) && (ct.hi == rt.hi)); 71 } 72 73 /** 74 * Scans the bits in v starting with bit 0, looking 75 * for the first set bit. 76 * Returns: 77 * The bit number of the first bit set. 78 * The return value is undefined if v is zero. 79 */ 80 pragma(inline, true) // LDC 81 int bsf(uint v) pure 82 { 83 version (LDC) 84 { 85 if (!__ctfe) 86 return cast(int) llvm_cttz(v, true); 87 } 88 else 89 { 90 pragma(inline, false); // so intrinsic detection will work 91 } 92 return softBsf!uint(v); 93 } 94 95 /// ditto 96 pragma(inline, true) // LDC 97 int bsf(ulong v) pure 98 { 99 static if (size_t.sizeof == ulong.sizeof) // 64 bit code gen 100 { 101 version (LDC) 102 { 103 if (!__ctfe) 104 return cast(int) llvm_cttz(v, true); 105 } 106 else 107 { 108 pragma(inline, false); // so intrinsic detection will work 109 } 110 return softBsf!ulong(v); 111 } 112 else 113 { 114 /* intrinsic not available for 32 bit code, 115 * make do with 32 bit bsf 116 */ 117 const sv = Split64(v); 118 return (sv.lo == 0)? 119 bsf(sv.hi) + 32 : 120 bsf(sv.lo); 121 } 122 } 123 124 /// 125 unittest 126 { 127 assert(bsf(0x21) == 0); 128 assert(bsf(ulong.max << 39) == 39); 129 } 130 131 unittest 132 { 133 // Make sure bsf() is available at CTFE 134 enum test_ctfe = bsf(ulong.max); 135 assert(test_ctfe == 0); 136 } 137 138 /** 139 * Scans the bits in v from the most significant bit 140 * to the least significant bit, looking 141 * for the first set bit. 142 * Returns: 143 * The bit number of the first bit set. 144 * The return value is undefined if v is zero. 145 */ 146 pragma(inline, true) // LDC 147 int bsr(uint v) pure 148 { 149 version (LDC) 150 { 151 if (!__ctfe) 152 return cast(int) (typeof(v).sizeof * 8 - 1 - llvm_ctlz(v, true)); 153 } 154 else 155 { 156 pragma(inline, false); // so intrinsic detection will work 157 } 158 return softBsr!uint(v); 159 } 160 161 /// ditto 162 pragma(inline, true) // LDC 163 int bsr(ulong v) pure 164 { 165 static if (size_t.sizeof == ulong.sizeof) // 64 bit code gen 166 { 167 version (LDC) 168 { 169 if (!__ctfe) 170 return cast(int) (size_t.sizeof * 8 - 1 - llvm_ctlz(v, true)); 171 } 172 else 173 { 174 pragma(inline, false); // so intrinsic detection will work 175 } 176 return softBsr!ulong(v); 177 } 178 else 179 { 180 /* intrinsic not available for 32 bit code, 181 * make do with 32 bit bsr 182 */ 183 const sv = Split64(v); 184 return (sv.hi == 0)? 185 bsr(sv.lo) : 186 bsr(sv.hi) + 32; 187 } 188 } 189 190 /// 191 unittest 192 { 193 assert(bsr(0x21) == 5); 194 assert(bsr((ulong.max >> 15) - 1) == 48); 195 } 196 197 unittest 198 { 199 // Make sure bsr() is available at CTFE 200 enum test_ctfe = bsr(ulong.max); 201 assert(test_ctfe == 63); 202 } 203 204 private alias softBsf(N) = softScan!(N, true); 205 private alias softBsr(N) = softScan!(N, false); 206 207 /* Shared software fallback implementation for bit scan foward and reverse. 208 209 If forward is true, bsf is computed (the index of the first set bit). 210 If forward is false, bsr is computed (the index of the last set bit). 211 212 -1 is returned if no bits are set (v == 0). 213 */ 214 private int softScan(N, bool forward)(N v) pure 215 if (is(N == uint) || is(N == ulong)) 216 { 217 // bsf() and bsr() are officially undefined for v == 0. 218 if (!v) 219 return -1; 220 221 // This is essentially an unrolled binary search: 222 enum mask(ulong lo) = forward ? cast(N) lo : cast(N)~lo; 223 enum inc(int up) = forward ? up : -up; 224 225 N x; 226 int ret; 227 static if (is(N == ulong)) 228 { 229 x = v & mask!0x0000_0000_FFFF_FFFFL; 230 if (x) 231 { 232 v = x; 233 ret = forward ? 0 : 63; 234 } 235 else 236 ret = forward ? 32 : 31; 237 238 x = v & mask!0x0000_FFFF_0000_FFFFL; 239 if (x) 240 v = x; 241 else 242 ret += inc!16; 243 } 244 else static if (is(N == uint)) 245 { 246 x = v & mask!0x0000_FFFF; 247 if (x) 248 { 249 v = x; 250 ret = forward ? 0 : 31; 251 } 252 else 253 ret = forward ? 16 : 15; 254 } 255 else 256 static assert(false); 257 258 x = v & mask!0x00FF_00FF_00FF_00FFL; 259 if (x) 260 v = x; 261 else 262 ret += inc!8; 263 264 x = v & mask!0x0F0F_0F0F_0F0F_0F0FL; 265 if (x) 266 v = x; 267 else 268 ret += inc!4; 269 270 x = v & mask!0x3333_3333_3333_3333L; 271 if (x) 272 v = x; 273 else 274 ret += inc!2; 275 276 x = v & mask!0x5555_5555_5555_5555L; 277 if (!x) 278 ret += inc!1; 279 280 return ret; 281 } 282 283 unittest 284 { 285 assert(softBsf!uint(0u) == -1); 286 assert(softBsr!uint(0u) == -1); 287 assert(softBsf!ulong(0uL) == -1); 288 assert(softBsr!ulong(0uL) == -1); 289 290 assert(softBsf!uint(0x0031_A000) == 13); 291 assert(softBsr!uint(0x0031_A000) == 21); 292 assert(softBsf!ulong(0x0000_0001_8000_0000L) == 31); 293 assert(softBsr!ulong(0x0000_0001_8000_0000L) == 32); 294 295 foreach (b; 0 .. 64) 296 { 297 if (b < 32) 298 { 299 assert(softBsf!uint(1u << b) == b); 300 assert(softBsr!uint(1u << b) == b); 301 } 302 303 assert(softBsf!ulong(1uL << b) == b); 304 assert(softBsr!ulong(1uL << b) == b); 305 } 306 } 307 308 /** 309 * Tests the bit. 310 * (No longer an intrisic - the compiler recognizes the patterns 311 * in the body.) 312 */ 313 version (none) 314 { 315 // Our implementation returns an arbitrary non-zero value if the bit was 316 // set, which is not what std.bitmanip expects. 317 pragma(LDC_intrinsic, "ldc.bitop.bt") 318 int bt(const scope size_t* p, size_t bitnum) pure @system; 319 } 320 else 321 pragma(inline, true) // LDC 322 int bt(const scope size_t* p, size_t bitnum) pure @system 323 { 324 static if (size_t.sizeof == 8) 325 return ((p[bitnum >> 6] & (1L << (bitnum & 63)))) != 0; 326 else static if (size_t.sizeof == 4) 327 return ((p[bitnum >> 5] & (1 << (bitnum & 31)))) != 0; 328 else 329 static assert(0); 330 } 331 /// 332 @system pure unittest 333 { 334 size_t[2] array; 335 336 array[0] = 2; 337 array[1] = 0x100; 338 339 assert(bt(array.ptr, 1)); 340 assert(array[0] == 2); 341 assert(array[1] == 0x100); 342 } 343 344 345 version (LDC) 346 { 347 pragma(LDC_intrinsic, "ldc.bitop.bts") 348 private int __bts(size_t* p, size_t bitnum) pure @system; 349 pragma(LDC_intrinsic, "ldc.bitop.btc") 350 private int __btc(size_t* p, size_t bitnum) pure @system; 351 pragma(LDC_intrinsic, "ldc.bitop.btr") 352 private int __btr(size_t* p, size_t bitnum) pure @system; 353 } 354 355 private 356 int softBtx(string op)(size_t* p, size_t bitnum) pure @system 357 { 358 size_t indexIntoArray = bitnum / (size_t.sizeof*8); 359 size_t bitmask = size_t(1) << (bitnum & ((size_t.sizeof*8) - 1)); 360 size_t original = p[indexIntoArray]; 361 mixin("p[indexIntoArray] = original " ~ op ~ " bitmask;"); 362 return (original&bitmask) > 0 ? true : false; 363 } 364 365 /** 366 * Tests and complements the bit. 367 */ 368 pragma(inline, true) // LDC 369 int btc(size_t* p, size_t bitnum) pure @system 370 { 371 version (LDC) 372 { 373 if (!__ctfe) 374 return __btc(p, bitnum); 375 } 376 else 377 { 378 pragma(inline, false); // such that DMD intrinsic detection will work 379 } 380 return softBtx!"^"(p, bitnum); 381 } 382 383 384 /** 385 * Tests and resets (sets to 0) the bit. 386 */ 387 pragma(inline, true) // LDC 388 int btr(size_t* p, size_t bitnum) pure @system 389 { 390 version (LDC) 391 { 392 if (!__ctfe) 393 return __btr(p, bitnum); 394 } 395 else 396 { 397 pragma(inline, false); // such that DMD intrinsic detection will work 398 } 399 return softBtx!"& ~"(p, bitnum); 400 } 401 402 403 /** 404 * Tests and sets the bit. 405 * Params: 406 * p = a non-NULL pointer to an array of size_ts. 407 * bitnum = a bit number, starting with bit 0 of p[0], 408 * and progressing. It addresses bits like the expression: 409 --- 410 p[index / (size_t.sizeof*8)] & (1 << (index & ((size_t.sizeof*8) - 1))) 411 --- 412 * Returns: 413 * A non-zero value if the bit was set, and a zero 414 * if it was clear. 415 */ 416 pragma(inline, true) // LDC 417 int bts(size_t* p, size_t bitnum) pure @system 418 { 419 version (LDC) 420 { 421 if (!__ctfe) 422 return __bts(p, bitnum); 423 } 424 else 425 { 426 pragma(inline, false); // such that DMD intrinsic detection will work 427 } 428 return softBtx!"|"(p, bitnum); 429 } 430 431 /// 432 @system pure unittest 433 { 434 @system pure bool testbitset() 435 { 436 size_t[2] array; 437 438 array[0] = 2; 439 array[1] = 0x100; 440 441 assert(btc(array.ptr, 35) == 0); 442 if (size_t.sizeof == 8) 443 { 444 assert(array[0] == 0x8_0000_0002); 445 assert(array[1] == 0x100); 446 } 447 else 448 { 449 assert(array[0] == 2); 450 assert(array[1] == 0x108); 451 } 452 453 assert(btc(array.ptr, 35)); 454 assert(array[0] == 2); 455 assert(array[1] == 0x100); 456 457 assert(bts(array.ptr, 35) == 0); 458 if (size_t.sizeof == 8) 459 { 460 assert(array[0] == 0x8_0000_0002); 461 assert(array[1] == 0x100); 462 } 463 else 464 { 465 assert(array[0] == 2); 466 assert(array[1] == 0x108); 467 } 468 469 assert(btr(array.ptr, 35)); 470 assert(array[0] == 2); 471 assert(array[1] == 0x100); 472 473 return true; 474 } 475 476 enum b = testbitset(); // CTFE test 477 testbitset(); // runtime test 478 } 479 480 /** 481 * Range over bit set. Each element is the bit number that is set. 482 * 483 * This is more efficient than testing each bit in a sparsely populated bit 484 * set. Note that the first bit in the bit set would be bit 0. 485 */ 486 struct BitRange 487 { 488 /// Number of bits in each size_t 489 enum bitsPerWord = size_t.sizeof * 8; 490 491 private 492 { 493 const(size_t)*bits; // points at next word of bits to check 494 size_t cur; // needed to allow searching bits using bsf 495 size_t idx; // index of current set bit 496 size_t len; // number of bits in the bit set. 497 } 498 @nogc nothrow pure: 499 500 /** 501 * Construct a BitRange. 502 * 503 * Params: 504 * bitarr = The array of bits to iterate over 505 * numBits = The total number of valid bits in the given bit array 506 */ 507 this(const(size_t)* bitarr, size_t numBits) @system 508 { 509 bits = bitarr; 510 len = numBits; 511 if (len) 512 { 513 // prime the first bit 514 cur = *bits++ ^ 1; 515 popFront(); 516 } 517 } 518 519 /// Range functions 520 size_t front() 521 { 522 assert(!empty); 523 return idx; 524 } 525 526 /// ditto 527 bool empty() const 528 { 529 return idx >= len; 530 } 531 532 /// ditto 533 void popFront() @system 534 { 535 // clear the current bit 536 auto curbit = idx % bitsPerWord; 537 cur ^= size_t(1) << curbit; 538 if (!cur) 539 { 540 // find next size_t with set bit 541 idx -= curbit; 542 while (!cur) 543 { 544 if ((idx += bitsPerWord) >= len) 545 // now empty 546 return; 547 cur = *bits++; 548 } 549 idx += bsf(cur); 550 } 551 else 552 { 553 idx += bsf(cur) - curbit; 554 } 555 } 556 } 557 558 /// 559 @system unittest 560 { 561 import core.stdc.stdlib : malloc, free; 562 import core.stdc.string : memset; 563 564 // initialize a bit array 565 enum nBytes = (100 + BitRange.bitsPerWord - 1) / 8; 566 size_t *bitArr = cast(size_t *)malloc(nBytes); 567 scope(exit) free(bitArr); 568 memset(bitArr, 0, nBytes); 569 570 // set some bits 571 bts(bitArr, 48); 572 bts(bitArr, 24); 573 bts(bitArr, 95); 574 bts(bitArr, 78); 575 576 enum sum = 48 + 24 + 95 + 78; 577 578 // iterate 579 size_t testSum; 580 size_t nBits; 581 foreach (b; BitRange(bitArr, 100)) 582 { 583 testSum += b; 584 ++nBits; 585 } 586 587 assert(testSum == sum); 588 assert(nBits == 4); 589 } 590 591 @system unittest 592 { 593 void testIt(size_t numBits, size_t[] bitsToTest...) 594 { 595 import core.stdc.stdlib : malloc, free; 596 import core.stdc.string : memset; 597 immutable numBytes = (numBits + size_t.sizeof * 8 - 1) / 8; 598 size_t* bitArr = cast(size_t *)malloc(numBytes); 599 scope(exit) free(bitArr); 600 memset(bitArr, 0, numBytes); 601 foreach (b; bitsToTest) 602 bts(bitArr, b); 603 auto br = BitRange(bitArr, numBits); 604 foreach (b; bitsToTest) 605 { 606 assert(!br.empty); 607 assert(b == br.front); 608 br.popFront(); 609 } 610 assert(br.empty); 611 } 612 613 testIt(100, 0, 1, 31, 63, 85); 614 testIt(100, 6, 45, 89, 92, 99); 615 } 616 617 /** 618 * Swaps bytes in a 2 byte ushort. 619 * Params: 620 * x = value 621 * Returns: 622 * `x` with bytes swapped 623 */ 624 version (LDC) 625 { 626 alias byteswap = llvm_bswap!ushort; 627 } 628 else 629 pragma(inline, false) 630 ushort byteswap(ushort x) pure 631 { 632 /* Calling it bswap(ushort) would break existing code that calls bswap(uint). 633 * 634 * This pattern is meant to be recognized by the dmd code generator. 635 * Don't change it without checking that an XCH instruction is still 636 * used to implement it. 637 * Inlining may also throw it off. 638 */ 639 return cast(ushort) (((x >> 8) & 0xFF) | ((x << 8) & 0xFF00u)); 640 } 641 642 /// 643 unittest 644 { 645 assert(byteswap(cast(ushort)0xF234) == 0x34F2); 646 static ushort xx = 0xF234; 647 assert(byteswap(xx) == 0x34F2); 648 } 649 650 /** 651 * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes 652 * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3 653 * becomes byte 0. 654 */ 655 version (LDC) 656 { 657 alias bswap = llvm_bswap!uint; 658 } 659 else 660 uint bswap(uint v) pure; 661 662 /// 663 unittest 664 { 665 assert(bswap(0x01020304u) == 0x04030201u); 666 static uint xx = 0x10203040u; 667 assert(bswap(xx) == 0x40302010u); 668 } 669 670 /** 671 * Swaps bytes in an 8 byte ulong end-to-end, i.e. byte 0 becomes 672 * byte 7, byte 1 becomes byte 6, etc. 673 * This is meant to be recognized by the compiler as an intrinsic. 674 */ 675 version (LDC) 676 { 677 alias bswap = llvm_bswap!ulong; 678 } 679 else 680 ulong bswap(ulong v) pure; 681 682 /// 683 unittest 684 { 685 assert(bswap(0x01020304_05060708uL) == 0x08070605_04030201uL); 686 static ulong xx = 0x10203040_50607080uL; 687 assert(bswap(xx) == 0x80706050_40302010uL); 688 } 689 690 version (DigitalMars) version (AnyX86) @system // not pure 691 { 692 /** 693 * Reads I/O port at port_address. 694 */ 695 ubyte inp(uint port_address); 696 697 698 /** 699 * ditto 700 */ 701 ushort inpw(uint port_address); 702 703 704 /** 705 * ditto 706 */ 707 uint inpl(uint port_address); 708 709 710 /** 711 * Writes and returns value to I/O port at port_address. 712 */ 713 ubyte outp(uint port_address, ubyte value); 714 715 716 /** 717 * ditto 718 */ 719 ushort outpw(uint port_address, ushort value); 720 721 722 /** 723 * ditto 724 */ 725 uint outpl(uint port_address, uint value); 726 } 727 version (LDC) @system // not pure 728 { 729 /** 730 * Reads I/O port at port_address. 731 */ 732 uint inp(uint port_address) 733 { 734 assert(false, "inp not yet implemented for LDC."); 735 } 736 737 738 /** 739 * ditto 740 */ 741 uint inpw(uint port_address) 742 { 743 assert(false, "inpw not yet implemented for LDC."); 744 } 745 746 747 /** 748 * ditto 749 */ 750 uint inpl(uint port_address) 751 { 752 assert(false, "inpl not yet implemented for LDC."); 753 } 754 755 756 /** 757 * Writes and returns value to I/O port at port_address. 758 */ 759 ubyte outp(uint port_address, uint value) 760 { 761 assert(false, "outp not yet implemented for LDC."); 762 } 763 764 765 /** 766 * ditto 767 */ 768 ushort outpw(uint port_address, uint value) 769 { 770 assert(false, "outpw not yet implemented for LDC."); 771 } 772 773 774 /** 775 * ditto 776 */ 777 uint outpl(uint port_address, uint value) 778 { 779 assert(false, "outpl not yet implemented for LDC."); 780 } 781 } 782 783 version (LDC) 784 { 785 alias _popcnt = llvm_ctpop!ushort; 786 787 pragma(inline, true): 788 789 int _popcnt(uint x) pure 790 { 791 return cast(int) llvm_ctpop(x); 792 } 793 794 int _popcnt(ulong x) pure 795 { 796 return cast(int) llvm_ctpop(x); 797 } 798 } 799 800 801 /** 802 * Calculates the number of set bits in an integer. 803 */ 804 pragma(inline, true) // LDC 805 int popcnt(uint x) pure 806 { 807 // Select the fastest method depending on the compiler and CPU architecture 808 version (LDC) 809 { 810 if (!__ctfe) 811 return _popcnt(x); 812 } 813 else version (DigitalMars) 814 { 815 static if (is(typeof(_popcnt(uint.max)))) 816 { 817 import core.cpuid; 818 if (!__ctfe && hasPopcnt) 819 return _popcnt(x); 820 } 821 } 822 823 return softPopcnt!uint(x); 824 } 825 826 unittest 827 { 828 assert( popcnt( 0 ) == 0 ); 829 assert( popcnt( 7 ) == 3 ); 830 assert( popcnt( 0xAA )== 4 ); 831 assert( popcnt( 0x8421_1248 ) == 8 ); 832 assert( popcnt( 0xFFFF_FFFF ) == 32 ); 833 assert( popcnt( 0xCCCC_CCCC ) == 16 ); 834 assert( popcnt( 0x7777_7777 ) == 24 ); 835 836 // Make sure popcnt() is available at CTFE 837 enum test_ctfe = popcnt(uint.max); 838 assert(test_ctfe == 32); 839 } 840 841 /// ditto 842 pragma(inline, true) // LDC 843 int popcnt(ulong x) pure 844 { 845 // Select the fastest method depending on the compiler and CPU architecture 846 version (LDC) 847 { 848 if (!__ctfe) 849 return _popcnt(x); 850 } 851 852 import core.cpuid; 853 854 static if (size_t.sizeof == uint.sizeof) 855 { 856 const sx = Split64(x); 857 version (DigitalMars) 858 { 859 static if (is(typeof(_popcnt(uint.max)))) 860 { 861 if (!__ctfe && hasPopcnt) 862 return _popcnt(sx.lo) + _popcnt(sx.hi); 863 } 864 } 865 866 return softPopcnt!uint(sx.lo) + softPopcnt!uint(sx.hi); 867 } 868 else static if (size_t.sizeof == ulong.sizeof) 869 { 870 version (DigitalMars) 871 { 872 static if (is(typeof(_popcnt(ulong.max)))) 873 { 874 if (!__ctfe && hasPopcnt) 875 return _popcnt(x); 876 } 877 } 878 879 return softPopcnt!ulong(x); 880 } 881 else 882 static assert(false); 883 } 884 885 unittest 886 { 887 assert(popcnt(0uL) == 0); 888 assert(popcnt(1uL) == 1); 889 assert(popcnt((1uL << 32) - 1) == 32); 890 assert(popcnt(0x48_65_6C_6C_6F_3F_21_00uL) == 28); 891 assert(popcnt(ulong.max) == 64); 892 893 // Make sure popcnt() is available at CTFE 894 enum test_ctfe = popcnt(ulong.max); 895 assert(test_ctfe == 64); 896 } 897 898 private int softPopcnt(N)(N x) pure 899 if (is(N == uint) || is(N == ulong)) 900 { 901 // Avoid branches, and the potential for cache misses which 902 // could be incurred with a table lookup. 903 904 // We need to mask alternate bits to prevent the 905 // sum from overflowing. 906 // add neighbouring bits. Each bit is 0 or 1. 907 enum mask1 = cast(N) 0x5555_5555_5555_5555L; 908 x = x - ((x>>1) & mask1); 909 // now each two bits of x is a number 00,01 or 10. 910 // now add neighbouring pairs 911 enum mask2a = cast(N) 0xCCCC_CCCC_CCCC_CCCCL; 912 enum mask2b = cast(N) 0x3333_3333_3333_3333L; 913 x = ((x & mask2a)>>2) + (x & mask2b); 914 // now each nibble holds 0000-0100. Adding them won't 915 // overflow any more, so we don't need to mask any more 916 917 enum mask4 = cast(N) 0x0F0F_0F0F_0F0F_0F0FL; 918 x = (x + (x >> 4)) & mask4; 919 920 enum shiftbits = is(N == uint)? 24 : 56; 921 enum maskMul = cast(N) 0x0101_0101_0101_0101L; 922 x = (x * maskMul) >> shiftbits; 923 924 return cast(int) x; 925 } 926 927 version (DigitalMars) version (AnyX86) 928 { 929 /** 930 * Calculates the number of set bits in an integer 931 * using the X86 SSE4 POPCNT instruction. 932 * POPCNT is not available on all X86 CPUs. 933 */ 934 ushort _popcnt( ushort x ) pure; 935 /// ditto 936 int _popcnt( uint x ) pure; 937 version (X86_64) 938 { 939 /// ditto 940 int _popcnt( ulong x ) pure; 941 } 942 943 unittest 944 { 945 // Not everyone has SSE4 instructions 946 import core.cpuid; 947 if (!hasPopcnt) 948 return; 949 950 static int popcnt_x(ulong u) nothrow @nogc 951 { 952 int c; 953 while (u) 954 { 955 c += u & 1; 956 u >>= 1; 957 } 958 return c; 959 } 960 961 for (uint u = 0; u < 0x1_0000; ++u) 962 { 963 //writefln("%x %x %x", u, _popcnt(cast(ushort)u), popcnt_x(cast(ushort)u)); 964 assert(_popcnt(cast(ushort)u) == popcnt_x(cast(ushort)u)); 965 966 assert(_popcnt(cast(uint)u) == popcnt_x(cast(uint)u)); 967 uint ui = u * 0x3_0001; 968 assert(_popcnt(ui) == popcnt_x(ui)); 969 970 version (X86_64) 971 { 972 assert(_popcnt(cast(ulong)u) == popcnt_x(cast(ulong)u)); 973 ulong ul = u * 0x3_0003_0001; 974 assert(_popcnt(ul) == popcnt_x(ul)); 975 } 976 } 977 } 978 } 979 980 981 /** 982 * Reverses the order of bits in a 32-bit integer. 983 */ 984 pragma(inline, true) 985 uint bitswap( uint x ) pure 986 { 987 if (!__ctfe) 988 { 989 version (LDC) 990 { 991 return llvm_bitreverse(x); 992 } 993 else 994 static if (is(typeof(asmBitswap32(x)))) 995 return asmBitswap32(x); 996 } 997 998 return softBitswap!uint(x); 999 } 1000 1001 unittest 1002 { 1003 static void test(alias impl)() 1004 { 1005 assert (impl( 0x8000_0100 ) == 0x0080_0001); 1006 foreach (i; 0 .. 32) 1007 assert (impl(1 << i) == 1 << 32 - i - 1); 1008 } 1009 1010 test!(bitswap)(); 1011 test!(softBitswap!uint)(); 1012 static if (is(typeof(asmBitswap32(0u)))) 1013 test!(asmBitswap32)(); 1014 1015 // Make sure bitswap() is available at CTFE 1016 enum test_ctfe = bitswap(1U); 1017 assert(test_ctfe == (1U << 31)); 1018 } 1019 1020 /** 1021 * Reverses the order of bits in a 64-bit integer. 1022 */ 1023 pragma(inline, true) 1024 ulong bitswap ( ulong x ) pure 1025 { 1026 if (!__ctfe) 1027 { 1028 version (LDC) 1029 { 1030 return llvm_bitreverse(x); 1031 } 1032 else 1033 static if (is(typeof(asmBitswap64(x)))) 1034 return asmBitswap64(x); 1035 } 1036 1037 return softBitswap!ulong(x); 1038 } 1039 1040 unittest 1041 { 1042 static void test(alias impl)() 1043 { 1044 assert (impl( 0b1000000000000000000000010000000000000000100000000000000000000001) 1045 == 0b1000000000000000000000010000000000000000100000000000000000000001); 1046 assert (impl( 0b1110000000000000000000010000000000000000100000000000000000000001) 1047 == 0b1000000000000000000000010000000000000000100000000000000000000111); 1048 foreach (i; 0 .. 64) 1049 assert (impl(1UL << i) == 1UL << 64 - i - 1); 1050 } 1051 1052 test!(bitswap)(); 1053 test!(softBitswap!ulong)(); 1054 static if (is(typeof(asmBitswap64(0uL)))) 1055 test!(asmBitswap64)(); 1056 1057 // Make sure bitswap() is available at CTFE 1058 enum test_ctfe = bitswap(1UL); 1059 assert(test_ctfe == (1UL << 63)); 1060 } 1061 1062 private N softBitswap(N)(N x) pure 1063 if (is(N == uint) || is(N == ulong)) 1064 { 1065 // swap 1-bit pairs: 1066 enum mask1 = cast(N) 0x5555_5555_5555_5555L; 1067 x = ((x >> 1) & mask1) | ((x & mask1) << 1); 1068 // swap 2-bit pairs: 1069 enum mask2 = cast(N) 0x3333_3333_3333_3333L; 1070 x = ((x >> 2) & mask2) | ((x & mask2) << 2); 1071 // swap 4-bit pairs: 1072 enum mask4 = cast(N) 0x0F0F_0F0F_0F0F_0F0FL; 1073 x = ((x >> 4) & mask4) | ((x & mask4) << 4); 1074 1075 // reverse the order of all bytes: 1076 x = bswap(x); 1077 1078 return x; 1079 } 1080 1081 version (AsmX86) 1082 { 1083 private uint asmBitswap32(uint x) @trusted pure 1084 { 1085 asm pure nothrow @nogc { naked; } 1086 1087 version (D_InlineAsm_X86_64) 1088 { 1089 version (Win64) 1090 asm pure nothrow @nogc { mov EAX, ECX; } 1091 else 1092 asm pure nothrow @nogc { mov EAX, EDI; } 1093 } 1094 1095 asm pure nothrow @nogc 1096 { 1097 // Author: Tiago Gasiba. 1098 mov EDX, EAX; 1099 shr EAX, 1; 1100 and EDX, 0x5555_5555; 1101 and EAX, 0x5555_5555; 1102 shl EDX, 1; 1103 or EAX, EDX; 1104 mov EDX, EAX; 1105 shr EAX, 2; 1106 and EDX, 0x3333_3333; 1107 and EAX, 0x3333_3333; 1108 shl EDX, 2; 1109 or EAX, EDX; 1110 mov EDX, EAX; 1111 shr EAX, 4; 1112 and EDX, 0x0f0f_0f0f; 1113 and EAX, 0x0f0f_0f0f; 1114 shl EDX, 4; 1115 or EAX, EDX; 1116 bswap EAX; 1117 ret; 1118 } 1119 } 1120 } 1121 1122 version (LDC) {} else 1123 version (D_InlineAsm_X86_64) 1124 { 1125 private ulong asmBitswap64(ulong x) @trusted pure 1126 { 1127 asm pure nothrow @nogc { naked; } 1128 1129 version (Win64) 1130 asm pure nothrow @nogc { mov RAX, RCX; } 1131 else 1132 asm pure nothrow @nogc { mov RAX, RDI; } 1133 1134 asm pure nothrow @nogc 1135 { 1136 // Author: Tiago Gasiba. 1137 mov RDX, RAX; 1138 shr RAX, 1; 1139 mov RCX, 0x5555_5555_5555_5555L; 1140 and RDX, RCX; 1141 and RAX, RCX; 1142 shl RDX, 1; 1143 or RAX, RDX; 1144 1145 mov RDX, RAX; 1146 shr RAX, 2; 1147 mov RCX, 0x3333_3333_3333_3333L; 1148 and RDX, RCX; 1149 and RAX, RCX; 1150 shl RDX, 2; 1151 or RAX, RDX; 1152 1153 mov RDX, RAX; 1154 shr RAX, 4; 1155 mov RCX, 0x0f0f_0f0f_0f0f_0f0fL; 1156 and RDX, RCX; 1157 and RAX, RCX; 1158 shl RDX, 4; 1159 or RAX, RDX; 1160 bswap RAX; 1161 ret; 1162 } 1163 } 1164 } 1165 1166 /** 1167 * Bitwise rotate `value` left (`rol`) or right (`ror`) by 1168 * `count` bit positions. 1169 */ 1170 pragma(inline, true) // LDC 1171 pure T rol(T)(const T value, const uint count) 1172 if (__traits(isIntegral, T) && __traits(isUnsigned, T)) 1173 { 1174 assert(count < 8 * T.sizeof); 1175 if (count == 0) 1176 return cast(T) value; 1177 1178 return cast(T) ((value << count) | (value >> (T.sizeof * 8 - count))); 1179 } 1180 /// ditto 1181 pragma(inline, true) // LDC 1182 pure T ror(T)(const T value, const uint count) 1183 if (__traits(isIntegral, T) && __traits(isUnsigned, T)) 1184 { 1185 assert(count < 8 * T.sizeof); 1186 if (count == 0) 1187 return cast(T) value; 1188 1189 return cast(T) ((value >> count) | (value << (T.sizeof * 8 - count))); 1190 } 1191 /// ditto 1192 pragma(inline, true) // LDC 1193 pure T rol(uint count, T)(const T value) 1194 if (__traits(isIntegral, T) && __traits(isUnsigned, T)) 1195 { 1196 static assert(count < 8 * T.sizeof); 1197 static if (count == 0) 1198 return cast(T) value; 1199 1200 return cast(T) ((value << count) | (value >> (T.sizeof * 8 - count))); 1201 } 1202 /// ditto 1203 pragma(inline, true) // LDC 1204 pure T ror(uint count, T)(const T value) 1205 if (__traits(isIntegral, T) && __traits(isUnsigned, T)) 1206 { 1207 static assert(count < 8 * T.sizeof); 1208 static if (count == 0) 1209 return cast(T) value; 1210 1211 return cast(T) ((value >> count) | (value << (T.sizeof * 8 - count))); 1212 } 1213 1214 /// 1215 unittest 1216 { 1217 ubyte a = 0b11110000U; 1218 ulong b = ~1UL; 1219 1220 assert(rol(a, 1) == 0b11100001); 1221 assert(ror(a, 1) == 0b01111000); 1222 assert(rol(a, 3) == 0b10000111); 1223 assert(ror(a, 3) == 0b00011110); 1224 1225 assert(rol(a, 0) == a); 1226 assert(ror(a, 0) == a); 1227 1228 assert(rol(b, 63) == ~(1UL << 63)); 1229 assert(ror(b, 63) == ~2UL); 1230 1231 assert(rol!3(a) == 0b10000111); 1232 assert(ror!3(a) == 0b00011110); 1233 1234 enum c = rol(uint(1), 0); 1235 enum d = ror(uint(1), 0); 1236 assert(c == uint(1)); 1237 assert(d == uint(1)); 1238 }