1 /** 2 * Base floating point routines. 3 * 4 * Macros: 5 * TABLE_SV = <table border="1" cellpadding="4" cellspacing="0"> 6 * <caption>Special Values</caption> 7 * $0</table> 8 * SVH = $(TR $(TH $1) $(TH $2)) 9 * SV = $(TR $(TD $1) $(TD $2)) 10 * TH3 = $(TR $(TH $1) $(TH $2) $(TH $3)) 11 * TD3 = $(TR $(TD $1) $(TD $2) $(TD $3)) 12 * TABLE_DOMRG = <table border="1" cellpadding="4" cellspacing="0"> 13 * $(SVH Domain X, Range Y) 14 $(SV $1, $2) 15 * </table> 16 * DOMAIN=$1 17 * RANGE=$1 18 * NAN = $(RED NAN) 19 * SUP = <span style="vertical-align:super;font-size:smaller">$0</span> 20 * GAMMA = Γ 21 * THETA = θ 22 * INTEGRAL = ∫ 23 * INTEGRATE = $(BIG ∫<sub>$(SMALL $1)</sub><sup>$2</sup>) 24 * POWER = $1<sup>$2</sup> 25 * SUB = $1<sub>$2</sub> 26 * BIGSUM = $(BIG Σ <sup>$2</sup><sub>$(SMALL $1)</sub>) 27 * CHOOSE = $(BIG () <sup>$(SMALL $1)</sup><sub>$(SMALL $2)</sub> $(BIG )) 28 * PLUSMN = ± 29 * INFIN = ∞ 30 * PLUSMNINF = ±∞ 31 * PI = π 32 * LT = < 33 * GT = > 34 * SQRT = √ 35 * HALF = ½ 36 * 37 * License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 38 * Authors: $(HTTP digitalmars.com, Walter Bright), Don Clugston, Ilia Ki 39 */ 40 module mir.math.ieee; 41 42 import mir.internal.utility: isFloatingPoint; 43 44 /********************************* 45 * Return `true` if sign bit of e is set, `false` if not. 46 */ 47 bool signbit(T)(const T x) @nogc @trusted pure nothrow 48 { 49 if (__ctfe) 50 { 51 double dval = cast(double) x; // Precision can increase or decrease but sign won't change (even NaN). 52 return 0 > *cast(long*) &dval; 53 } 54 55 mixin floatTraits!T; 56 57 static if (realFormat == RealFormat.ieeeSingle) 58 { 59 return 0 > *cast(int*) &x; 60 } 61 else 62 static if (realFormat == RealFormat.ieeeDouble) 63 { 64 return 0 > *cast(long*) &x; 65 } 66 else 67 static if (realFormat == RealFormat.ieeeQuadruple) 68 { 69 return 0 > ((cast(long*)&x)[MANTISSA_MSB]); 70 } 71 else static if (realFormat == RealFormat.ieeeExtended) 72 { 73 version (LittleEndian) 74 auto mp = cast(ubyte*)&x + 9; 75 else 76 auto mp = cast(ubyte*)&x; 77 78 return (*mp & 0x80) != 0; 79 } 80 else static assert(0, "signbit is not implemented."); 81 } 82 83 /// 84 @nogc @safe pure nothrow version(mir_core_test) unittest 85 { 86 assert(!signbit(float.nan)); 87 assert(signbit(-float.nan)); 88 assert(!signbit(168.1234f)); 89 assert(signbit(-168.1234f)); 90 assert(!signbit(0.0f)); 91 assert(signbit(-0.0f)); 92 assert(signbit(-float.max)); 93 assert(!signbit(float.max)); 94 95 assert(!signbit(double.nan)); 96 assert(signbit(-double.nan)); 97 assert(!signbit(168.1234)); 98 assert(signbit(-168.1234)); 99 assert(!signbit(0.0)); 100 assert(signbit(-0.0)); 101 assert(signbit(-double.max)); 102 assert(!signbit(double.max)); 103 104 assert(!signbit(real.nan)); 105 assert(signbit(-real.nan)); 106 assert(!signbit(168.1234L)); 107 assert(signbit(-168.1234L)); 108 assert(!signbit(0.0L)); 109 assert(signbit(-0.0L)); 110 assert(signbit(-real.max)); 111 assert(!signbit(real.max)); 112 } 113 114 /************************************** 115 * To what precision is x equal to y? 116 * 117 * Returns: the number of mantissa bits which are equal in x and y. 118 * eg, 0x1.F8p+60 and 0x1.F1p+60 are equal to 5 bits of precision. 119 * 120 * $(TABLE_SV 121 * $(TR $(TH x) $(TH y) $(TH feqrel(x, y))) 122 * $(TR $(TD x) $(TD x) $(TD real.mant_dig)) 123 * $(TR $(TD x) $(TD $(GT)= 2*x) $(TD 0)) 124 * $(TR $(TD x) $(TD $(LT)= x/2) $(TD 0)) 125 * $(TR $(TD $(NAN)) $(TD any) $(TD 0)) 126 * $(TR $(TD any) $(TD $(NAN)) $(TD 0)) 127 * ) 128 */ 129 int feqrel(T)(const T x, const T y) @trusted pure nothrow @nogc 130 if (isFloatingPoint!T) 131 { 132 /* Public Domain. Author: Don Clugston, 18 Aug 2005. 133 */ 134 mixin floatTraits!T; 135 static if (realFormat == RealFormat.ieeeSingle 136 || realFormat == RealFormat.ieeeDouble 137 || realFormat == RealFormat.ieeeExtended 138 || realFormat == RealFormat.ieeeQuadruple) 139 { 140 import mir.math.common: fabs; 141 142 if (x == y) 143 return T.mant_dig; // ensure diff != 0, cope with IN 144 145 auto diff = fabs(x - y); 146 147 int a = ((cast(U*)& x)[idx] & exp_mask) >>> exp_shft; 148 int b = ((cast(U*)& y)[idx] & exp_mask) >>> exp_shft; 149 int d = ((cast(U*)&diff)[idx] & exp_mask) >>> exp_shft; 150 151 152 // The difference in abs(exponent) between x or y and abs(x-y) 153 // is equal to the number of significand bits of x which are 154 // equal to y. If negative, x and y have different exponents. 155 // If positive, x and y are equal to 'bitsdiff' bits. 156 // AND with 0x7FFF to form the absolute value. 157 // To avoid out-by-1 errors, we subtract 1 so it rounds down 158 // if the exponents were different. This means 'bitsdiff' is 159 // always 1 lower than we want, except that if bitsdiff == 0, 160 // they could have 0 or 1 bits in common. 161 162 int bitsdiff = ((a + b - 1) >> 1) - d; 163 if (d == 0) 164 { // Difference is subnormal 165 // For subnormals, we need to add the number of zeros that 166 // lie at the start of diff's significand. 167 // We do this by multiplying by 2^^real.mant_dig 168 diff *= norm_factor; 169 return bitsdiff + T.mant_dig - int(((cast(U*)&diff)[idx] & exp_mask) >>> exp_shft); 170 } 171 172 if (bitsdiff > 0) 173 return bitsdiff + 1; // add the 1 we subtracted before 174 175 // Avoid out-by-1 errors when factor is almost 2. 176 if (bitsdiff == 0 && (a ^ b) == 0) 177 return 1; 178 else 179 return 0; 180 } 181 else 182 { 183 static assert(false, "Not implemented for this architecture"); 184 } 185 } 186 187 /// 188 @safe pure version(mir_core_test) unittest 189 { 190 assert(feqrel(2.0, 2.0) == 53); 191 assert(feqrel(2.0f, 2.0f) == 24); 192 assert(feqrel(2.0, double.nan) == 0); 193 194 // Test that numbers are within n digits of each 195 // other by testing if feqrel > n * log2(10) 196 197 // five digits 198 assert(feqrel(2.0, 2.00001) > 16); 199 // ten digits 200 assert(feqrel(2.0, 2.00000000001) > 33); 201 } 202 203 @safe pure nothrow @nogc version(mir_core_test) unittest 204 { 205 void testFeqrel(F)() 206 { 207 // Exact equality 208 assert(feqrel(F.max, F.max) == F.mant_dig); 209 assert(feqrel!(F)(0.0, 0.0) == F.mant_dig); 210 assert(feqrel(F.infinity, F.infinity) == F.mant_dig); 211 212 // a few bits away from exact equality 213 F w=1; 214 for (int i = 1; i < F.mant_dig - 1; ++i) 215 { 216 assert(feqrel!(F)(1.0 + w * F.epsilon, 1.0) == F.mant_dig-i); 217 assert(feqrel!(F)(1.0 - w * F.epsilon, 1.0) == F.mant_dig-i); 218 assert(feqrel!(F)(1.0, 1 + (w-1) * F.epsilon) == F.mant_dig - i + 1); 219 w*=2; 220 } 221 222 assert(feqrel!(F)(1.5+F.epsilon, 1.5) == F.mant_dig-1); 223 assert(feqrel!(F)(1.5-F.epsilon, 1.5) == F.mant_dig-1); 224 assert(feqrel!(F)(1.5-F.epsilon, 1.5+F.epsilon) == F.mant_dig-2); 225 226 227 // Numbers that are close 228 assert(feqrel!(F)(0x1.Bp+84, 0x1.B8p+84) == 5); 229 assert(feqrel!(F)(0x1.8p+10, 0x1.Cp+10) == 2); 230 assert(feqrel!(F)(1.5 * (1 - F.epsilon), 1.0L) == 2); 231 assert(feqrel!(F)(1.5, 1.0) == 1); 232 assert(feqrel!(F)(2 * (1 - F.epsilon), 1.0L) == 1); 233 234 // Factors of 2 235 assert(feqrel(F.max, F.infinity) == 0); 236 assert(feqrel!(F)(2 * (1 - F.epsilon), 1.0L) == 1); 237 assert(feqrel!(F)(1.0, 2.0) == 0); 238 assert(feqrel!(F)(4.0, 1.0) == 0); 239 240 // Extreme inequality 241 assert(feqrel(F.nan, F.nan) == 0); 242 assert(feqrel!(F)(0.0L, -F.nan) == 0); 243 assert(feqrel(F.nan, F.infinity) == 0); 244 assert(feqrel(F.infinity, -F.infinity) == 0); 245 assert(feqrel(F.max, -F.max) == 0); 246 247 assert(feqrel(F.min_normal / 8, F.min_normal / 17) == 3); 248 249 const F Const = 2; 250 immutable F Immutable = 2; 251 auto Compiles = feqrel(Const, Immutable); 252 } 253 254 assert(feqrel(7.1824L, 7.1824L) == real.mant_dig); 255 256 testFeqrel!(float)(); 257 testFeqrel!(double)(); 258 testFeqrel!(real)(); 259 } 260 261 /++ 262 +/ 263 enum RealFormat 264 { 265 /// 266 ieeeHalf, 267 /// 268 ieeeSingle, 269 /// 270 ieeeDouble, 271 /// x87 80-bit real 272 ieeeExtended, 273 /// x87 real rounded to precision of double. 274 ieeeExtended53, 275 /// IBM 128-bit extended 276 ibmExtended, 277 /// 278 ieeeQuadruple, 279 } 280 281 /** 282 * Calculate the next largest floating point value after x. 283 * 284 * Return the least number greater than x that is representable as a real; 285 * thus, it gives the next point on the IEEE number line. 286 * 287 * $(TABLE_SV 288 * $(SVH x, nextUp(x) ) 289 * $(SV -$(INFIN), -real.max ) 290 * $(SV $(PLUSMN)0.0, real.min_normal*real.epsilon ) 291 * $(SV real.max, $(INFIN) ) 292 * $(SV $(INFIN), $(INFIN) ) 293 * $(SV $(NAN), $(NAN) ) 294 * ) 295 */ 296 T nextUp(T)(const T x) @trusted pure nothrow @nogc 297 if (isFloatingPoint!T) 298 { 299 mixin floatTraits!T; 300 static if (realFormat == RealFormat.ieeeSingle) 301 { 302 uint s = *cast(uint*)&x; 303 if ((s & 0x7F80_0000) == 0x7F80_0000) 304 { 305 // First, deal with NANs and infinity 306 if (x == -x.infinity) return -x.max; 307 308 return x; // +INF and NAN are unchanged. 309 } 310 if (s > 0x8000_0000) // Negative number 311 { 312 --s; 313 } 314 else 315 if (s == 0x8000_0000) // it was negative zero 316 { 317 s = 0x0000_0001; // change to smallest subnormal 318 } 319 else 320 { 321 // Positive number 322 ++s; 323 } 324 R: 325 return *cast(T*)&s; 326 } 327 else static if (realFormat == RealFormat.ieeeDouble) 328 { 329 ulong s = *cast(ulong*)&x; 330 331 if ((s & 0x7FF0_0000_0000_0000) == 0x7FF0_0000_0000_0000) 332 { 333 // First, deal with NANs and infinity 334 if (x == -x.infinity) return -x.max; 335 return x; // +INF and NAN are unchanged. 336 } 337 if (s > 0x8000_0000_0000_0000) // Negative number 338 { 339 --s; 340 } 341 else 342 if (s == 0x8000_0000_0000_0000) // it was negative zero 343 { 344 s = 0x0000_0000_0000_0001; // change to smallest subnormal 345 } 346 else 347 { 348 // Positive number 349 ++s; 350 } 351 R: 352 return *cast(T*)&s; 353 } 354 else static if (realFormat == RealFormat.ieeeQuadruple) 355 { 356 auto e = exp_mask & (cast(U *)&x)[idx]; 357 if (e == exp_mask) 358 { 359 // NaN or Infinity 360 if (x == -real.infinity) return -real.max; 361 return x; // +Inf and NaN are unchanged. 362 } 363 364 auto ps = cast(ulong *)&x; 365 if (ps[MANTISSA_MSB] & 0x8000_0000_0000_0000) 366 { 367 // Negative number 368 if (ps[MANTISSA_LSB] == 0 && ps[MANTISSA_MSB] == 0x8000_0000_0000_0000) 369 { 370 // it was negative zero, change to smallest subnormal 371 ps[MANTISSA_LSB] = 1; 372 ps[MANTISSA_MSB] = 0; 373 return x; 374 } 375 if (ps[MANTISSA_LSB] == 0) --ps[MANTISSA_MSB]; 376 --ps[MANTISSA_LSB]; 377 } 378 else 379 { 380 // Positive number 381 ++ps[MANTISSA_LSB]; 382 if (ps[MANTISSA_LSB] == 0) ++ps[MANTISSA_MSB]; 383 } 384 return x; 385 } 386 else static if (realFormat == RealFormat.ieeeExtended) 387 { 388 // For 80-bit reals, the "implied bit" is a nuisance... 389 auto pe = cast(U*)&x + idx; 390 version (LittleEndian) 391 auto ps = cast(ulong*)&x; 392 else 393 auto ps = cast(ulong*)((cast(ushort*)&x) + 1); 394 395 if ((*pe & exp_mask) == exp_mask) 396 { 397 // First, deal with NANs and infinity 398 if (x == -real.infinity) return -real.max; 399 return x; // +Inf and NaN are unchanged. 400 } 401 if (*pe & 0x8000) 402 { 403 // Negative number -- need to decrease the significand 404 --*ps; 405 // Need to mask with 0x7FF.. so subnormals are treated correctly. 406 if ((*ps & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FFF_FFFF_FFFF_FFFF) 407 { 408 if (*pe == 0x8000) // it was negative zero 409 { 410 *ps = 1; 411 *pe = 0; // smallest subnormal. 412 return x; 413 } 414 415 --*pe; 416 417 if (*pe == 0x8000) 418 return x; // it's become a subnormal, implied bit stays low. 419 420 *ps = 0xFFFF_FFFF_FFFF_FFFF; // set the implied bit 421 return x; 422 } 423 return x; 424 } 425 else 426 { 427 // Positive number -- need to increase the significand. 428 // Works automatically for positive zero. 429 ++*ps; 430 if ((*ps & 0x7FFF_FFFF_FFFF_FFFF) == 0) 431 { 432 // change in exponent 433 ++*pe; 434 *ps = 0x8000_0000_0000_0000; // set the high bit 435 } 436 } 437 return x; 438 } 439 else // static if (realFormat == RealFormat.ibmExtended) 440 { 441 assert(0, "nextUp not implemented"); 442 } 443 } 444 445 /// 446 @safe @nogc pure nothrow version(mir_core_test) unittest 447 { 448 assert(nextUp(1.0 - 1.0e-6).feqrel(0.999999) > 16); 449 assert(nextUp(1.0 - real.epsilon).feqrel(1.0) > 16); 450 } 451 452 /** 453 * Calculate the next smallest floating point value before x. 454 * 455 * Return the greatest number less than x that is representable as a real; 456 * thus, it gives the previous point on the IEEE number line. 457 * 458 * $(TABLE_SV 459 * $(SVH x, nextDown(x) ) 460 * $(SV $(INFIN), real.max ) 461 * $(SV $(PLUSMN)0.0, -real.min_normal*real.epsilon ) 462 * $(SV -real.max, -$(INFIN) ) 463 * $(SV -$(INFIN), -$(INFIN) ) 464 * $(SV $(NAN), $(NAN) ) 465 * ) 466 */ 467 T nextDown(T)(const T x) @safe pure nothrow @nogc 468 { 469 return -nextUp(-x); 470 } 471 472 /// 473 @safe pure nothrow @nogc version(mir_core_test) unittest 474 { 475 assert( nextDown(1.0 + real.epsilon) == 1.0); 476 } 477 478 @safe pure nothrow @nogc version(mir_core_test) unittest 479 { 480 import std.math: NaN, isIdentical; 481 482 static if (floatTraits!(real).realFormat == RealFormat.ieeeExtended) 483 { 484 485 // Tests for 80-bit reals 486 assert(isIdentical(nextUp(NaN(0xABC)), NaN(0xABC))); 487 // negative numbers 488 assert( nextUp(-real.infinity) == -real.max ); 489 assert( nextUp(-1.0L-real.epsilon) == -1.0 ); 490 assert( nextUp(-2.0L) == -2.0 + real.epsilon); 491 // subnormals and zero 492 assert( nextUp(-real.min_normal) == -real.min_normal*(1-real.epsilon) ); 493 assert( nextUp(-real.min_normal*(1-real.epsilon)) == -real.min_normal*(1-2*real.epsilon) ); 494 assert( isIdentical(-0.0L, nextUp(-real.min_normal*real.epsilon)) ); 495 assert( nextUp(-0.0L) == real.min_normal*real.epsilon ); 496 assert( nextUp(0.0L) == real.min_normal*real.epsilon ); 497 assert( nextUp(real.min_normal*(1-real.epsilon)) == real.min_normal ); 498 assert( nextUp(real.min_normal) == real.min_normal*(1+real.epsilon) ); 499 // positive numbers 500 assert( nextUp(1.0L) == 1.0 + real.epsilon ); 501 assert( nextUp(2.0L-real.epsilon) == 2.0 ); 502 assert( nextUp(real.max) == real.infinity ); 503 assert( nextUp(real.infinity)==real.infinity ); 504 } 505 506 double n = NaN(0xABC); 507 assert(isIdentical(nextUp(n), n)); 508 // negative numbers 509 assert( nextUp(-double.infinity) == -double.max ); 510 assert( nextUp(-1-double.epsilon) == -1.0 ); 511 assert( nextUp(-2.0) == -2.0 + double.epsilon); 512 // subnormals and zero 513 514 assert( nextUp(-double.min_normal) == -double.min_normal*(1-double.epsilon) ); 515 assert( nextUp(-double.min_normal*(1-double.epsilon)) == -double.min_normal*(1-2*double.epsilon) ); 516 assert( isIdentical(-0.0, nextUp(-double.min_normal*double.epsilon)) ); 517 assert( nextUp(0.0) == double.min_normal*double.epsilon ); 518 assert( nextUp(-0.0) == double.min_normal*double.epsilon ); 519 assert( nextUp(double.min_normal*(1-double.epsilon)) == double.min_normal ); 520 assert( nextUp(double.min_normal) == double.min_normal*(1+double.epsilon) ); 521 // positive numbers 522 assert( nextUp(1.0) == 1.0 + double.epsilon ); 523 assert( nextUp(2.0-double.epsilon) == 2.0 ); 524 assert( nextUp(double.max) == double.infinity ); 525 526 float fn = NaN(0xABC); 527 assert(isIdentical(nextUp(fn), fn)); 528 float f = -float.min_normal*(1-float.epsilon); 529 float f1 = -float.min_normal; 530 assert( nextUp(f1) == f); 531 f = 1.0f+float.epsilon; 532 f1 = 1.0f; 533 assert( nextUp(f1) == f ); 534 f1 = -0.0f; 535 assert( nextUp(f1) == float.min_normal*float.epsilon); 536 assert( nextUp(float.infinity)==float.infinity ); 537 538 assert(nextDown(1.0L+real.epsilon)==1.0); 539 assert(nextDown(1.0+double.epsilon)==1.0); 540 f = 1.0f+float.epsilon; 541 assert(nextDown(f)==1.0); 542 } 543 544 /++ 545 Return the value that lies halfway between x and y on the IEEE number line. 546 547 Formally, the result is the arithmetic mean of the binary significands of x 548 and y, multiplied by the geometric mean of the binary exponents of x and y. 549 x and y must not be NaN. 550 Note: this function is useful for ensuring O(log n) behaviour in algorithms 551 involving a 'binary chop'. 552 553 Params: 554 xx = x value 555 yy = y value 556 557 Special cases: 558 If x and y not null and have opposite sign bits, then `copysign(T(0), y)` is returned. 559 If x and y are within a factor of 2 and have the same sign, (ie, feqrel(x, y) > 0), the return value 560 is the arithmetic mean (x + y) / 2. 561 If x and y are even powers of 2 and have the same sign, the return value is the geometric mean, 562 ieeeMean(x, y) = sgn(x) * sqrt(fabs(x * y)). 563 +/ 564 T ieeeMean(T)(const T xx, const T yy) @trusted pure nothrow @nogc 565 in 566 { 567 assert(xx == xx && yy == yy); 568 } 569 do 570 { 571 import mir.math.common: copysign; 572 T x = xx; 573 T y = yy; 574 575 if (x == 0) 576 { 577 x = copysign(T(0), y); 578 } 579 else 580 if (y == 0) 581 { 582 y = copysign(T(0), x); 583 } 584 else 585 if (signbit(x) != signbit(y)) 586 { 587 return copysign(T(0), y); 588 } 589 590 // The implementation is simple: cast x and y to integers, 591 // average them (avoiding overflow), and cast the result back to a floating-point number. 592 593 mixin floatTraits!(T); 594 T u = 0; 595 static if (realFormat == RealFormat.ieeeExtended) 596 { 597 // There's slight additional complexity because they are actually 598 // 79-bit reals... 599 ushort *ue = cast(ushort *)&u + idx; 600 int ye = (cast(ushort *)&y)[idx]; 601 int xe = (cast(ushort *)&x)[idx]; 602 603 version (LittleEndian) 604 { 605 ulong *ul = cast(ulong *)&u; 606 ulong xl = *cast(ulong *)&x; 607 ulong yl = *cast(ulong *)&y; 608 } 609 else 610 { 611 ulong *ul = cast(ulong *)(cast(short *)&u + 1); 612 ulong xl = *cast(ulong *)(cast(short *)&x + 1); 613 ulong yl = *cast(ulong *)(cast(short *)&y + 1); 614 } 615 616 // Ignore the useless implicit bit. (Bonus: this prevents overflows) 617 ulong m = (xl & 0x7FFF_FFFF_FFFF_FFFFL) + (yl & 0x7FFF_FFFF_FFFF_FFFFL); 618 619 int e = ((xe & exp_mask) + (ye & exp_mask)); 620 if (m & 0x8000_0000_0000_0000L) 621 { 622 ++e; 623 m &= 0x7FFF_FFFF_FFFF_FFFFL; 624 } 625 // Now do a multi-byte right shift 626 const uint c = e & 1; // carry 627 e >>= 1; 628 m >>>= 1; 629 if (c) 630 m |= 0x4000_0000_0000_0000L; // shift carry into significand 631 if (e) 632 *ul = m | 0x8000_0000_0000_0000L; // set implicit bit... 633 else 634 *ul = m; // ... unless exponent is 0 (subnormal or zero). 635 636 *ue = cast(ushort) (e | (xe & 0x8000)); // restore sign bit 637 } 638 else static if (realFormat == RealFormat.ieeeQuadruple) 639 { 640 // This would be trivial if 'ucent' were implemented... 641 ulong *ul = cast(ulong *)&u; 642 ulong *xl = cast(ulong *)&x; 643 ulong *yl = cast(ulong *)&y; 644 645 // Multi-byte add, then multi-byte right shift. 646 import core.checkedint: addu; 647 bool carry; 648 ulong ml = addu(xl[MANTISSA_LSB], yl[MANTISSA_LSB], carry); 649 650 ulong mh = carry + (xl[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFFL) + 651 (yl[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFFL); 652 653 ul[MANTISSA_MSB] = (mh >>> 1) | (xl[MANTISSA_MSB] & 0x8000_0000_0000_0000); 654 ul[MANTISSA_LSB] = (ml >>> 1) | (mh & 1) << 63; 655 } 656 else static if (realFormat == RealFormat.ieeeDouble) 657 { 658 ulong *ul = cast(ulong *)&u; 659 ulong *xl = cast(ulong *)&x; 660 ulong *yl = cast(ulong *)&y; 661 ulong m = (((*xl) & 0x7FFF_FFFF_FFFF_FFFFL) 662 + ((*yl) & 0x7FFF_FFFF_FFFF_FFFFL)) >>> 1; 663 m |= ((*xl) & 0x8000_0000_0000_0000L); 664 *ul = m; 665 } 666 else static if (realFormat == RealFormat.ieeeSingle) 667 { 668 uint *ul = cast(uint *)&u; 669 uint *xl = cast(uint *)&x; 670 uint *yl = cast(uint *)&y; 671 uint m = (((*xl) & 0x7FFF_FFFF) + ((*yl) & 0x7FFF_FFFF)) >>> 1; 672 m |= ((*xl) & 0x8000_0000); 673 *ul = m; 674 } 675 else 676 { 677 assert(0, "Not implemented"); 678 } 679 return u; 680 } 681 682 @safe pure nothrow @nogc version(mir_core_test) unittest 683 { 684 assert(ieeeMean(-0.0,-1e-20)<0); 685 assert(ieeeMean(0.0,1e-20)>0); 686 687 assert(ieeeMean(1.0L,4.0L)==2L); 688 assert(ieeeMean(2.0*1.013,8.0*1.013)==4*1.013); 689 assert(ieeeMean(-1.0L,-4.0L)==-2L); 690 assert(ieeeMean(-1.0,-4.0)==-2); 691 assert(ieeeMean(-1.0f,-4.0f)==-2f); 692 assert(ieeeMean(-1.0,-2.0)==-1.5); 693 assert(ieeeMean(-1*(1+8*real.epsilon),-2*(1+8*real.epsilon)) 694 ==-1.5*(1+5*real.epsilon)); 695 assert(ieeeMean(0x1p60,0x1p-10)==0x1p25); 696 697 static if (floatTraits!(real).realFormat == RealFormat.ieeeExtended) 698 { 699 assert(ieeeMean(1.0L,real.infinity)==0x1p8192L); 700 assert(ieeeMean(0.0L,real.infinity)==1.5); 701 } 702 assert(ieeeMean(0.5*real.min_normal*(1-4*real.epsilon),0.5*real.min_normal) 703 == 0.5*real.min_normal*(1-2*real.epsilon)); 704 } 705 706 /********************************************************************* 707 * Separate floating point value into significand and exponent. 708 * 709 * Returns: 710 * Calculate and return $(I x) and $(I exp) such that 711 * value =$(I x)*2$(SUPERSCRIPT exp) and 712 * .5 $(LT)= |$(I x)| $(LT) 1.0 713 * 714 * $(I x) has same sign as value. 715 * 716 * $(TABLE_SV 717 * $(TR $(TH value) $(TH returns) $(TH exp)) 718 * $(TR $(TD $(PLUSMN)0.0) $(TD $(PLUSMN)0.0) $(TD 0)) 719 * $(TR $(TD +$(INFIN)) $(TD +$(INFIN)) $(TD unchenged)) 720 * $(TR $(TD -$(INFIN)) $(TD -$(INFIN)) $(TD unchenged)) 721 * $(TR $(TD $(PLUSMN)$(NAN)) $(TD $(PLUSMN)$(NAN)) $(TD unchenged)) 722 * ) 723 */ 724 T frexp(T)(const T value, ref int exp) @trusted pure nothrow @nogc 725 if (isFloatingPoint!T) 726 { 727 import mir.utility: _expect; 728 import mir.math.common: fabs; 729 730 if (__ctfe) 731 { 732 // Handle special cases. 733 if (value == 0) { exp = 0; return value; } 734 if (value != value || fabs(value) == T.infinity) { return value; } 735 // Handle ordinary cases. 736 // In CTFE there is no performance advantage for having separate 737 // paths for different floating point types. 738 T absValue = value < 0 ? -value : value; 739 int expCount; 740 static if (T.mant_dig > double.mant_dig) 741 { 742 for (; absValue >= 0x1.0p+1024L; absValue *= 0x1.0p-1024L) 743 expCount += 1024; 744 for (; absValue < 0x1.0p-1021L; absValue *= 0x1.0p+1021L) 745 expCount -= 1021; 746 } 747 const double dval = cast(double) absValue; 748 int dexp = cast(int) (((*cast(const long*) &dval) >>> 52) & 0x7FF) + double.min_exp - 2; 749 dexp++; 750 expCount += dexp; 751 absValue *= 2.0 ^^ -dexp; 752 // If the original value was subnormal or if it was a real 753 // then absValue can still be outside the [0.5, 1.0) range. 754 if (absValue < 0.5) 755 { 756 assert(T.mant_dig > double.mant_dig || -T.min_normal < value && value < T.min_normal); 757 do 758 { 759 absValue += absValue; 760 expCount--; 761 } while (absValue < 0.5); 762 } 763 else 764 { 765 assert(absValue < 1 || T.mant_dig > double.mant_dig); 766 for (; absValue >= 1; absValue *= T(0.5)) 767 expCount++; 768 } 769 exp = expCount; 770 return value < 0 ? -absValue : absValue; 771 } 772 773 with(floatTraits!T) static if ( 774 realFormat == RealFormat.ieeeExtended 775 || realFormat == RealFormat.ieeeQuadruple 776 || realFormat == RealFormat.ieeeDouble 777 || realFormat == RealFormat.ieeeSingle) 778 { 779 T vf = value; 780 S u = (cast(U*)&vf)[idx]; 781 int e = (u & exp_mask) >>> exp_shft; 782 if (_expect(e, true)) // If exponent is non-zero 783 { 784 if (_expect(e == exp_msh, false)) 785 goto R; 786 exp = e + (T.min_exp - 1); 787 P: 788 u &= ~exp_mask; 789 u ^= exp_nrm; 790 (cast(U*)&vf)[idx] = cast(U)u; 791 R: 792 return vf; 793 } 794 else 795 { 796 static if (realFormat == RealFormat.ieeeExtended) 797 { 798 version (LittleEndian) 799 auto mp = cast(ulong*)&vf; 800 else 801 auto mp = cast(ulong*)((cast(ushort*)&vf) + 1); 802 auto m = u & man_mask | *mp; 803 } 804 else 805 { 806 auto m = u & man_mask; 807 static if (T.sizeof > U.sizeof) 808 m |= (cast(U*)&vf)[MANTISSA_LSB]; 809 } 810 if (!m) 811 { 812 exp = 0; 813 goto R; 814 } 815 vf *= norm_factor; 816 u = (cast(U*)&vf)[idx]; 817 e = (u & exp_mask) >>> exp_shft; 818 exp = e + (T.min_exp - T.mant_dig); 819 goto P; 820 } 821 } 822 else // static if (realFormat == RealFormat.ibmExtended) 823 { 824 static assert(0, "frexp not implemented"); 825 } 826 } 827 828 /// 829 @safe version(mir_core_test) unittest 830 { 831 import mir.math.common: pow, approxEqual; 832 alias isNaN = x => x != x; 833 int exp; 834 real mantissa = frexp(123.456L, exp); 835 836 assert(approxEqual(mantissa * pow(2.0L, cast(real) exp), 123.456L)); 837 838 // special cases, zero 839 assert(frexp(-0.0, exp) == -0.0 && exp == 0); 840 assert(frexp(0.0, exp) == 0.0 && exp == 0); 841 842 // special cases, NaNs and INFs 843 exp = 1234; // random number 844 assert(isNaN(frexp(-real.nan, exp)) && exp == 1234); 845 assert(isNaN(frexp(real.nan, exp)) && exp == 1234); 846 assert(frexp(-real.infinity, exp) == -real.infinity && exp == 1234); 847 assert(frexp(real.infinity, exp) == real.infinity && exp == 1234); 848 } 849 850 @safe @nogc nothrow version(mir_core_test) unittest 851 { 852 import mir.math.common: pow; 853 int exp; 854 real mantissa = frexp(123.456L, exp); 855 856 assert(mantissa * pow(2.0L, cast(real) exp) == 123.456L); 857 } 858 859 @safe version(mir_core_test) unittest 860 { 861 import std.meta : AliasSeq; 862 import std.typecons : tuple, Tuple; 863 864 static foreach (T; AliasSeq!(float, double, real)) 865 {{ 866 enum randomNumber = 12345; 867 Tuple!(T, T, int)[] vals = // x,frexp,exp 868 [ 869 tuple(T(0.0), T( 0.0 ), 0), 870 tuple(T(-0.0), T( -0.0), 0), 871 tuple(T(1.0), T( .5 ), 1), 872 tuple(T(-1.0), T( -.5 ), 1), 873 tuple(T(2.0), T( .5 ), 2), 874 tuple(T(float.min_normal/2.0f), T(.5), -126), 875 tuple(T.infinity, T.infinity, randomNumber), 876 tuple(-T.infinity, -T.infinity, randomNumber), 877 tuple(T.nan, T.nan, randomNumber), 878 tuple(-T.nan, -T.nan, randomNumber), 879 880 // Phobos issue #16026: 881 tuple(3 * (T.min_normal * T.epsilon), T( .75), (T.min_exp - T.mant_dig) + 2) 882 ]; 883 884 foreach (i, elem; vals) 885 { 886 T x = elem[0]; 887 T e = elem[1]; 888 int exp = elem[2]; 889 int eptr = randomNumber; 890 T v = frexp(x, eptr); 891 assert(e == v || (e != e && v != v)); 892 assert(exp == eptr); 893 894 } 895 896 static if (floatTraits!(T).realFormat == RealFormat.ieeeExtended) 897 { 898 static T[3][] extendedvals = [ // x,frexp,exp 899 [0x1.a5f1c2eb3fe4efp+73L, 0x1.A5F1C2EB3FE4EFp-1L, 74], // normal 900 [0x1.fa01712e8f0471ap-1064L, 0x1.fa01712e8f0471ap-1L, -1063], 901 [T.min_normal, .5, -16381], 902 [T.min_normal/2.0L, .5, -16382] // subnormal 903 ]; 904 foreach (elem; extendedvals) 905 { 906 T x = elem[0]; 907 T e = elem[1]; 908 int exp = cast(int) elem[2]; 909 int eptr; 910 T v = frexp(x, eptr); 911 assert(e == v); 912 assert(exp == eptr); 913 914 } 915 } 916 }} 917 } 918 919 @safe version(mir_core_test) unittest 920 { 921 import std.meta : AliasSeq; 922 void foo() { 923 static foreach (T; AliasSeq!(real, double, float)) 924 {{ 925 int exp; 926 const T a = 1; 927 immutable T b = 2; 928 auto c = frexp(a, exp); 929 auto d = frexp(b, exp); 930 }} 931 } 932 } 933 934 /******************************************* 935 * Returns: n * 2$(SUPERSCRIPT exp) 936 * See_Also: $(LERF frexp) 937 */ 938 T ldexp(T)(const T n, int exp) @nogc @trusted pure nothrow 939 if (isFloatingPoint!T) 940 { 941 import core.math: ldexp; 942 return ldexp(n, exp); 943 } 944 945 /// 946 @nogc @safe pure nothrow version(mir_core_test) unittest 947 { 948 import std.meta : AliasSeq; 949 static foreach (T; AliasSeq!(float, double, real)) 950 {{ 951 T r = ldexp(cast(T) 3.0, cast(int) 3); 952 assert(r == 24); 953 954 T n = 3.0; 955 int exp = 3; 956 r = ldexp(n, exp); 957 assert(r == 24); 958 }} 959 } 960 961 @safe pure nothrow @nogc version(mir_core_test) unittest 962 { 963 import mir.math.common; 964 { 965 assert(ldexp(1.0, -1024) == 0x1p-1024); 966 assert(ldexp(1.0, -1022) == 0x1p-1022); 967 int x; 968 double n = frexp(0x1p-1024L, x); 969 assert(n == 0.5); 970 assert(x==-1023); 971 assert(ldexp(n, x)==0x1p-1024); 972 } 973 static if (floatTraits!(real).realFormat == RealFormat.ieeeExtended || 974 floatTraits!(real).realFormat == RealFormat.ieeeQuadruple) 975 { 976 assert(ldexp(1.0L, -16384) == 0x1p-16384L); 977 assert(ldexp(1.0L, -16382) == 0x1p-16382L); 978 int x; 979 real n = frexp(0x1p-16384L, x); 980 assert(n == 0.5L); 981 assert(x==-16383); 982 assert(ldexp(n, x)==0x1p-16384L); 983 } 984 } 985 986 /* workaround Issue 14718, float parsing depends on platform strtold 987 @safe pure nothrow @nogc version(mir_core_test) unittest 988 { 989 assert(ldexp(1.0, -1024) == 0x1p-1024); 990 assert(ldexp(1.0, -1022) == 0x1p-1022); 991 int x; 992 double n = frexp(0x1p-1024, x); 993 assert(n == 0.5); 994 assert(x==-1023); 995 assert(ldexp(n, x)==0x1p-1024); 996 } 997 @safe pure nothrow @nogc version(mir_core_test) unittest 998 { 999 assert(ldexp(1.0f, -128) == 0x1p-128f); 1000 assert(ldexp(1.0f, -126) == 0x1p-126f); 1001 int x; 1002 float n = frexp(0x1p-128f, x); 1003 assert(n == 0.5f); 1004 assert(x==-127); 1005 assert(ldexp(n, x)==0x1p-128f); 1006 } 1007 */ 1008 1009 @safe @nogc nothrow version(mir_core_test) unittest 1010 { 1011 import std.meta: AliasSeq; 1012 static F[3][] vals(F) = // value,exp,ldexp 1013 [ 1014 [ 0, 0, 0], 1015 [ 1, 0, 1], 1016 [ -1, 0, -1], 1017 [ 1, 1, 2], 1018 [ 123, 10, 125952], 1019 [ F.max, int.max, F.infinity], 1020 [ F.max, -int.max, 0], 1021 [ F.min_normal, -int.max, 0], 1022 ]; 1023 static foreach(F; AliasSeq!(double, real)) 1024 {{ 1025 int i; 1026 1027 for (i = 0; i < vals!F.length; i++) 1028 { 1029 F x = vals!F[i][0]; 1030 int exp = cast(int) vals!F[i][1]; 1031 F z = vals!F[i][2]; 1032 F l = ldexp(x, exp); 1033 assert(feqrel(z, l) >= 23); 1034 } 1035 }} 1036 } 1037 1038 package(mir): 1039 1040 // Constants used for extracting the components of the representation. 1041 // They supplement the built-in floating point properties. 1042 template floatTraits(T) 1043 { 1044 // EXPMASK is a ushort mask to select the exponent portion (without sign) 1045 // EXPSHIFT is the number of bits the exponent is left-shifted by in its ushort 1046 // EXPBIAS is the exponent bias - 1 (exp == EXPBIAS yields ×2^-1). 1047 // EXPPOS_SHORT is the index of the exponent when represented as a ushort array. 1048 // SIGNPOS_BYTE is the index of the sign when represented as a ubyte array. 1049 // RECIP_EPSILON is the value such that (smallest_subnormal) * RECIP_EPSILON == T.min_normal 1050 enum norm_factor = 1 / T.epsilon; 1051 static if (T.mant_dig == 24) 1052 { 1053 enum realFormat = RealFormat.ieeeSingle; 1054 } 1055 else static if (T.mant_dig == 53) 1056 { 1057 static if (T.sizeof == 8) 1058 { 1059 enum realFormat = RealFormat.ieeeDouble; 1060 } 1061 else 1062 static assert(false, "No traits support for " ~ T.stringof); 1063 } 1064 else static if (T.mant_dig == 64) 1065 { 1066 enum realFormat = RealFormat.ieeeExtended; 1067 } 1068 else static if (T.mant_dig == 113) 1069 { 1070 enum realFormat = RealFormat.ieeeQuadruple; 1071 } 1072 else 1073 static assert(false, "No traits support for " ~ T.stringof); 1074 1075 static if (realFormat == RealFormat.ieeeExtended) 1076 { 1077 alias S = int; 1078 alias U = ushort; 1079 enum sig_mask = U(1) << (U.sizeof * 8 - 1); 1080 enum exp_shft = 0; 1081 enum man_mask = 0; 1082 version (LittleEndian) 1083 enum idx = 4; 1084 else 1085 enum idx = 0; 1086 } 1087 else 1088 { 1089 static if (realFormat == RealFormat.ieeeQuadruple || realFormat == RealFormat.ieeeDouble && double.sizeof == size_t.sizeof) 1090 { 1091 alias S = long; 1092 alias U = ulong; 1093 } 1094 else 1095 { 1096 alias S = int; 1097 alias U = uint; 1098 } 1099 static if (realFormat == RealFormat.ieeeQuadruple) 1100 alias M = ulong; 1101 else 1102 alias M = U; 1103 enum sig_mask = U(1) << (U.sizeof * 8 - 1); 1104 enum uint exp_shft = T.mant_dig - 1 - (T.sizeof > U.sizeof ? U.sizeof * 8 : 0); 1105 enum man_mask = (U(1) << exp_shft) - 1; 1106 enum idx = T.sizeof > U.sizeof ? MANTISSA_MSB : 0; 1107 } 1108 enum exp_mask = (U.max >> (exp_shft + 1)) << exp_shft; 1109 enum int exp_msh = exp_mask >> exp_shft; 1110 enum intPartMask = man_mask + 1; 1111 enum exp_nrm = S(exp_msh - T.max_exp - 1) << exp_shft; 1112 } 1113 1114 // These apply to all floating-point types 1115 version (LittleEndian) 1116 { 1117 enum MANTISSA_LSB = 0; 1118 enum MANTISSA_MSB = 1; 1119 } 1120 else 1121 { 1122 enum MANTISSA_LSB = 1; 1123 enum MANTISSA_MSB = 0; 1124 }