mir.math.ieee source code

1 /**
2  * Base floating point routines.
3  * 
4  * Macros:
5  *      TABLE_SV = <table border="1" cellpadding="4" cellspacing="0">
6  *              <caption>Special Values</caption>
7  *              $0</table>
8  *      SVH = $(TR $(TH $1) $(TH $2))
9  *      SV  = $(TR $(TD $1) $(TD $2))
10  *      TH3 = $(TR $(TH $1) $(TH $2) $(TH $3))
11  *      TD3 = $(TR $(TD $1) $(TD $2) $(TD $3))
12  *      TABLE_DOMRG = <table border="1" cellpadding="4" cellspacing="0">
13  *              $(SVH Domain X, Range Y)
14                 $(SV $1, $2)
15  *              </table>
16  *      DOMAIN=$1
17  *      RANGE=$1
18  *      NAN = $(RED NAN)
19  *      SUP = <span style="vertical-align:super;font-size:smaller">$0</span>
20  *      GAMMA = &#915;
21  *      THETA = &theta;
22  *      INTEGRAL = &#8747;
23  *      INTEGRATE = $(BIG &#8747;<sub>$(SMALL $1)</sub><sup>$2</sup>)
24  *      POWER = $1<sup>$2</sup>
25  *      SUB = $1<sub>$2</sub>
26  *      BIGSUM = $(BIG &Sigma; <sup>$2</sup><sub>$(SMALL $1)</sub>)
27  *      CHOOSE = $(BIG &#40;) <sup>$(SMALL $1)</sup><sub>$(SMALL $2)</sub> $(BIG &#41;)
28  *      PLUSMN = &plusmn;
29  *      INFIN = &infin;
30  *      PLUSMNINF = &plusmn;&infin;
31  *      PI = &pi;
32  *      LT = &lt;
33  *      GT = &gt;
34  *      SQRT = &radic;
35  *      HALF = &frac12;
36  *
37  * License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
38  * Authors:   $(HTTP digitalmars.com, Walter Bright), Don Clugston, Ilia Ki
39  */
40 module mir.math.ieee;
41 
42 import mir.internal.utility: isFloatingPoint;
43 
44 /*********************************
45  * Return `true` if sign bit of e is set, `false` if not.
46  */
47 bool signbit(T)(const T x) @nogc @trusted pure nothrow
48 {
49     if (__ctfe)
50     {
51         double dval = cast(double) x; // Precision can increase or decrease but sign won't change (even NaN).
52         return 0 > *cast(long*) &dval;
53     }
54 
55     mixin floatTraits!T;
56 
57     static if (realFormat == RealFormat.ieeeSingle)
58     {
59         return 0 > *cast(int*) &x;
60     }
61     else 
62     static if (realFormat == RealFormat.ieeeDouble)
63     {
64         return 0 > *cast(long*) &x;
65     }
66     else 
67     static if (realFormat == RealFormat.ieeeQuadruple)
68     {
69         return 0 > ((cast(long*)&x)[MANTISSA_MSB]);
70     }
71     else static if (realFormat == RealFormat.ieeeExtended)
72     {
73         version (LittleEndian)
74             auto mp = cast(ubyte*)&x + 9;
75         else
76             auto mp = cast(ubyte*)&x;
77 
78         return (*mp & 0x80) != 0;
79     }
80     else static assert(0, "signbit is not implemented.");
81 }
82 
83 ///
84 @nogc @safe pure nothrow version(mir_core_test) unittest
85 {
86     assert(!signbit(float.nan));
87     assert(signbit(-float.nan));
88     assert(!signbit(168.1234f));
89     assert(signbit(-168.1234f));
90     assert(!signbit(0.0f));
91     assert(signbit(-0.0f));
92     assert(signbit(-float.max));
93     assert(!signbit(float.max));
94 
95     assert(!signbit(double.nan));
96     assert(signbit(-double.nan));
97     assert(!signbit(168.1234));
98     assert(signbit(-168.1234));
99     assert(!signbit(0.0));
100     assert(signbit(-0.0));
101     assert(signbit(-double.max));
102     assert(!signbit(double.max));
103 
104     assert(!signbit(real.nan));
105     assert(signbit(-real.nan));
106     assert(!signbit(168.1234L));
107     assert(signbit(-168.1234L));
108     assert(!signbit(0.0L));
109     assert(signbit(-0.0L));
110     assert(signbit(-real.max));
111     assert(!signbit(real.max));
112 }
113 
114 /**************************************
115  * To what precision is x equal to y?
116  *
117  * Returns: the number of mantissa bits which are equal in x and y.
118  * eg, 0x1.F8p+60 and 0x1.F1p+60 are equal to 5 bits of precision.
119  *
120  *      $(TABLE_SV
121  *      $(TR $(TH x)      $(TH y)          $(TH feqrel(x, y)))
122  *      $(TR $(TD x)      $(TD x)          $(TD real.mant_dig))
123  *      $(TR $(TD x)      $(TD $(GT)= 2*x) $(TD 0))
124  *      $(TR $(TD x)      $(TD $(LT)= x/2) $(TD 0))
125  *      $(TR $(TD $(NAN)) $(TD any)        $(TD 0))
126  *      $(TR $(TD any)    $(TD $(NAN))     $(TD 0))
127  *      )
128  */
129 int feqrel(T)(const T x, const T y) @trusted pure nothrow @nogc
130     if (isFloatingPoint!T)
131 {
132     /* Public Domain. Author: Don Clugston, 18 Aug 2005.
133      */
134     mixin floatTraits!T;
135     static if (realFormat == RealFormat.ieeeSingle
136             || realFormat == RealFormat.ieeeDouble
137             || realFormat == RealFormat.ieeeExtended
138             || realFormat == RealFormat.ieeeQuadruple)
139     {
140         import mir.math.common: fabs;
141 
142         if (x == y)
143             return T.mant_dig; // ensure diff != 0, cope with IN
144 
145         auto diff = fabs(x - y);
146 
147         int a = ((cast(U*)&   x)[idx] & exp_mask) >>> exp_shft;
148         int b = ((cast(U*)&   y)[idx] & exp_mask) >>> exp_shft;
149         int d = ((cast(U*)&diff)[idx] & exp_mask) >>> exp_shft;
150 
151 
152         // The difference in abs(exponent) between x or y and abs(x-y)
153         // is equal to the number of significand bits of x which are
154         // equal to y. If negative, x and y have different exponents.
155         // If positive, x and y are equal to 'bitsdiff' bits.
156         // AND with 0x7FFF to form the absolute value.
157         // To avoid out-by-1 errors, we subtract 1 so it rounds down
158         // if the exponents were different. This means 'bitsdiff' is
159         // always 1 lower than we want, except that if bitsdiff == 0,
160         // they could have 0 or 1 bits in common.
161 
162         int bitsdiff = ((a + b - 1) >> 1) - d;
163         if (d == 0)
164         {   // Difference is subnormal
165             // For subnormals, we need to add the number of zeros that
166             // lie at the start of diff's significand.
167             // We do this by multiplying by 2^^real.mant_dig
168             diff *= norm_factor;
169             return bitsdiff + T.mant_dig - int(((cast(U*)&diff)[idx] & exp_mask) >>> exp_shft);
170         }
171 
172         if (bitsdiff > 0)
173             return bitsdiff + 1; // add the 1 we subtracted before
174 
175         // Avoid out-by-1 errors when factor is almost 2.
176         if (bitsdiff == 0 && (a ^ b) == 0)
177             return 1;
178         else
179             return 0;
180     }
181     else
182     {
183         static assert(false, "Not implemented for this architecture");
184     }
185 }
186 
187 ///
188 @safe pure version(mir_core_test) unittest
189 {
190     assert(feqrel(2.0, 2.0) == 53);
191     assert(feqrel(2.0f, 2.0f) == 24);
192     assert(feqrel(2.0, double.nan) == 0);
193 
194     // Test that numbers are within n digits of each
195     // other by testing if feqrel > n * log2(10)
196 
197     // five digits
198     assert(feqrel(2.0, 2.00001) > 16);
199     // ten digits
200     assert(feqrel(2.0, 2.00000000001) > 33);
201 }
202 
203 @safe pure nothrow @nogc version(mir_core_test) unittest
204 {
205     void testFeqrel(F)()
206     {
207        // Exact equality
208        assert(feqrel(F.max, F.max) == F.mant_dig);
209        assert(feqrel!(F)(0.0, 0.0) == F.mant_dig);
210        assert(feqrel(F.infinity, F.infinity) == F.mant_dig);
211 
212        // a few bits away from exact equality
213        F w=1;
214        for (int i = 1; i < F.mant_dig - 1; ++i)
215        {
216           assert(feqrel!(F)(1.0 + w * F.epsilon, 1.0) == F.mant_dig-i);
217           assert(feqrel!(F)(1.0 - w * F.epsilon, 1.0) == F.mant_dig-i);
218           assert(feqrel!(F)(1.0, 1 + (w-1) * F.epsilon) == F.mant_dig - i + 1);
219           w*=2;
220        }
221 
222        assert(feqrel!(F)(1.5+F.epsilon, 1.5) == F.mant_dig-1);
223        assert(feqrel!(F)(1.5-F.epsilon, 1.5) == F.mant_dig-1);
224        assert(feqrel!(F)(1.5-F.epsilon, 1.5+F.epsilon) == F.mant_dig-2);
225 
226 
227        // Numbers that are close
228        assert(feqrel!(F)(0x1.Bp+84, 0x1.B8p+84) == 5);
229        assert(feqrel!(F)(0x1.8p+10, 0x1.Cp+10) == 2);
230        assert(feqrel!(F)(1.5 * (1 - F.epsilon), 1.0L) == 2);
231        assert(feqrel!(F)(1.5, 1.0) == 1);
232        assert(feqrel!(F)(2 * (1 - F.epsilon), 1.0L) == 1);
233 
234        // Factors of 2
235        assert(feqrel(F.max, F.infinity) == 0);
236        assert(feqrel!(F)(2 * (1 - F.epsilon), 1.0L) == 1);
237        assert(feqrel!(F)(1.0, 2.0) == 0);
238        assert(feqrel!(F)(4.0, 1.0) == 0);
239 
240        // Extreme inequality
241        assert(feqrel(F.nan, F.nan) == 0);
242        assert(feqrel!(F)(0.0L, -F.nan) == 0);
243        assert(feqrel(F.nan, F.infinity) == 0);
244        assert(feqrel(F.infinity, -F.infinity) == 0);
245        assert(feqrel(F.max, -F.max) == 0);
246 
247        assert(feqrel(F.min_normal / 8, F.min_normal / 17) == 3);
248 
249        const F Const = 2;
250        immutable F Immutable = 2;
251        auto Compiles = feqrel(Const, Immutable);
252     }
253 
254     assert(feqrel(7.1824L, 7.1824L) == real.mant_dig);
255 
256     testFeqrel!(float)();
257     testFeqrel!(double)();
258     testFeqrel!(real)();
259 }
260 
261 /++
262 +/
263 enum RealFormat
264 {
265     ///
266     ieeeHalf,
267     ///
268     ieeeSingle,
269     ///
270     ieeeDouble,
271     /// x87 80-bit real
272     ieeeExtended,
273     /// x87 real rounded to precision of double.
274     ieeeExtended53,
275     /// IBM 128-bit extended
276     ibmExtended,
277     ///
278     ieeeQuadruple,
279 }
280 
281 /**
282  * Calculate the next largest floating point value after x.
283  *
284  * Return the least number greater than x that is representable as a real;
285  * thus, it gives the next point on the IEEE number line.
286  *
287  *  $(TABLE_SV
288  *    $(SVH x,            nextUp(x)   )
289  *    $(SV  -$(INFIN),    -real.max   )
290  *    $(SV  $(PLUSMN)0.0, real.min_normal*real.epsilon )
291  *    $(SV  real.max,     $(INFIN) )
292  *    $(SV  $(INFIN),     $(INFIN) )
293  *    $(SV  $(NAN),       $(NAN)   )
294  * )
295  */
296 T nextUp(T)(const T x) @trusted pure nothrow @nogc
297     if (isFloatingPoint!T)
298 {
299     mixin floatTraits!T;
300     static if (realFormat == RealFormat.ieeeSingle)
301     {
302         uint s = *cast(uint*)&x;
303         if ((s & 0x7F80_0000) == 0x7F80_0000)
304         {
305             // First, deal with NANs and infinity
306             if (x == -x.infinity) return -x.max;
307 
308             return x; // +INF and NAN are unchanged.
309         }
310         if (s > 0x8000_0000)   // Negative number
311         {
312             --s;
313         }
314         else
315         if (s == 0x8000_0000) // it was negative zero
316         {
317             s = 0x0000_0001; // change to smallest subnormal
318         }
319         else
320         {
321             // Positive number
322             ++s;
323         }
324     R:
325         return *cast(T*)&s;
326     }
327     else static if (realFormat == RealFormat.ieeeDouble)
328     {
329         ulong s = *cast(ulong*)&x;
330 
331         if ((s & 0x7FF0_0000_0000_0000) == 0x7FF0_0000_0000_0000)
332         {
333             // First, deal with NANs and infinity
334             if (x == -x.infinity) return -x.max;
335             return x; // +INF and NAN are unchanged.
336         }
337         if (s > 0x8000_0000_0000_0000)   // Negative number
338         {
339             --s;
340         }
341         else
342         if (s == 0x8000_0000_0000_0000) // it was negative zero
343         {
344             s = 0x0000_0000_0000_0001; // change to smallest subnormal
345         }
346         else
347         {
348             // Positive number
349             ++s;
350         }
351     R:
352         return *cast(T*)&s;
353     }
354     else static if (realFormat == RealFormat.ieeeQuadruple)
355     {
356         auto e = exp_mask & (cast(U *)&x)[idx];
357         if (e == exp_mask)
358         {
359             // NaN or Infinity
360             if (x == -real.infinity) return -real.max;
361             return x; // +Inf and NaN are unchanged.
362         }
363 
364         auto ps = cast(ulong *)&x;
365         if (ps[MANTISSA_MSB] & 0x8000_0000_0000_0000)
366         {
367             // Negative number
368             if (ps[MANTISSA_LSB] == 0 && ps[MANTISSA_MSB] == 0x8000_0000_0000_0000)
369             {
370                 // it was negative zero, change to smallest subnormal
371                 ps[MANTISSA_LSB] = 1;
372                 ps[MANTISSA_MSB] = 0;
373                 return x;
374             }
375             if (ps[MANTISSA_LSB] == 0) --ps[MANTISSA_MSB];
376             --ps[MANTISSA_LSB];
377         }
378         else
379         {
380             // Positive number
381             ++ps[MANTISSA_LSB];
382             if (ps[MANTISSA_LSB] == 0) ++ps[MANTISSA_MSB];
383         }
384         return x;
385     }
386     else static if (realFormat == RealFormat.ieeeExtended)
387     {
388         // For 80-bit reals, the "implied bit" is a nuisance...
389         auto pe = cast(U*)&x + idx;
390         version (LittleEndian)
391             auto ps = cast(ulong*)&x;
392         else
393             auto ps = cast(ulong*)((cast(ushort*)&x) + 1);
394 
395         if ((*pe & exp_mask) == exp_mask)
396         {
397             // First, deal with NANs and infinity
398             if (x == -real.infinity) return -real.max;
399             return x; // +Inf and NaN are unchanged.
400         }
401         if (*pe & 0x8000)
402         {
403             // Negative number -- need to decrease the significand
404             --*ps;
405             // Need to mask with 0x7FF.. so subnormals are treated correctly.
406             if ((*ps & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FFF_FFFF_FFFF_FFFF)
407             {
408                 if (*pe == 0x8000)   // it was negative zero
409                 {
410                     *ps = 1;
411                     *pe = 0; // smallest subnormal.
412                     return x;
413                 }
414 
415                 --*pe;
416 
417                 if (*pe == 0x8000)
418                     return x; // it's become a subnormal, implied bit stays low.
419 
420                 *ps = 0xFFFF_FFFF_FFFF_FFFF; // set the implied bit
421                 return x;
422             }
423             return x;
424         }
425         else
426         {
427             // Positive number -- need to increase the significand.
428             // Works automatically for positive zero.
429             ++*ps;
430             if ((*ps & 0x7FFF_FFFF_FFFF_FFFF) == 0)
431             {
432                 // change in exponent
433                 ++*pe;
434                 *ps = 0x8000_0000_0000_0000; // set the high bit
435             }
436         }
437         return x;
438     }
439     else // static if (realFormat == RealFormat.ibmExtended)
440     {
441         assert(0, "nextUp not implemented");
442     }
443 }
444 
445 ///
446 @safe @nogc pure nothrow version(mir_core_test) unittest
447 {
448     assert(nextUp(1.0 - 1.0e-6).feqrel(0.999999) > 16);
449     assert(nextUp(1.0 - real.epsilon).feqrel(1.0) > 16);
450 }
451 
452 /**
453  * Calculate the next smallest floating point value before x.
454  *
455  * Return the greatest number less than x that is representable as a real;
456  * thus, it gives the previous point on the IEEE number line.
457  *
458  *  $(TABLE_SV
459  *    $(SVH x,            nextDown(x)   )
460  *    $(SV  $(INFIN),     real.max  )
461  *    $(SV  $(PLUSMN)0.0, -real.min_normal*real.epsilon )
462  *    $(SV  -real.max,    -$(INFIN) )
463  *    $(SV  -$(INFIN),    -$(INFIN) )
464  *    $(SV  $(NAN),       $(NAN)    )
465  * )
466  */
467 T nextDown(T)(const T x) @safe pure nothrow @nogc
468 {
469     return -nextUp(-x);
470 }
471 
472 ///
473 @safe pure nothrow @nogc version(mir_core_test) unittest
474 {
475     assert( nextDown(1.0 + real.epsilon) == 1.0);
476 }
477 
478 @safe pure nothrow @nogc version(mir_core_test) unittest
479 {
480     import std.math: NaN, isIdentical;
481 
482     static if (floatTraits!(real).realFormat == RealFormat.ieeeExtended)
483     {
484 
485         // Tests for 80-bit reals
486         assert(isIdentical(nextUp(NaN(0xABC)), NaN(0xABC)));
487         // negative numbers
488         assert( nextUp(-real.infinity) == -real.max );
489         assert( nextUp(-1.0L-real.epsilon) == -1.0 );
490         assert( nextUp(-2.0L) == -2.0 + real.epsilon);
491         // subnormals and zero
492         assert( nextUp(-real.min_normal) == -real.min_normal*(1-real.epsilon) );
493         assert( nextUp(-real.min_normal*(1-real.epsilon)) == -real.min_normal*(1-2*real.epsilon) );
494         assert( isIdentical(-0.0L, nextUp(-real.min_normal*real.epsilon)) );
495         assert( nextUp(-0.0L) == real.min_normal*real.epsilon );
496         assert( nextUp(0.0L) == real.min_normal*real.epsilon );
497         assert( nextUp(real.min_normal*(1-real.epsilon)) == real.min_normal );
498         assert( nextUp(real.min_normal) == real.min_normal*(1+real.epsilon) );
499         // positive numbers
500         assert( nextUp(1.0L) == 1.0 + real.epsilon );
501         assert( nextUp(2.0L-real.epsilon) == 2.0 );
502         assert( nextUp(real.max) == real.infinity );
503         assert( nextUp(real.infinity)==real.infinity );
504     }
505 
506     double n = NaN(0xABC);
507     assert(isIdentical(nextUp(n), n));
508     // negative numbers
509     assert( nextUp(-double.infinity) == -double.max );
510     assert( nextUp(-1-double.epsilon) == -1.0 );
511     assert( nextUp(-2.0) == -2.0 + double.epsilon);
512     // subnormals and zero
513 
514     assert( nextUp(-double.min_normal) == -double.min_normal*(1-double.epsilon) );
515     assert( nextUp(-double.min_normal*(1-double.epsilon)) == -double.min_normal*(1-2*double.epsilon) );
516     assert( isIdentical(-0.0, nextUp(-double.min_normal*double.epsilon)) );
517     assert( nextUp(0.0) == double.min_normal*double.epsilon );
518     assert( nextUp(-0.0) == double.min_normal*double.epsilon );
519     assert( nextUp(double.min_normal*(1-double.epsilon)) == double.min_normal );
520     assert( nextUp(double.min_normal) == double.min_normal*(1+double.epsilon) );
521     // positive numbers
522     assert( nextUp(1.0) == 1.0 + double.epsilon );
523     assert( nextUp(2.0-double.epsilon) == 2.0 );
524     assert( nextUp(double.max) == double.infinity );
525 
526     float fn = NaN(0xABC);
527     assert(isIdentical(nextUp(fn), fn));
528     float f = -float.min_normal*(1-float.epsilon);
529     float f1 = -float.min_normal;
530     assert( nextUp(f1) ==  f);
531     f = 1.0f+float.epsilon;
532     f1 = 1.0f;
533     assert( nextUp(f1) == f );
534     f1 = -0.0f;
535     assert( nextUp(f1) == float.min_normal*float.epsilon);
536     assert( nextUp(float.infinity)==float.infinity );
537 
538     assert(nextDown(1.0L+real.epsilon)==1.0);
539     assert(nextDown(1.0+double.epsilon)==1.0);
540     f = 1.0f+float.epsilon;
541     assert(nextDown(f)==1.0);
542 }
543 
544 /++
545 Return the value that lies halfway between x and y on the IEEE number line.
546 
547 Formally, the result is the arithmetic mean of the binary significands of x
548 and y, multiplied by the geometric mean of the binary exponents of x and y.
549 x and y must not be NaN.
550 Note: this function is useful for ensuring O(log n) behaviour in algorithms
551 involving a 'binary chop'.
552 
553 Params:
554     xx = x value
555     yy = y value
556 
557 Special cases:
558 If x and y not null and have opposite sign bits, then `copysign(T(0), y)` is returned.
559 If x and y are within a factor of 2 and have the same sign, (ie, feqrel(x, y) > 0), the return value
560 is the arithmetic mean (x + y) / 2.
561 If x and y are even powers of 2 and have the same sign, the return value is the geometric mean,
562 ieeeMean(x, y) = sgn(x) * sqrt(fabs(x * y)).
563 +/
564 T ieeeMean(T)(const T xx, const T yy) @trusted pure nothrow @nogc
565 in
566 {
567     assert(xx == xx && yy == yy);
568 }
569 do
570 {
571     import mir.math.common: copysign;
572     T x = xx;
573     T y = yy;
574 
575     if (x == 0)
576     {
577         x = copysign(T(0), y);
578     }
579     else
580     if (y == 0)
581     {
582         y = copysign(T(0), x);
583     }
584     else
585     if (signbit(x) != signbit(y))
586     {
587         return copysign(T(0), y);
588     }
589 
590     // The implementation is simple: cast x and y to integers,
591     // average them (avoiding overflow), and cast the result back to a floating-point number.
592 
593     mixin floatTraits!(T);
594     T u = 0;
595     static if (realFormat == RealFormat.ieeeExtended)
596     {
597         // There's slight additional complexity because they are actually
598         // 79-bit reals...
599         ushort *ue = cast(ushort *)&u + idx;
600         int ye = (cast(ushort *)&y)[idx];
601         int xe = (cast(ushort *)&x)[idx];
602 
603         version (LittleEndian)
604         {
605             ulong *ul = cast(ulong *)&u;
606             ulong xl = *cast(ulong *)&x;
607             ulong yl = *cast(ulong *)&y;
608         }
609         else
610         {
611             ulong *ul = cast(ulong *)(cast(short *)&u + 1);
612             ulong xl = *cast(ulong *)(cast(short *)&x + 1);
613             ulong yl = *cast(ulong *)(cast(short *)&y + 1);
614         }
615 
616         // Ignore the useless implicit bit. (Bonus: this prevents overflows)
617         ulong m = (xl & 0x7FFF_FFFF_FFFF_FFFFL) + (yl & 0x7FFF_FFFF_FFFF_FFFFL);
618 
619         int e = ((xe & exp_mask) + (ye & exp_mask));
620         if (m & 0x8000_0000_0000_0000L)
621         {
622             ++e;
623             m &= 0x7FFF_FFFF_FFFF_FFFFL;
624         }
625         // Now do a multi-byte right shift
626         const uint c = e & 1; // carry
627         e >>= 1;
628         m >>>= 1;
629         if (c)
630             m |= 0x4000_0000_0000_0000L; // shift carry into significand
631         if (e)
632             *ul = m | 0x8000_0000_0000_0000L; // set implicit bit...
633         else
634             *ul = m; // ... unless exponent is 0 (subnormal or zero).
635 
636         *ue = cast(ushort) (e | (xe & 0x8000)); // restore sign bit
637     }
638     else static if (realFormat == RealFormat.ieeeQuadruple)
639     {
640         // This would be trivial if 'ucent' were implemented...
641         ulong *ul = cast(ulong *)&u;
642         ulong *xl = cast(ulong *)&x;
643         ulong *yl = cast(ulong *)&y;
644 
645         // Multi-byte add, then multi-byte right shift.
646         import core.checkedint: addu;
647         bool carry;
648         ulong ml = addu(xl[MANTISSA_LSB], yl[MANTISSA_LSB], carry);
649 
650         ulong mh = carry + (xl[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFFL) +
651             (yl[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFFL);
652 
653         ul[MANTISSA_MSB] = (mh >>> 1) | (xl[MANTISSA_MSB] & 0x8000_0000_0000_0000);
654         ul[MANTISSA_LSB] = (ml >>> 1) | (mh & 1) << 63;
655     }
656     else static if (realFormat == RealFormat.ieeeDouble)
657     {
658         ulong *ul = cast(ulong *)&u;
659         ulong *xl = cast(ulong *)&x;
660         ulong *yl = cast(ulong *)&y;
661         ulong m = (((*xl) & 0x7FFF_FFFF_FFFF_FFFFL)
662                    + ((*yl) & 0x7FFF_FFFF_FFFF_FFFFL)) >>> 1;
663         m |= ((*xl) & 0x8000_0000_0000_0000L);
664         *ul = m;
665     }
666     else static if (realFormat == RealFormat.ieeeSingle)
667     {
668         uint *ul = cast(uint *)&u;
669         uint *xl = cast(uint *)&x;
670         uint *yl = cast(uint *)&y;
671         uint m = (((*xl) & 0x7FFF_FFFF) + ((*yl) & 0x7FFF_FFFF)) >>> 1;
672         m |= ((*xl) & 0x8000_0000);
673         *ul = m;
674     }
675     else
676     {
677         assert(0, "Not implemented");
678     }
679     return u;
680 }
681 
682 @safe pure nothrow @nogc version(mir_core_test) unittest
683 {
684     assert(ieeeMean(-0.0,-1e-20)<0);
685     assert(ieeeMean(0.0,1e-20)>0);
686 
687     assert(ieeeMean(1.0L,4.0L)==2L);
688     assert(ieeeMean(2.0*1.013,8.0*1.013)==4*1.013);
689     assert(ieeeMean(-1.0L,-4.0L)==-2L);
690     assert(ieeeMean(-1.0,-4.0)==-2);
691     assert(ieeeMean(-1.0f,-4.0f)==-2f);
692     assert(ieeeMean(-1.0,-2.0)==-1.5);
693     assert(ieeeMean(-1*(1+8*real.epsilon),-2*(1+8*real.epsilon))
694                  ==-1.5*(1+5*real.epsilon));
695     assert(ieeeMean(0x1p60,0x1p-10)==0x1p25);
696 
697     static if (floatTraits!(real).realFormat == RealFormat.ieeeExtended)
698     {
699       assert(ieeeMean(1.0L,real.infinity)==0x1p8192L);
700       assert(ieeeMean(0.0L,real.infinity)==1.5);
701     }
702     assert(ieeeMean(0.5*real.min_normal*(1-4*real.epsilon),0.5*real.min_normal)
703            == 0.5*real.min_normal*(1-2*real.epsilon));
704 }
705 
706 /*********************************************************************
707  * Separate floating point value into significand and exponent.
708  *
709  * Returns:
710  *      Calculate and return $(I x) and $(I exp) such that
711  *      value =$(I x)*2$(SUPERSCRIPT exp) and
712  *      .5 $(LT)= |$(I x)| $(LT) 1.0
713  *
714  *      $(I x) has same sign as value.
715  *
716  *      $(TABLE_SV
717  *      $(TR $(TH value)           $(TH returns)         $(TH exp))
718  *      $(TR $(TD $(PLUSMN)0.0)    $(TD $(PLUSMN)0.0)    $(TD 0))
719  *      $(TR $(TD +$(INFIN))       $(TD +$(INFIN))       $(TD unchenged))
720  *      $(TR $(TD -$(INFIN))       $(TD -$(INFIN))       $(TD unchenged))
721  *      $(TR $(TD $(PLUSMN)$(NAN)) $(TD $(PLUSMN)$(NAN)) $(TD unchenged))
722  *      )
723  */
724 T frexp(T)(const T value, ref int exp) @trusted pure nothrow @nogc
725 if (isFloatingPoint!T)
726 {
727     import mir.utility: _expect;
728     import mir.math.common: fabs;
729 
730     if (__ctfe)
731     {
732         // Handle special cases.
733         if (value == 0) { exp = 0; return value; }
734         if (value != value || fabs(value) == T.infinity) { return value; }
735         // Handle ordinary cases.
736         // In CTFE there is no performance advantage for having separate
737         // paths for different floating point types.
738         T absValue = value < 0 ? -value : value;
739         int expCount;
740         static if (T.mant_dig > double.mant_dig)
741         {
742             for (; absValue >= 0x1.0p+1024L; absValue *= 0x1.0p-1024L)
743                 expCount += 1024;
744             for (; absValue < 0x1.0p-1021L; absValue *= 0x1.0p+1021L)
745                 expCount -= 1021;
746         }
747         const double dval = cast(double) absValue;
748         int dexp = cast(int) (((*cast(const long*) &dval) >>> 52) & 0x7FF) + double.min_exp - 2;
749         dexp++;
750         expCount += dexp;
751         absValue *= 2.0 ^^ -dexp;
752         // If the original value was subnormal or if it was a real
753         // then absValue can still be outside the [0.5, 1.0) range.
754         if (absValue < 0.5)
755         {
756             assert(T.mant_dig > double.mant_dig || -T.min_normal < value && value < T.min_normal);
757             do
758             {
759                 absValue += absValue;
760                 expCount--;
761             } while (absValue < 0.5);
762         }
763         else
764         {
765             assert(absValue < 1 || T.mant_dig > double.mant_dig);
766             for (; absValue >= 1; absValue *= T(0.5))
767                 expCount++;
768         }
769         exp = expCount;
770         return value < 0 ? -absValue : absValue;
771     }
772 
773     with(floatTraits!T) static if (
774         realFormat == RealFormat.ieeeExtended
775      || realFormat == RealFormat.ieeeQuadruple
776      || realFormat == RealFormat.ieeeDouble
777      || realFormat == RealFormat.ieeeSingle)
778     {
779         T vf = value;
780         S u = (cast(U*)&vf)[idx];
781         int e = (u & exp_mask) >>> exp_shft;
782         if (_expect(e, true)) // If exponent is non-zero
783         {
784             if (_expect(e == exp_msh, false))
785                 goto R;
786             exp = e + (T.min_exp - 1);
787         P:
788             u &= ~exp_mask;
789             u ^= exp_nrm;
790             (cast(U*)&vf)[idx] = cast(U)u;
791         R:
792             return vf;
793         }
794         else
795         {
796             static if (realFormat == RealFormat.ieeeExtended)
797             {
798                 version (LittleEndian)
799                     auto mp = cast(ulong*)&vf;
800                 else
801                     auto mp = cast(ulong*)((cast(ushort*)&vf) + 1);
802                 auto m = u & man_mask | *mp;
803             }
804             else
805             {
806                 auto m = u & man_mask;
807                 static if (T.sizeof > U.sizeof)
808                     m |= (cast(U*)&vf)[MANTISSA_LSB];
809             }
810             if (!m)
811             {
812                 exp = 0;
813                 goto R;
814             }
815             vf *= norm_factor;
816             u = (cast(U*)&vf)[idx];
817             e = (u & exp_mask) >>> exp_shft;
818             exp = e + (T.min_exp - T.mant_dig);
819             goto P;
820         }
821     }
822     else // static if (realFormat == RealFormat.ibmExtended)
823     {
824         static assert(0, "frexp not implemented");
825     }
826 }
827 
828 ///
829 @safe version(mir_core_test) unittest
830 {
831     import mir.math.common: pow, approxEqual;
832     alias isNaN = x => x != x;
833     int exp;
834     real mantissa = frexp(123.456L, exp);
835 
836     assert(approxEqual(mantissa * pow(2.0L, cast(real) exp), 123.456L));
837 
838     // special cases, zero
839     assert(frexp(-0.0, exp) == -0.0 && exp == 0);
840     assert(frexp(0.0, exp) == 0.0 && exp == 0);
841 
842     // special cases, NaNs and INFs
843     exp = 1234; // random number
844     assert(isNaN(frexp(-real.nan, exp)) && exp == 1234);
845     assert(isNaN(frexp(real.nan, exp)) && exp == 1234);
846     assert(frexp(-real.infinity, exp) == -real.infinity && exp == 1234);
847     assert(frexp(real.infinity, exp) == real.infinity && exp == 1234);
848 }
849 
850 @safe @nogc nothrow version(mir_core_test) unittest
851 {
852     import mir.math.common: pow;
853     int exp;
854     real mantissa = frexp(123.456L, exp);
855 
856     assert(mantissa * pow(2.0L, cast(real) exp) == 123.456L);
857 }
858 
859 @safe version(mir_core_test) unittest
860 {
861     import std.meta : AliasSeq;
862     import std.typecons : tuple, Tuple;
863 
864     static foreach (T; AliasSeq!(float, double, real))
865     {{
866         enum randomNumber = 12345;
867         Tuple!(T, T, int)[] vals =     // x,frexp,exp
868             [
869              tuple(T(0.0),  T( 0.0 ), 0),
870              tuple(T(-0.0), T( -0.0), 0),
871              tuple(T(1.0),  T( .5  ), 1),
872              tuple(T(-1.0), T( -.5 ), 1),
873              tuple(T(2.0),  T( .5  ), 2),
874              tuple(T(float.min_normal/2.0f), T(.5), -126),
875              tuple(T.infinity, T.infinity, randomNumber),
876              tuple(-T.infinity, -T.infinity, randomNumber),
877              tuple(T.nan, T.nan, randomNumber),
878              tuple(-T.nan, -T.nan, randomNumber),
879 
880              // Phobos issue #16026:
881              tuple(3 * (T.min_normal * T.epsilon), T( .75), (T.min_exp - T.mant_dig) + 2)
882              ];
883 
884         foreach (i, elem; vals)
885         {
886             T x = elem[0];
887             T e = elem[1];
888             int exp = elem[2];
889             int eptr = randomNumber;
890             T v = frexp(x, eptr);
891             assert(e == v || (e != e && v != v));
892             assert(exp == eptr);
893 
894         }
895 
896         static if (floatTraits!(T).realFormat == RealFormat.ieeeExtended)
897         {
898             static T[3][] extendedvals = [ // x,frexp,exp
899                 [0x1.a5f1c2eb3fe4efp+73L,    0x1.A5F1C2EB3FE4EFp-1L,     74],    // normal
900                 [0x1.fa01712e8f0471ap-1064L, 0x1.fa01712e8f0471ap-1L, -1063],
901                 [T.min_normal,      .5, -16381],
902                 [T.min_normal/2.0L, .5, -16382]    // subnormal
903             ];
904             foreach (elem; extendedvals)
905             {
906                 T x = elem[0];
907                 T e = elem[1];
908                 int exp = cast(int) elem[2];
909                 int eptr;
910                 T v = frexp(x, eptr);
911                 assert(e == v);
912                 assert(exp == eptr);
913 
914             }
915         }
916     }}
917 }
918 
919 @safe version(mir_core_test) unittest
920 {
921     import std.meta : AliasSeq;
922     void foo() {
923         static foreach (T; AliasSeq!(real, double, float))
924         {{
925             int exp;
926             const T a = 1;
927             immutable T b = 2;
928             auto c = frexp(a, exp);
929             auto d = frexp(b, exp);
930         }}
931     }
932 }
933 
934 /*******************************************
935  * Returns: n * 2$(SUPERSCRIPT exp)
936  * See_Also: $(LERF frexp)
937  */
938 T ldexp(T)(const T n, int exp) @nogc @trusted pure nothrow
939     if (isFloatingPoint!T)
940 {
941     import core.math: ldexp;
942     return ldexp(n, exp);
943 }
944 
945 ///
946 @nogc @safe pure nothrow version(mir_core_test) unittest
947 {
948     import std.meta : AliasSeq;
949     static foreach (T; AliasSeq!(float, double, real))
950     {{
951         T r = ldexp(cast(T) 3.0, cast(int) 3);
952         assert(r == 24);
953 
954         T n = 3.0;
955         int exp = 3;
956         r = ldexp(n, exp);
957         assert(r == 24);
958     }}
959 }
960 
961 @safe pure nothrow @nogc version(mir_core_test) unittest
962 {
963     import mir.math.common;
964     {
965         assert(ldexp(1.0, -1024) == 0x1p-1024);
966         assert(ldexp(1.0, -1022) == 0x1p-1022);
967         int x;
968         double n = frexp(0x1p-1024L, x);
969         assert(n == 0.5);
970         assert(x==-1023);
971         assert(ldexp(n, x)==0x1p-1024);
972     }
973     static if (floatTraits!(real).realFormat == RealFormat.ieeeExtended ||
974                floatTraits!(real).realFormat == RealFormat.ieeeQuadruple)
975     {
976         assert(ldexp(1.0L, -16384) == 0x1p-16384L);
977         assert(ldexp(1.0L, -16382) == 0x1p-16382L);
978         int x;
979         real n = frexp(0x1p-16384L, x);
980         assert(n == 0.5L);
981         assert(x==-16383);
982         assert(ldexp(n, x)==0x1p-16384L);
983     }
984 }
985 
986 /* workaround Issue 14718, float parsing depends on platform strtold
987 @safe pure nothrow @nogc version(mir_core_test) unittest
988 {
989     assert(ldexp(1.0, -1024) == 0x1p-1024);
990     assert(ldexp(1.0, -1022) == 0x1p-1022);
991     int x;
992     double n = frexp(0x1p-1024, x);
993     assert(n == 0.5);
994     assert(x==-1023);
995     assert(ldexp(n, x)==0x1p-1024);
996 }
997 @safe pure nothrow @nogc version(mir_core_test) unittest
998 {
999     assert(ldexp(1.0f, -128) == 0x1p-128f);
1000     assert(ldexp(1.0f, -126) == 0x1p-126f);
1001     int x;
1002     float n = frexp(0x1p-128f, x);
1003     assert(n == 0.5f);
1004     assert(x==-127);
1005     assert(ldexp(n, x)==0x1p-128f);
1006 }
1007 */
1008 
1009 @safe @nogc nothrow version(mir_core_test) unittest
1010 {
1011     import std.meta: AliasSeq;
1012     static F[3][] vals(F) =    // value,exp,ldexp
1013     [
1014     [    0,    0,    0],
1015     [    1,    0,    1],
1016     [    -1,    0,    -1],
1017     [    1,    1,    2],
1018     [    123,    10,    125952],
1019     [    F.max,    int.max,    F.infinity],
1020     [    F.max,    -int.max,    0],
1021     [    F.min_normal,    -int.max,    0],
1022     ];
1023     static foreach(F; AliasSeq!(double, real))
1024     {{
1025         int i;
1026 
1027         for (i = 0; i < vals!F.length; i++)
1028         {
1029             F x = vals!F[i][0];
1030             int exp = cast(int) vals!F[i][1];
1031             F z = vals!F[i][2];
1032             F l = ldexp(x, exp);
1033             assert(feqrel(z, l) >= 23);
1034         }
1035     }}
1036 }
1037 
1038 package(mir):
1039 
1040 // Constants used for extracting the components of the representation.
1041 // They supplement the built-in floating point properties.
1042 template floatTraits(T)
1043 {
1044     // EXPMASK is a ushort mask to select the exponent portion (without sign)
1045     // EXPSHIFT is the number of bits the exponent is left-shifted by in its ushort
1046     // EXPBIAS is the exponent bias - 1 (exp == EXPBIAS yields ×2^-1).
1047     // EXPPOS_SHORT is the index of the exponent when represented as a ushort array.
1048     // SIGNPOS_BYTE is the index of the sign when represented as a ubyte array.
1049     // RECIP_EPSILON is the value such that (smallest_subnormal) * RECIP_EPSILON == T.min_normal
1050     enum norm_factor = 1 / T.epsilon;
1051     static if (T.mant_dig == 24)
1052     {
1053         enum realFormat = RealFormat.ieeeSingle;
1054     }
1055     else static if (T.mant_dig == 53)
1056     {
1057         static if (T.sizeof == 8)
1058         {
1059             enum realFormat = RealFormat.ieeeDouble;
1060         }
1061         else
1062             static assert(false, "No traits support for " ~ T.stringof);
1063     }
1064     else static if (T.mant_dig == 64)
1065     {
1066         enum realFormat = RealFormat.ieeeExtended;
1067     }
1068     else static if (T.mant_dig == 113)
1069     {
1070         enum realFormat = RealFormat.ieeeQuadruple;
1071     }
1072     else
1073         static assert(false, "No traits support for " ~ T.stringof);
1074 
1075     static if (realFormat == RealFormat.ieeeExtended)
1076     {
1077         alias S = int;
1078         alias U = ushort;
1079         enum sig_mask = U(1) << (U.sizeof * 8 - 1);
1080         enum exp_shft = 0;
1081         enum man_mask = 0;
1082         version (LittleEndian)
1083             enum idx = 4;
1084         else
1085             enum idx = 0;
1086     }
1087     else
1088     {
1089         static if (realFormat == RealFormat.ieeeQuadruple || realFormat == RealFormat.ieeeDouble && double.sizeof == size_t.sizeof)
1090         {
1091             alias S = long;
1092             alias U = ulong;
1093         }
1094         else
1095         {
1096             alias S = int;
1097             alias U = uint;
1098         }
1099         static if (realFormat == RealFormat.ieeeQuadruple)
1100             alias M = ulong;
1101         else
1102             alias M = U;
1103         enum sig_mask = U(1) << (U.sizeof * 8 - 1);
1104         enum uint exp_shft = T.mant_dig - 1 - (T.sizeof > U.sizeof ? U.sizeof * 8 : 0);
1105         enum man_mask = (U(1) << exp_shft) - 1;
1106         enum idx = T.sizeof > U.sizeof ? MANTISSA_MSB : 0;
1107     }
1108     enum exp_mask = (U.max >> (exp_shft + 1)) << exp_shft;
1109     enum int exp_msh = exp_mask >> exp_shft;
1110     enum intPartMask = man_mask + 1;
1111     enum exp_nrm = S(exp_msh - T.max_exp - 1) << exp_shft;
1112 }
1113 
1114 // These apply to all floating-point types
1115 version (LittleEndian)
1116 {
1117     enum MANTISSA_LSB = 0;
1118     enum MANTISSA_MSB = 1;
1119 }
1120 else
1121 {
1122     enum MANTISSA_LSB = 1;
1123     enum MANTISSA_MSB = 0;
1124 }
The OpenD Programming Language