std.math.hardware source code

1 // Written in the D programming language.
2 
3 /**
4 This is a submodule of $(MREF std, math).
5 
6 It contains hardware support for floating point numbers.
7 
8 Copyright: Copyright The D Language Foundation 2000 - 2011.
9 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
10 Authors:   $(HTTP digitalmars.com, Walter Bright), Don Clugston,
11            Conversion of CEPHES math library to D by Iain Buclaw and David Nadlinger
12 Source: $(PHOBOSSRC std/math/hardware.d)
13  */
14 
15 module std.math.hardware;
16 
17 static import core.stdc.fenv;
18 
// LDC provides the real @optStrategy attribute; every other compiler gets an
// inert stand-in so `@optStrategy("none")` annotations still compile.
version (LDC)
{
    import ldc.attributes;
}
else
{
    struct optStrategy { string s; }
}

// Collapse the individual CPU versions into one identifier per family.
version (X86)           version = X86_Any;
version (X86_64)        version = X86_Any;

version (ARM)           version = ARM_Any;
version (AArch64)       version = ARM_Any;

version (PPC)           version = PPC_Any;
version (PPC64)         version = PPC_Any;

version (MIPS32)        version = MIPS_Any;
version (MIPS64)        version = MIPS_Any;

version (RISCV32)       version = RISCV_Any;
version (RISCV64)       version = RISCV_Any;

version (SPARC)         version = SPARC_Any;
version (SPARC64)       version = SPARC_Any;

version (S390)          version = IBMZ_Any;
version (SystemZ)       version = IBMZ_Any;

version (LoongArch64)   version = LoongArch_Any;

// DMD-style x86 inline assembler, in either bitness.
version (D_InlineAsm_X86)       version = InlineAsm_X86_Any;
version (D_InlineAsm_X86_64)    version = InlineAsm_X86_Any;

// Targets where SSE is taken for granted at compile time.
version (X86_64)
    version = StaticallyHaveSSE;
version (X86)
{
    version (OSX) version = StaticallyHaveSSE;
}

version (StaticallyHaveSSE)
{
    private enum bool haveSSE = true;
}
else version (X86)
{
    // Other 32-bit x86 targets: defer to core.cpuid.
    static import core.cpuid;
    private alias haveSSE = core.cpuid.sse;
}

version (D_SoftFloat)
{
    // Some soft float implementations may support IEEE floating flags.
    // This module only reads hardware status registers, so IeeeFlagsSupport
    // stays unset here.
}
else version (X86_Any)       { version = IeeeFlagsSupport; }
else version (PPC_Any)       { version = IeeeFlagsSupport; }
else version (RISCV_Any)     { version = IeeeFlagsSupport; }
else version (MIPS_Any)      { version = IeeeFlagsSupport; }
else version (LoongArch_Any) { version = IeeeFlagsSupport; }
else version (ARM_Any)       { version = IeeeFlagsSupport; }

// Struct FloatingPointControl is only available if hardware FP units are
// available, and its clearExceptions() additionally needs IeeeFlagsSupport.
version (D_HardFloat) version (IeeeFlagsSupport) version = FloatingPointControlSupport;
74 version (IeeeFlagsSupport)
75 {
76 
/** IEEE exception status flags ('sticky bits')

 These flags indicate that an exceptional floating-point condition has occurred.
 They indicate that a NaN or an infinity has been generated, that a result
 is inexact, or that a signalling NaN has been encountered. If floating-point
 exceptions are enabled (unmasked), a hardware exception will be generated
 instead of setting these flags.
 */
struct IeeeFlags
{
nothrow @nogc:

private:
    // Raw status bits as returned by getIeeeFlags(); the *_MASK constants
    // below pick the individual conditions out of this word.
    //
    // The x87 FPU status register is 16 bits.
    // The Pentium SSE2 status register is 32 bits.
    // The ARM and PowerPC FPSCR is a 32-bit register.
    // The SPARC FSR is a 32bit register (64 bits for SPARC 7 & 8, but high bits are uninteresting).
    // The RISC-V (32 & 64 bit) fcsr is 32-bit register.
    // The LoongArch fcsr (fcsr0) is a 32-bit register.
    uint flags;

    version (CRuntime_Microsoft)
    {
        // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv).
        // Applies to both x87 status word (16 bits) and SSE2 status word(32 bits).
        enum : int
        {
            INEXACT_MASK   = 0x20,
            UNDERFLOW_MASK = 0x10,
            OVERFLOW_MASK  = 0x08,
            DIVBYZERO_MASK = 0x04,
            INVALID_MASK   = 0x01,

            EXCEPTIONS_MASK = 0b11_1111
        }
        // Don't bother about subnormals, they are not supported on most CPUs.
        //  SUBNORMAL_MASK = 0x02;
    }
    else
    {
        // Everywhere else the C runtime's fenv.h constants are used as the
        // bit positions of the individual flags.
        enum : int
        {
            INEXACT_MASK    = core.stdc.fenv.FE_INEXACT,
            UNDERFLOW_MASK  = core.stdc.fenv.FE_UNDERFLOW,
            OVERFLOW_MASK   = core.stdc.fenv.FE_OVERFLOW,
            DIVBYZERO_MASK  = core.stdc.fenv.FE_DIVBYZERO,
            INVALID_MASK    = core.stdc.fenv.FE_INVALID,
            EXCEPTIONS_MASK = core.stdc.fenv.FE_ALL_EXCEPT,
        }
    }

    /**
     * Reads the current floating-point exception status bits.
     *
     * On x86 the x87 status word is read and, when `haveSSE` is true, ORed
     * with MXCSR; the result is masked with `EXCEPTIONS_MASK`. The other
     * targets read their own status register; note that the LDC PPC and MIPS
     * paths return the complete control state unmasked.
     *
     * Returns: the status bits that the public flag properties test with the
     *          `*_MASK` constants.
     */
    static uint getIeeeFlags() @trusted pure
    {
        version (InlineAsm_X86_Any)
        {
            ushort sw;
          version (LDC)
          {
            asm pure nothrow @nogc { "fstsw %0" : "=m" (sw); }
          }
          else
          {
            asm pure nothrow @nogc { fstsw sw; }
          }

            // OR the result with the SSE2 status register (MXCSR).
            if (haveSSE)
            {
                uint mxcsr;
              version (LDC)
              {
                asm pure nothrow @nogc { "stmxcsr %0" : "=m" (mxcsr); }
              }
              else
              {
                asm pure nothrow @nogc { stmxcsr mxcsr; }
              }
                return (sw | mxcsr) & EXCEPTIONS_MASK;
            }
            else return sw & EXCEPTIONS_MASK;
        }
        else version (LDC)
        {
            version (PPC_Any)
            {
                return FloatingPointControl.getControlState();
            }
            else version (MIPS_Any)
            {
                return FloatingPointControl.getControlState();
            }
            else version (AArch64)
            {
                uint fpsr;
                asm pure nothrow @nogc { "mrs %0, FPSR" : "=r" (fpsr); }
                return fpsr & 0x1F;
            }
            else version (ARM)
            {
                const fpscr = FloatingPointControl.getControlState();
                return fpscr & 0x1F;
            }
            else version (RISCV_Any)
            {
                uint result;
                asm pure nothrow @nogc { "frflags %0" : "=r" (result); }
                return result;
            }
            else
                assert(0, "Not yet supported");
        }
        else version (SPARC)
        {
            /*
               int retval;
               asm pure nothrow @nogc { st %fsr, retval; }
               return retval;
            */
            assert(0, "Not yet supported");
        }
        else version (ARM)
        {
            assert(false, "Not yet supported.");
        }
        else version (RISCV_Any)
        {
            mixin(`
            uint result = void;
            asm pure nothrow @nogc
            {
                "frflags %0" : "=r" (result);
            }
            return result;
            `);
        }
        else version (LoongArch_Any)
        {
            uint result = void;
            asm pure nothrow @nogc
            {
                "movfcsr2gr %0,$r2" : "=r" (result);
            }
            return result & EXCEPTIONS_MASK;
        }
        else
            assert(0, "Not yet supported");
    }

    /**
     * Clears the floating-point exception status bits.
     *
     * On x86 both the x87 status word (`fnclex`) and, when `haveSSE` is true,
     * the exception bits of MXCSR are cleared. Targets without an
     * implementation hit `assert(0)`.
     */
    static void resetIeeeFlags() @trusted
    {
        version (InlineAsm_X86_Any)
        {
          version (LDC)
          {
            asm nothrow @nogc { "fnclex" : : : "fpsw"; }
          }
          else
          {
            asm nothrow @nogc
            {
                fnclex;
            }
          }

            // Also clear exception flags in MXCSR, SSE's control register.
            if (haveSSE)
            {
                uint mxcsr;
              version (LDC)
              {
                asm nothrow @nogc { "stmxcsr %0" : "=m" (mxcsr); }
                mxcsr &= ~EXCEPTIONS_MASK;
                asm nothrow @nogc { "ldmxcsr %0" : : "m" (mxcsr) : "flags"; }
              }
              else
              {
                asm nothrow @nogc { stmxcsr mxcsr; }
                mxcsr &= ~EXCEPTIONS_MASK;
                asm nothrow @nogc { ldmxcsr mxcsr; }
              }
            }
        }
        else version (RISCV_Any)
        {
            mixin(`
            uint newValues = 0x0;
            asm pure nothrow @nogc
            {
                "fsflags %0" : : "r" (newValues);
            }
            `);
        }
        else version (LoongArch_Any)
        {
            asm nothrow @nogc
            {
                "movgr2fcsr $r2,$r0";
            }
        }
        else version (LDC)
        {
            version (PPC_Any)
            {
                asm pure nothrow @nogc
                {
                    `mtfsb0 3
                     mtfsb0 4
                     mtfsb0 5
                     mtfsb0 6
                     mtfsb0 7
                     mtfsb0 8
                     mtfsb0 9
                     mtfsb0 10
                     mtfsb0 11
                     mtfsb0 12`;
                }
            }
            else version (MIPS_Any)
            {
                // Clear exactly the bits the flag properties inspect and keep
                // the rest of the 32-bit control word intact. The previous
                // masks (0xFFFFFF80 / 0xFF80) also wiped bits 0..1, which lie
                // outside EXCEPTIONS_MASK, and on 32-bit targets zeroed the
                // whole upper half of the word before writing it back.
                const newState = FloatingPointControl.getControlState() & ~EXCEPTIONS_MASK;
                FloatingPointControl.setControlState(newState);
            }
            else version (AArch64)
            {
                uint fpsr;
                asm pure nothrow @nogc { "mrs %0, FPSR" : "=r" (fpsr); }
                fpsr &= ~0x1F;
                asm pure nothrow @nogc { "msr FPSR, %0" : : "r" (fpsr); }
            }
            else version (ARM)
            {
                const fpscr = FloatingPointControl.getControlState();
                FloatingPointControl.setControlState(fpscr & ~0x1F);
            }
            else
                assert(0, "Not yet supported");
        }
        else
        {
            /* SPARC:
              int tmpval;
              asm pure nothrow @nogc { st %fsr, tmpval; }
              tmpval &=0xFFFF_FC00;
              asm pure nothrow @nogc { ld tmpval, %fsr; }
            */
           assert(0, "Not yet supported");
        }
    }

public:
    /**
     * The result cannot be represented exactly, so rounding occurred.
     * Example: `x = sin(0.1);`
     */
    @property bool inexact() @safe const { return (flags & INEXACT_MASK) != 0; }

    /**
     * A zero was generated by underflow
     * Example: `x = real.min*real.epsilon/2;`
     */
    @property bool underflow() @safe const { return (flags & UNDERFLOW_MASK) != 0; }

    /**
     * An infinity was generated by overflow
     * Example: `x = real.max*2;`
     */
    @property bool overflow() @safe const { return (flags & OVERFLOW_MASK) != 0; }

    /**
     * An infinity was generated by division by zero
     * Example: `x = 3/0.0;`
     */
    @property bool divByZero() @safe const { return (flags & DIVBYZERO_MASK) != 0; }

    /**
     * A machine NaN was generated.
     * Example: `x = real.infinity * 0.0;`
     */
    @property bool invalid() @safe const { return (flags & INVALID_MASK) != 0; }
}
360 
///
version (StdDdoc)
@safe unittest
{
    import std.math.traits : isNaN;

    // Integer-only helper used to show that unrelated code leaves the flags alone.
    static void integerOnly() {
        int product = 10 * 10;
    }

    real value = 3.5;

    // Start with every flag cleared.
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);

    // A finite value divided by zero gives infinity and raises divByZero.
    value /= 0.0L;
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // infinity * 0 gives NaN and raises invalid.
    value *= 0.0L;
    assert(ieeeFlags.invalid);
    assert(isNaN(value));

    // Calling integerOnly() must not disturb the status flags.
    const IeeeFlags before = ieeeFlags;
    integerOnly();
    assert(ieeeFlags == before);
}
389 
@optStrategy("none") // LDC
@safe unittest
{
    import std.math.traits : isNaN;

    // Integer-only helper used to show that unrelated code leaves the flags alone.
    static void integerOnly() {
        int product = 10 * 10;
    }

    real value = 3.5;

    // Start with every flag cleared.
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);

    // Division by zero, routed through forceDivOp.
    value = forceDivOp(value, 0.0L);
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // infinity * 0 through forceMulOp: NaN plus the invalid flag.
    value = forceMulOp(value, 0.0L);
    assert(ieeeFlags.invalid);
    assert(isNaN(value));

    // Calling integerOnly() must not disturb the status flags.
    const IeeeFlags before = ieeeFlags;
    integerOnly();
    assert(ieeeFlags == before);
}
417 
version (LDC)
{
    unittest
    {
        pragma(msg, "ieeeFlags test disabled, see LDC Issue #888");
    }
}
else
@safe unittest
{
    import std.meta : AliasSeq;

    // Pairs an operation with the probe for the flag that operation should raise.
    static struct Scenario
    {
        void delegate() @trusted trigger;
        bool function() @trusted flagIsSet;
    }

    static foreach (T; AliasSeq!(float, double, real))
    {{
        T sink; // Needs to be here to avoid `call without side effects` warning.
        auto scenarios = [
            // 1 + 0.1 is not exactly representable
            Scenario(
                () { sink = forceAddOp!T(1, 0.1L); },
                () => ieeeFlags.inexact
            ),
            // smallest normal divided by the largest value
            Scenario(
                () { sink = forceDivOp!T(T.min_normal, T.max); },
                () => ieeeFlags.underflow
            ),
            // max + max exceeds the representable range
            Scenario(
                () { sink = forceAddOp!T(T.max, T.max); },
                () => ieeeFlags.overflow
            ),
            // 1 / 0
            Scenario(
                () { sink = forceDivOp!T(1, 0); },
                () => ieeeFlags.divByZero
            ),
            // 0 / 0
            Scenario(
                () { sink = forceDivOp!T(0, 0); },
                () => ieeeFlags.invalid
            )
        ];

        foreach (i; 0 .. scenarios.length)
        {
            auto current = scenarios[i];

            // The flag must be clear after a reset and set after the operation.
            resetIeeeFlags();
            assert(!current.flagIsSet());
            current.trigger();
            assert(current.flagIsSet());
        }
    }}
}
470 
/// Set all of the floating-point status flags to false.
@trusted nothrow @nogc
void resetIeeeFlags()
{
    // Thin public front for the private, target-specific implementation.
    IeeeFlags.resetIeeeFlags();
}
476 
///
version (StdDdoc)
@safe unittest
{
    resetIeeeFlags();

    // Raise divByZero ...
    real quotient = 3.5;
    quotient /= 0.0L;
    assert(quotient == real.infinity);
    assert(ieeeFlags.divByZero);

    // ... and check that a reset clears it again.
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
}
490 
@optStrategy("none") // LDC, required for the IEEE flags check
@safe unittest
{
    resetIeeeFlags();

    // Raise divByZero through forceDivOp ...
    real quotient = 3.5;
    quotient = forceDivOp(quotient, 0.0L);
    assert(quotient == real.infinity);
    assert(ieeeFlags.divByZero);

    // ... and check that a reset clears it again.
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
}
503 
/// Returns: snapshot of the current state of the floating-point status flags
@property IeeeFlags ieeeFlags() @trusted pure nothrow @nogc
{
    // Capture the raw status bits first, then wrap them in the public type.
    const uint rawBits = IeeeFlags.getIeeeFlags();
    return IeeeFlags(rawBits);
}
509 
///
version (StdDdoc)
@safe nothrow unittest
{
    import std.math.traits : isNaN;

    resetIeeeFlags();
    real value = 3.5;

    // finite / 0 -> infinity, divByZero raised
    value /= 0.0L;
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // infinity * 0 -> NaN, invalid raised
    value *= 0.0L;
    assert(isNaN(value));
    assert(ieeeFlags.invalid);
}
527 
@optStrategy("none") // LDC, required for the IEEE flags check
@safe nothrow unittest
{
    import std.math.traits : isNaN;

    resetIeeeFlags();
    real value = 3.5;

    // finite / 0 -> infinity, divByZero raised
    value = forceDivOp(value, 0.0L);
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // infinity * 0 -> NaN, invalid raised
    value = forceMulOp(value, 0.0L);
    assert(isNaN(value));
    assert(ieeeFlags.invalid);
}
544 
545 } // IeeeFlagsSupport
546 
547 
548 version (FloatingPointControlSupport)
549 {
550 
551 /** Control the Floating point hardware
552 
553   Change the IEEE754 floating-point rounding mode and the floating-point
554   hardware exceptions.
555 
556   By default, the rounding mode is roundToNearest and all hardware exceptions
557   are disabled. For most applications, debugging is easier if the $(I division
558   by zero), $(I overflow), and $(I invalid operation) exceptions are enabled.
559   These three are combined into a $(I severeExceptions) value for convenience.
560   Note in particular that if $(I invalidException) is enabled, a hardware trap
561   will be generated whenever an uninitialized floating-point variable is used.
562 
563   All changes are temporary. The previous state is restored at the
564   end of the scope.
565 
566 
567 Example:
568 ----
569 {
570     FloatingPointControl fpctrl;
571 
572     // Enable hardware exceptions for division by zero, overflow to infinity,
573     // invalid operations, and uninitialized floating-point variables.
574     fpctrl.enableExceptions(FloatingPointControl.severeExceptions);
575 
576     // This will generate a hardware exception, if x is a
577     // default-initialized floating point variable:
578     real x; // Add `= 0` or even `= real.nan` to not throw the exception.
579     real y = x * 3.0;
580 
581     // The exception is only thrown for default-uninitialized NaN-s.
582     // NaN-s with other payload are valid:
583     real z = y * real.nan; // ok
584 
585     // The set hardware exceptions and rounding modes will be disabled when
586     // leaving this scope.
587 }
588 ----
589 
590  */
591 struct FloatingPointControl
592 {
593 nothrow @nogc:
594 
595     alias RoundingMode = uint; ///
596 
597     version (StdDdoc)
598     {
599         enum : RoundingMode
600         {
601             /** IEEE rounding modes.
602              * The default mode is roundToNearest.
603              *
604              *  roundingMask = A mask of all rounding modes.
605              */
606             roundToNearest,
607             roundDown, /// ditto
608             roundUp, /// ditto
609             roundToZero, /// ditto
610             roundingMask, /// ditto
611         }
612     }
613     else version (CRuntime_Microsoft)
614     {
615         // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv).
616         enum : RoundingMode
617         {
618             roundToNearest = 0x0000,
619             roundDown      = 0x0400,
620             roundUp        = 0x0800,
621             roundToZero    = 0x0C00,
622             roundingMask   = roundToNearest | roundDown
623                              | roundUp | roundToZero,
624         }
625     }
626     else
627     {
628         enum : RoundingMode
629         {
630             roundToNearest = core.stdc.fenv.FE_TONEAREST,
631             roundDown      = core.stdc.fenv.FE_DOWNWARD,
632             roundUp        = core.stdc.fenv.FE_UPWARD,
633             roundToZero    = core.stdc.fenv.FE_TOWARDZERO,
634             roundingMask   = roundToNearest | roundDown
635                              | roundUp | roundToZero,
636         }
637     }
638 
639     /***
640      * Change the floating-point hardware rounding mode
641      *
642      * Changing the rounding mode in the middle of a function can interfere
643      * with optimizations of floating point expressions, as the optimizer assumes
644      * that the rounding mode does not change.
645      * It is best to change the rounding mode only at the
646      * beginning of the function, and keep it until the function returns.
647      * It is also best to add the line:
648      * ---
649      * pragma(inline, false);
650      * ---
651      * as the first line of the function so it will not get inlined.
652      * Params:
653      *    newMode = the new rounding mode
654      */
655     @property void rounding(RoundingMode newMode) @trusted
656     {
657         initialize();
658         setControlState((getControlState() & (-1 - roundingMask)) | (newMode & roundingMask));
659     }
660 
661     /// Returns: the currently active rounding mode
662     @property static RoundingMode rounding() @trusted pure
663     {
664         return cast(RoundingMode)(getControlState() & roundingMask);
665     }
666 
667     alias ExceptionMask = uint; ///
668 
669     version (StdDdoc)
670     {
671         enum : ExceptionMask
672         {
673             /** IEEE hardware exceptions.
674              *  By default, all exceptions are masked (disabled).
675              *
676              *  severeExceptions = The overflow, division by zero, and invalid
677              *  exceptions.
678              */
679             subnormalException,
680             inexactException, /// ditto
681             underflowException, /// ditto
682             overflowException, /// ditto
683             divByZeroException, /// ditto
684             invalidException, /// ditto
685             severeExceptions, /// ditto
686             allExceptions, /// ditto
687         }
688     }
689     else version (ARM_Any)
690     {
691         enum : ExceptionMask
692         {
693             subnormalException    = 0x8000,
694             inexactException      = 0x1000,
695             underflowException    = 0x0800,
696             overflowException     = 0x0400,
697             divByZeroException    = 0x0200,
698             invalidException      = 0x0100,
699             severeExceptions   = overflowException | divByZeroException
700                                  | invalidException,
701             allExceptions      = severeExceptions | underflowException
702                                  | inexactException | subnormalException,
703         }
704     }
705     else version (PPC_Any)
706     {
707         enum : ExceptionMask
708         {
709             inexactException      = 0x0008,
710             divByZeroException    = 0x0010,
711             underflowException    = 0x0020,
712             overflowException     = 0x0040,
713             invalidException      = 0x0080,
714             severeExceptions   = overflowException | divByZeroException
715                                  | invalidException,
716             allExceptions      = severeExceptions | underflowException
717                                  | inexactException,
718         }
719     }
720     else version (RISCV_Any)
721     {
722         enum : ExceptionMask
723         {
724             inexactException      = 0x01,
725             divByZeroException    = 0x08,
726             underflowException    = 0x02,
727             overflowException     = 0x04,
728             invalidException      = 0x10,
729             severeExceptions   = overflowException | divByZeroException
730                                  | invalidException,
731             allExceptions      = severeExceptions | underflowException
732                                  | inexactException,
733         }
734     }
735     else version (HPPA)
736     {
737         enum : ExceptionMask
738         {
739             inexactException      = 0x01,
740             underflowException    = 0x02,
741             overflowException     = 0x04,
742             divByZeroException    = 0x08,
743             invalidException      = 0x10,
744             severeExceptions   = overflowException | divByZeroException
745                                  | invalidException,
746             allExceptions      = severeExceptions | underflowException
747                                  | inexactException,
748         }
749     }
750     else version (LoongArch_Any)
751     {
752         enum : ExceptionMask
753         {
754             inexactException      = 0x00,
755             divByZeroException    = 0x01,
756             overflowException     = 0x02,
757             underflowException    = 0x04,
758             invalidException      = 0x08,
759             severeExceptions   = overflowException | divByZeroException
760                                  | invalidException,
761             allExceptions      = severeExceptions | underflowException
762                                  | inexactException,
763         }
764     }
765     else version (MIPS_Any)
766     {
767         enum : ExceptionMask
768         {
769             inexactException      = 0x0080,
770             divByZeroException    = 0x0400,
771             overflowException     = 0x0200,
772             underflowException    = 0x0100,
773             invalidException      = 0x0800,
774             severeExceptions   = overflowException | divByZeroException
775                                  | invalidException,
776             allExceptions      = severeExceptions | underflowException
777                                  | inexactException,
778         }
779     }
780     else version (SPARC_Any)
781     {
782         enum : ExceptionMask
783         {
784             inexactException      = 0x0800000,
785             divByZeroException    = 0x1000000,
786             overflowException     = 0x4000000,
787             underflowException    = 0x2000000,
788             invalidException      = 0x8000000,
789             severeExceptions   = overflowException | divByZeroException
790                                  | invalidException,
791             allExceptions      = severeExceptions | underflowException
792                                  | inexactException,
793         }
794     }
795     else version (IBMZ_Any)
796     {
797         enum : ExceptionMask
798         {
799             inexactException      = 0x08000000,
800             divByZeroException    = 0x40000000,
801             overflowException     = 0x20000000,
802             underflowException    = 0x10000000,
803             invalidException      = 0x80000000,
804             severeExceptions   = overflowException | divByZeroException
805                                  | invalidException,
806             allExceptions      = severeExceptions | underflowException
807                                  | inexactException,
808         }
809     }
810     else version (X86_Any)
811     {
812         enum : ExceptionMask
813         {
814             inexactException      = 0x20,
815             underflowException    = 0x10,
816             overflowException     = 0x08,
817             divByZeroException    = 0x04,
818             subnormalException    = 0x02,
819             invalidException      = 0x01,
820             severeExceptions   = overflowException | divByZeroException
821                                  | invalidException,
822             allExceptions      = severeExceptions | underflowException
823                                  | inexactException | subnormalException,
824         }
825     }
826     else
827         static assert(false, "Not implemented for this architecture");
828 
829     version (ARM_Any)
830     {
831         static bool hasExceptionTraps_impl() @safe
832         {
833             auto oldState = getControlState();
834             // If exceptions are not supported, we set the bit but read it back as zero
835             // https://sourceware.org/ml/libc-ports/2012-06/msg00091.html
836             setControlState(oldState | divByZeroException);
837             immutable result = (getControlState() & allExceptions) != 0;
838             setControlState(oldState);
839             return result;
840         }
841     }
842 
843     /// Returns: true if the current FPU supports exception trapping
844     @property static bool hasExceptionTraps() @safe pure
845     {
846         version (X86_Any)
847             return true;
848         else version (PPC_Any)
849             return true;
850         else version (MIPS_Any)
851             return true;
852         else version (LoongArch_Any)
853             return true;
854         else version (ARM_Any)
855         {
856             // The hasExceptionTraps_impl function is basically pure,
857             // as it restores all global state
858             auto fptr = ( () @trusted => cast(bool function() @safe
859                 pure nothrow @nogc)&hasExceptionTraps_impl)();
860             return fptr();
861         }
862         else
863             assert(0, "Not yet supported");
864     }
865 
866     /// Enable (unmask) specific hardware exceptions. Multiple exceptions may be ORed together.
867     void enableExceptions(ExceptionMask exceptions) @trusted
868     {
869         assert(hasExceptionTraps);
870         initialize();
871         version (X86_Any)
872             setControlState(getControlState() & ~(exceptions & allExceptions));
873         else
874             setControlState(getControlState() | (exceptions & allExceptions));
875     }
876 
877     /// Disable (mask) specific hardware exceptions. Multiple exceptions may be ORed together.
878     void disableExceptions(ExceptionMask exceptions) @trusted
879     {
880         assert(hasExceptionTraps);
881         initialize();
882         version (X86_Any)
883             setControlState(getControlState() | (exceptions & allExceptions));
884         else
885             setControlState(getControlState() & ~(exceptions & allExceptions));
886     }
887 
888     /// Returns: the exceptions which are currently enabled (unmasked)
889     @property static ExceptionMask enabledExceptions() @trusted pure
890     {
891         assert(hasExceptionTraps);
892         version (X86_Any)
893             return (getControlState() & allExceptions) ^ allExceptions;
894         else
895             return (getControlState() & allExceptions);
896     }
897 
898     ///  Clear all pending exceptions, then restore the original exception state and rounding mode.
899     ~this() @trusted
900     {
901         clearExceptions();
902         if (initialized)
903             setControlState(savedState);
904     }
905 
private:
    // Control word captured by initialize(); written back by the destructor.
    ControlState savedState;

    // True once initialize() has stored savedState.
    bool initialized;

    // Integer type of the control word: 16 bits on x86, 64 bits on SPARC,
    // 32 bits on every other supported family.
    version (X86_Any)
        alias ControlState = ushort;
    else version (SPARC_Any)
        alias ControlState = ulong;
    else version (ARM_Any)
        alias ControlState = uint;
    else version (HPPA)
        alias ControlState = uint;
    else version (PPC_Any)
        alias ControlState = uint;
    else version (RISCV_Any)
        alias ControlState = uint;
    else version (LoongArch_Any)
        alias ControlState = uint;
    else version (MIPS_Any)
        alias ControlState = uint;
    else version (IBMZ_Any)
        alias ControlState = uint;
    else
        static assert(false, "Not implemented for this architecture");
949 
950     void initialize() @safe // one-time setup: remembers the control state that the destructor writes back
951     {
952         // BUG: This works around the absence of this() constructors.
953         if (initialized) return; // later calls are no-ops, so the first snapshot is kept
954         clearExceptions(); // clear pending exceptions before taking the snapshot
955         savedState = getControlState();
956         initialized = true; // tells ~this() that savedState is valid
957     }
958 
959     // Clear all pending exceptions. The implementation is selected at compile time: per architecture under LDC, resetIeeeFlags() where IeeeFlagsSupport is set.
960     static void clearExceptions() @safe
961     {
962         version (LDC)
963         {
964             version (X86_Any)
965             {
966                 resetIeeeFlags();
967             }
968             else version (PPC_Any)
969             {
970                 asm pure nothrow @nogc @trusted // five mtfsb0 instructions, for FPSCR bit numbers 24 through 28
971                 {
972                     `mtfsb0 24
973                      mtfsb0 25
974                      mtfsb0 26
975                      mtfsb0 27
976                      mtfsb0 28`;
977                 }
978             }
979             else version (MIPS_Any)
980             {
981                 version (D_LP64) enum mask = 0xFFFFF07Fu; // AND-mask: zeroes bits 7-11 and keeps every other bit
982                 else             enum mask = 0xF07Fu;     // NOTE(review): this variant also zeroes bits 16-31 — confirm that is intended
983 
984                 const cs = getControlState();
985                 setControlState(cs & mask); // read-modify-write of the control register
986             }
987             else version (ARM_Any)
988             {
989                 resetIeeeFlags();
990             }
991             else
992                 static assert(false, "Not implemented for this architecture"); // LDC target without a clearing path
993         }
994         else version (IeeeFlagsSupport) // other compilers: delegate to resetIeeeFlags()
995             resetIeeeFlags();
996         else
997             static assert(false, "Not implemented for this architecture");
998     }
999 
1000     // Read from the control register: returns the target's current floating-point control state via architecture-specific inline asm.
1001     package(std.math) static ControlState getControlState() @trusted pure
1002     {
1003         version (LDC)
1004         {
1005             ControlState cont;
1006 
1007             version (X86)
1008             {
1009                 asm pure nothrow @nogc // zeroes EAX (listed as a clobber), then fstcw stores the x87 control word into cont
1010                 {
1011                     `xor %%eax, %%eax
1012                      fstcw %0`
1013                     : "=m" (cont)
1014                     :
1015                     : "eax";
1016                 }
1017             }
1018             else version (X86_64)
1019             {
1020                 asm pure nothrow @nogc // same sequence as the X86 branch, using RAX
1021                 {
1022                     `xor %%rax, %%rax
1023                      fstcw %0`
1024                     : "=m" (cont)
1025                     :
1026                     : "rax";
1027                 }
1028             }
1029             else version (PPC_Any)
1030             {
1031                 double fspr; // mffs delivers its result in a floating-point register ("=f"), hence the double
1032                 asm pure nothrow @nogc { "mffs %0" : "=f" (fspr); }
1033                 cont = cast(ControlState) *cast(ulong*) &fspr; // reinterpret the double's bits as ulong, then narrow to ControlState
1034             }
1035             else version (MIPS_Any)
1036             {
1037                 asm pure nothrow @nogc // cfc1 copies FP control register $31 into cont
1038                 {
1039                     `.set noat
1040                      cfc1 %0, $31
1041                      .set at`
1042                     : "=r" (cont);
1043                 }
1044             }
1045             else version (AArch64)
1046             {
1047                 asm pure nothrow @nogc { "mrs %0, FPCR" : "=r" (cont); } // read FPCR
1048             }
1049             else version (ARM)
1050             {
1051                 asm pure nothrow @nogc { "vmrs %0, FPSCR" : "=r" (cont); } // read FPSCR
1052             }
1053             else version (RISCV_Any)
1054             {
1055                 asm pure nothrow @nogc { "frcsr %0" : "=r" (cont); } // read the floating-point CSR
1056             }
1057             else
1058                 assert(0, "Not yet supported"); // run-time failure (not a static assert) on other LDC targets
1059 
1060             return cont;
1061         }
1062         else version (D_InlineAsm_X86)
1063         {
1064             short cont; // declared as signed short here; converted to ControlState on return
1065             asm pure nothrow @nogc
1066             {
1067                 xor EAX, EAX;
1068                 fstcw cont; // store the x87 control word into cont
1069             }
1070             return cont;
1071         }
1072         else version (D_InlineAsm_X86_64)
1073         {
1074             short cont; // declared as signed short here; converted to ControlState on return
1075             asm pure nothrow @nogc
1076             {
1077                 xor RAX, RAX;
1078                 fstcw cont; // store the x87 control word into cont
1079             }
1080             return cont;
1081         }
1082         else version (RISCV_Any) // NOTE(review): the asm is supplied through a string mixin — presumably so compilers that cannot parse this asm syntax never see it; confirm
1083         {
1084             mixin(`
1085             ControlState cont;
1086             asm pure nothrow @nogc
1087             {
1088                 "frcsr %0" : "=r" (cont);
1089             }
1090             return cont;
1091             `);
1092         }
1093         else version (LoongArch_Any)
1094         {
1095             ControlState cont;
1096             asm pure nothrow @nogc
1097             {
1098                 "movfcsr2gr %0,$r0" : "=r" (cont);
1099             }
1100             cont &= (roundingMask | allExceptions); // keep only the rounding-mode and exception bits
1101             return cont;
1102         }
1103         else
1104             assert(0, "Not yet supported"); // run-time failure for any other compiler/target combination
1105     }
1106 
1107     // Set the control register: writes newState to the target's floating-point control register (and, on x86 with SSE, mirrors it into MXCSR).
1108     package(std.math) static void setControlState(ControlState newState) @trusted
1109     {
1110         version (InlineAsm_X86_Any)
1111         {
1112           version (LDC)
1113           {
1114             asm nothrow @nogc // fclex, then fldcw loads newState as the x87 control word
1115             {
1116                 `fclex
1117                  fldcw %0`
1118                 :
1119                 : "m" (newState)
1120                 : "fpsw";
1121             }
1122           }
1123           else
1124           {
1125             asm nothrow @nogc
1126             {
1127                 fclex; // presumably so no pending x87 exception fires once the new masks take effect — TODO confirm
1128                 fldcw newState;
1129             }
1130           }
1131 
1132             // Also update MXCSR, SSE's control register.
1133             if (haveSSE) // compile-time true with StaticallyHaveSSE; otherwise core.cpuid.sse on X86
1134             {
1135                 uint mxcsr;
1136               version (LDC)
1137               {
1138                 asm nothrow @nogc { "stmxcsr %0" : "=m" (mxcsr); } // read the current MXCSR so the bits not touched below are written back unchanged
1139               }
1140               else
1141               {
1142                 asm nothrow @nogc { stmxcsr mxcsr; } // read the current MXCSR so the bits not touched below are written back unchanged
1143               }
1144 
1145                 /* In the FPU control register, rounding mode is in bits 10 and
1146                 11. In MXCSR it's in bits 13 and 14. */
1147                 mxcsr &= ~(roundingMask << 3);             // delete old rounding mode
1148                 mxcsr |= (newState & roundingMask) << 3;   // write new rounding mode
1149 
1150                 /* In the FPU control register, masks are bits 0 through 5.
1151                 In MXCSR they're 7 through 12. */
1152                 mxcsr &= ~(allExceptions << 7);            // delete old masks
1153                 mxcsr |= (newState & allExceptions) << 7;  // write new exception masks
1154 
1155               version (LDC)
1156               {
1157                 asm nothrow @nogc { "ldmxcsr %0" : : "m" (mxcsr) : "flags"; } // store the merged value back into MXCSR
1158               }
1159               else
1160               {
1161                 asm nothrow @nogc { ldmxcsr mxcsr; } // store the merged value back into MXCSR
1162               }
1163             }
1164         }
1165         else version (RISCV_Any) // fscsr writes newState to the floating-point CSR; the asm is supplied through a string mixin
1166         {
1167             mixin(`
1168             asm pure nothrow @nogc
1169             {
1170                 "fscsr %0" : : "r" (newState);
1171             }
1172             `);
1173         }
1174         else version (LoongArch_Any)
1175         {
1176             asm nothrow @nogc
1177             {
1178                 "movgr2fcsr $r0,%0" :
1179                 : "r" (newState & (roundingMask | allExceptions)); // only rounding-mode and exception bits are written
1180             }
1181         }
1182         else version (LDC)
1183         {
1184             version (PPC_Any)
1185             {
1186                 ulong tmpState = newState; // widen to 64 bits so it can be reinterpreted as a double
1187                 double fspr = *cast(double*) &tmpState; // bit-for-bit reinterpretation, not a numeric conversion
1188                 asm nothrow @nogc { "mtfsf 0x0f, %0" : : "f" (fspr); } // mtfsf with field mask 0x0f, source in an FP register
1189             }
1190             else version (MIPS_Any)
1191             {
1192                 asm nothrow @nogc // ctc1 copies newState into FP control register $31
1193                 {
1194                     `.set noat
1195                      ctc1 %0, $31
1196                      .set at`
1197                     :
1198                     : "r" (newState);
1199                 }
1200             }
1201             else version (AArch64)
1202             {
1203                 asm nothrow @nogc { "msr FPCR, %0" : : "r" (newState); } // write FPCR
1204             }
1205             else version (ARM)
1206             {
1207                 asm nothrow @nogc { "vmsr FPSCR, %0" : : "r" (newState); } // write FPSCR
1208             }
1209             else
1210                 assert(0, "Not yet supported"); // run-time failure (not a static assert) on other LDC targets
1211         }
1212         else
1213             assert(0, "Not yet supported"); // run-time failure for any other compiler/target combination
1214     }
1215 }
1216 
1217 ///
1218 @optStrategy("none") // LDC
1219 @safe unittest
1220 {
1221     import std.math.rounding : lrint;
1222 
1223     FloatingPointControl fpctrl; // scoped control object; its destructor restores any state it saved
1224 
1225     fpctrl.rounding = FloatingPointControl.roundDown;
1226     assert(lrint(1.5) == 1.0); // under roundDown, 1.5 rounds to 1
1227 
1228     fpctrl.rounding = FloatingPointControl.roundUp;
1229     assert(lrint(1.4) == 2.0); // under roundUp, 1.4 rounds to 2
1230 
1231     fpctrl.rounding = FloatingPointControl.roundToNearest;
1232     assert(lrint(1.5) == 2.0); // under roundToNearest, 1.5 rounds to 2
1233 }
1234 
1235 @safe unittest // checks that destroying a FloatingPointControl puts rounding mode and enabled exceptions back to the expected defaults
1236 {
1237     void ensureDefaults() // asserts round-to-nearest and, where traps exist, no enabled exceptions
1238     {
1239         assert(FloatingPointControl.rounding
1240                == FloatingPointControl.roundToNearest);
1241         if (FloatingPointControl.hasExceptionTraps)
1242             assert(FloatingPointControl.enabledExceptions == 0);
1243     }
1244 
1245     {
1246         FloatingPointControl ctrl; // case 1: an instance that is never used
1247     }
1248     ensureDefaults();
1249 
1250     {
1251         FloatingPointControl ctrl; // case 2: only the rounding mode is changed inside the scope
1252         ctrl.rounding = FloatingPointControl.roundDown;
1253         assert(FloatingPointControl.rounding == FloatingPointControl.roundDown);
1254     }
1255     ensureDefaults();
1256 
1257     if (FloatingPointControl.hasExceptionTraps) // case 3: exceptions and rounding mode are both changed; skipped without trap support
1258     {
1259         FloatingPointControl ctrl;
1260         ctrl.enableExceptions(FloatingPointControl.divByZeroException
1261                               | FloatingPointControl.overflowException);
1262         assert(ctrl.enabledExceptions ==
1263                (FloatingPointControl.divByZeroException
1264                 | FloatingPointControl.overflowException));
1265 
1266         ctrl.rounding = FloatingPointControl.roundUp;
1267         assert(FloatingPointControl.rounding == FloatingPointControl.roundUp);
1268     }
1269     ensureDefaults();
1270 }
1271 
1272 version (LDC)
1273 {
1274     // TODO: most likely issue #888 again, verify
1275     // Linux x86_64: debug works, release fails
1276     // Win64: debug and release fail
1277 }
1278 else
1279 @safe unittest // rounding
1280 {
1281     import std.meta : AliasSeq;
1282 
1283     static T addRound(T)(uint rm) // computes 1 + 0.1 in type T under rounding mode rm
1284     {
1285         pragma(inline, false); // kept as a real call; see the note on rounding modes in the loop below
1286         FloatingPointControl fpctrl;
1287         fpctrl.rounding = rm;
1288         T x = 1;
1289         x = forceAddOp(x, 0.1L); // the addition goes through a non-inlined helper
1290         return x;
1291     }
1292 
1293     static T subRound(T)(uint rm) // computes -1 - 0.1 in type T under rounding mode rm
1294     {
1295         pragma(inline, false); // kept as a real call; see the note on rounding modes in the loop below
1296         FloatingPointControl fpctrl;
1297         fpctrl.rounding = rm;
1298         T x = -1;
1299         x = forceSubOp(x, 0.1L); // the subtraction goes through a non-inlined helper
1300         return x;
1301     }
1302 
1303     static foreach (T; AliasSeq!(float, double, real))
1304     {{
1305         /* Be careful with changing the rounding mode, it interferes
1306          * with common subexpressions. Changing rounding modes should
1307          * be done with separate functions that are not inlined.
1308          */
1309 
1310         {
1311             T u = addRound!(T)(FloatingPointControl.roundUp);
1312             T d = addRound!(T)(FloatingPointControl.roundDown);
1313             T z = addRound!(T)(FloatingPointControl.roundToZero);
1314 
1315             assert(u > d);
1316             assert(z == d); // the sum is positive, so rounding toward zero matches rounding down
1317         }
1318 
1319         {
1320             T u = subRound!(T)(FloatingPointControl.roundUp);
1321             T d = subRound!(T)(FloatingPointControl.roundDown);
1322             T z = subRound!(T)(FloatingPointControl.roundToZero);
1323 
1324             assert(u > d);
1325             assert(z == u); // the difference is negative, so rounding toward zero matches rounding up
1326         }
1327     }}
1328 }
1329 
1330 } // FloatingPointControlSupport
1331 
1332 version (StdUnittest)
1333 {
1334     // These helpers are intended to avoid constant propagation by the optimizer: each performs one arithmetic operation behind a non-inlined call.
1335     pragma(inline, false) private @safe // these attributes apply to all four templates in the braces
1336     {
1337         T forceAddOp(T)(T x, T y) { return x + y; }
1338         T forceSubOp(T)(T x, T y) { return x - y; }
1339         T forceMulOp(T)(T x, T y) { return x * y; }
1340         T forceDivOp(T)(T x, T y) { return x / y; }
1341     }
1342 }
The OpenD Programming Language