std.math.hardware source code

1 // Written in the D programming language.
2 
3 /**
4 This is a submodule of $(MREF std, math).
5 
6 It contains hardware support for floating point numbers.
7 
8 Copyright: Copyright The D Language Foundation 2000 - 2011.
9 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
10 Authors:   $(HTTP digitalmars.com, Walter Bright), Don Clugston,
11            Conversion of CEPHES math library to D by Iain Buclaw and David Nadlinger
12 Source: $(PHOBOSSRC std/math/hardware.d)
13  */
14 
15 module std.math.hardware;
16 
17 static import core.stdc.fenv;
18 
19 version (X86)       version = X86_Any;
20 version (X86_64)    version = X86_Any;
21 version (PPC)       version = PPC_Any;
22 version (PPC64)     version = PPC_Any;
23 version (MIPS32)    version = MIPS_Any;
24 version (MIPS64)    version = MIPS_Any;
25 version (AArch64)   version = ARM_Any;
26 version (ARM)       version = ARM_Any;
27 version (S390)      version = IBMZ_Any;
28 version (SPARC)     version = SPARC_Any;
29 version (SPARC64)   version = SPARC_Any;
30 version (SystemZ)   version = IBMZ_Any;
31 version (RISCV32)   version = RISCV_Any;
32 version (RISCV64)   version = RISCV_Any;
33 version (LoongArch64)   version = LoongArch_Any;
34 
35 version (D_InlineAsm_X86)    version = InlineAsm_X86_Any;
36 version (D_InlineAsm_X86_64) version = InlineAsm_X86_Any;
37 
38 version (X86_64) version = StaticallyHaveSSE;
39 version (X86) version (OSX) version = StaticallyHaveSSE;
40 
41 version (StaticallyHaveSSE)
42 {
43     private enum bool haveSSE = true;
44 }
45 else version (X86)
46 {
47     static import core.cpuid;
48     private alias haveSSE = core.cpuid.sse;
49 }
50 
51 version (D_SoftFloat)
52 {
53     // Some soft float implementations may support IEEE floating flags.
54     // The implementation here supports hardware flags only, and so is currently
55     // only available for supported targets.
56 }
57 else version (X86_Any)   version = IeeeFlagsSupport;
58 else version (PPC_Any)   version = IeeeFlagsSupport;
59 else version (RISCV_Any) version = IeeeFlagsSupport;
60 else version (MIPS_Any)  version = IeeeFlagsSupport;
61 else version (LoongArch_Any) version = IeeeFlagsSupport;
62 else version (ARM_Any)   version = IeeeFlagsSupport;
63 
64 // Struct FloatingPointControl is only available if hardware FP units are available.
65 version (D_HardFloat)
66 {
67     // FloatingPointControl.clearExceptions() depends on version IeeeFlagsSupport
68     version (IeeeFlagsSupport) version = FloatingPointControlSupport;
69 }
70 
71 version (IeeeFlagsSupport)
72 {
73 
74 /** IEEE exception status flags ('sticky bits')
75 
76  These flags indicate that an exceptional floating-point condition has occurred.
77  They indicate that a NaN or an infinity has been generated, that a result
78  is inexact, or that a signalling NaN has been encountered. If floating-point
79  exceptions are enabled (unmasked), a hardware exception will be generated
80  instead of setting these flags.
81  */
82 struct IeeeFlags
83 {
84 nothrow @nogc:
85 
86 private:
87     // The x87 FPU status register is 16 bits.
88     // The Pentium SSE2 status register is 32 bits.
89     // The ARM and PowerPC FPSCR is a 32-bit register.
90     // The SPARC FSR is a 32bit register (64 bits for SPARC 7 & 8, but high bits are uninteresting).
91     // The RISC-V (32 & 64 bit) fcsr is a 32-bit register.
92     // The LoongArch fcsr (fcsr0) is a 32-bit register.
93     uint flags;
94 
95     version (CRuntime_Microsoft)
96     {
97         // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv).
98         // Applies to both x87 status word (16 bits) and SSE2 status word(32 bits).
99         enum : int
100         {
101             INEXACT_MASK   = 0x20,
102             UNDERFLOW_MASK = 0x10,
103             OVERFLOW_MASK  = 0x08,
104             DIVBYZERO_MASK = 0x04,
105             INVALID_MASK   = 0x01,
106 
107             EXCEPTIONS_MASK = 0b11_1111
108         }
109         // Don't bother about subnormals, they are not supported on most CPUs.
110         //  SUBNORMAL_MASK = 0x02;
111     }
112     else
113     {
114         enum : int
115         {
116             INEXACT_MASK    = core.stdc.fenv.FE_INEXACT,
117             UNDERFLOW_MASK  = core.stdc.fenv.FE_UNDERFLOW,
118             OVERFLOW_MASK   = core.stdc.fenv.FE_OVERFLOW,
119             DIVBYZERO_MASK  = core.stdc.fenv.FE_DIVBYZERO,
120             INVALID_MASK    = core.stdc.fenv.FE_INVALID,
121             EXCEPTIONS_MASK = core.stdc.fenv.FE_ALL_EXCEPT,
122         }
123     }
124 
125     static uint getIeeeFlags() @trusted pure
126     {
127         version (InlineAsm_X86_Any)
128         {
129             ushort sw;
130             asm pure nothrow @nogc { fstsw sw; }
131 
132             // OR the result with the SSE2 status register (MXCSR).
133             if (haveSSE)
134             {
135                 uint mxcsr;
136                 asm pure nothrow @nogc { stmxcsr mxcsr; }
137                 return (sw | mxcsr) & EXCEPTIONS_MASK;
138             }
139             else return sw & EXCEPTIONS_MASK;
140         }
141         else version (SPARC)
142         {
143             /*
144                int retval;
145                asm pure nothrow @nogc { st %fsr, retval; }
146                return retval;
147             */
148             assert(0, "Not yet supported");
149         }
150         else version (ARM)
151         {
152             assert(false, "Not yet supported.");
153         }
154         else version (RISCV_Any)
155         {
156             uint result = void;
157             asm pure nothrow @nogc
158             {
159                 "frflags %0" : "=r" (result);
160             }
161             return result;
162         }
163         else version (LoongArch_Any)
164         {
165             uint result = void;
166             asm pure nothrow @nogc
167             {
168                 "movfcsr2gr %0, $fcsr2" : "=r" (result);
169             }
170             return result & EXCEPTIONS_MASK;
171         }
172         else
173             assert(0, "Not yet supported");
174     }
175 
176     static void resetIeeeFlags() @trusted
177     {
178         version (InlineAsm_X86_Any)
179         {
180             asm nothrow @nogc
181             {
182                 fnclex;
183             }
184 
185             // Also clear exception flags in MXCSR, SSE's control register.
186             if (haveSSE)
187             {
188                 uint mxcsr;
189                 asm nothrow @nogc { stmxcsr mxcsr; }
190                 mxcsr &= ~EXCEPTIONS_MASK;
191                 asm nothrow @nogc { ldmxcsr mxcsr; }
192             }
193         }
194         else version (RISCV_Any)
195         {
196             uint newValues = 0x0;
197             asm pure nothrow @nogc
198             {
199                 "fsflags %0" : : "r" (newValues);
200             }
201         }
202         else version (LoongArch_Any)
203         {
204             asm nothrow @nogc
205             {
206                 "movgr2fcsr $fcsr2,$r0";
207             }
208         }
209         else
210         {
211             /* SPARC:
212               int tmpval;
213               asm pure nothrow @nogc { st %fsr, tmpval; }
214               tmpval &=0xFFFF_FC00;
215               asm pure nothrow @nogc { ld tmpval, %fsr; }
216             */
217            assert(0, "Not yet supported");
218         }
219     }
220 
221 public:
222     /**
223      * The result cannot be represented exactly, so rounding occurred.
224      * Example: `x = sin(0.1);`
225      */
226     @property bool inexact() @safe const { return (flags & INEXACT_MASK) != 0; }
227 
228     /**
229      * A zero was generated by underflow
230      * Example: `x = real.min*real.epsilon/2;`
231      */
232     @property bool underflow() @safe const { return (flags & UNDERFLOW_MASK) != 0; }
233 
234     /**
235      * An infinity was generated by overflow
236      * Example: `x = real.max*2;`
237      */
238     @property bool overflow() @safe const { return (flags & OVERFLOW_MASK) != 0; }
239 
240     /**
241      * An infinity was generated by division by zero
242      * Example: `x = 3/0.0;`
243      */
244     @property bool divByZero() @safe const { return (flags & DIVBYZERO_MASK) != 0; }
245 
246     /**
247      * A machine NaN was generated.
248      * Example: `x = real.infinity * 0.0;`
249      */
250     @property bool invalid() @safe const { return (flags & INVALID_MASK) != 0; }
251 }
252 
///
version (StdDdoc)
@safe unittest
{
    import std.math.traits : isNaN;

    // Integer-only work; used below to show the flags are left alone.
    static void func()
    {
        int a = 10 * 10;
    }

    real value = 3.5;

    // Start with every flag cleared.
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);

    // Dividing by zero produces infinity and raises divByZero.
    value /= 0.0L;
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // infinity * 0 produces a NaN and raises invalid.
    value *= 0.0L;
    assert(ieeeFlags.invalid);
    assert(isNaN(value));

    // Calling func() must not change the status flags.
    IeeeFlags snapshot = ieeeFlags;
    func();
    assert(ieeeFlags == snapshot);
}
281 
@safe unittest
{
    import std.math.traits : isNaN;

    // Integer-only work; used below to show the flags are left alone.
    static void func()
    {
        int a = 10 * 10;
    }

    real value = 3.5;

    // Start with every flag cleared.
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);

    // Dividing by zero produces infinity and raises divByZero.
    value = forceDivOp(value, 0.0L);
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // infinity * 0 produces a NaN and raises invalid.
    value = forceMulOp(value, 0.0L);
    assert(ieeeFlags.invalid);
    assert(isNaN(value));

    // Calling func() must not change the status flags.
    IeeeFlags snapshot = ieeeFlags;
    func();
    assert(ieeeFlags == snapshot);
}
308 
@safe unittest
{
    import std.meta : AliasSeq;

    // One scenario: an operation to run and the flag it is expected to raise.
    static struct FlagCase
    {
        void delegate() @trusted trigger;
        bool function() @trusted flagIsSet;
    }

    static foreach (T; AliasSeq!(float, double, real))
    {{
        // Every trigger stores its result here; this avoids the
        // `call without side effects` warning.
        T sink;

        FlagCase[] cases = [
            FlagCase(() { sink = forceAddOp!T(1, 0.1L); },
                     () => ieeeFlags.inexact),
            FlagCase(() { sink = forceDivOp!T(T.min_normal, T.max); },
                     () => ieeeFlags.underflow),
            FlagCase(() { sink = forceAddOp!T(T.max, T.max); },
                     () => ieeeFlags.overflow),
            FlagCase(() { sink = forceDivOp!T(1, 0); },
                     () => ieeeFlags.divByZero),
            FlagCase(() { sink = forceDivOp!T(0, 0); },
                     () => ieeeFlags.invalid),
        ];

        foreach (scenario; cases)
        {
            // The flag must be clear after a reset and set after the operation.
            resetIeeeFlags();
            assert(!scenario.flagIsSet());
            scenario.trigger();
            assert(scenario.flagIsSet());
        }
    }}
}
353 
/// Clear every floating-point status flag (set them all to false).
void resetIeeeFlags() nothrow @nogc @trusted
{
    // The target-specific work lives in the private static member of IeeeFlags.
    IeeeFlags.resetIeeeFlags();
}
359 
///
version (StdDdoc)
@safe unittest
{
    resetIeeeFlags();

    // Raise divByZero ...
    real value = 3.5;
    value /= 0.0L;
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // ... and check that a reset clears it again.
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
}
373 
@safe unittest
{
    resetIeeeFlags();

    // Raise divByZero ...
    real value = 3.5;
    value = forceDivOp(value, 0.0L);
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // ... and check that a reset clears it again.
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
}
385 
/// Returns: a snapshot of the current state of the floating-point status flags
@property IeeeFlags ieeeFlags() @trusted pure nothrow @nogc
{
    // Read the raw flag bits once and wrap them in the value type.
    immutable uint raw = IeeeFlags.getIeeeFlags();
    return IeeeFlags(raw);
}
391 
///
version (StdDdoc)
@safe nothrow unittest
{
    import std.math.traits : isNaN;

    resetIeeeFlags();
    real value = 3.5;

    // Division by zero: the result is infinity and divByZero is raised.
    value /= 0.0L;
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // infinity * 0: the result is a NaN and invalid is raised.
    value *= 0.0L;
    assert(isNaN(value));
    assert(ieeeFlags.invalid);
}
409 
@safe nothrow unittest
{
    import std.math.traits : isNaN;

    resetIeeeFlags();
    real value = 3.5;

    // Division by zero: the result is infinity and divByZero is raised.
    value = forceDivOp(value, 0.0L);
    assert(value == real.infinity);
    assert(ieeeFlags.divByZero);

    // infinity * 0: the result is a NaN and invalid is raised.
    value = forceMulOp(value, 0.0L);
    assert(isNaN(value));
    assert(ieeeFlags.invalid);
}
425 
426 } // IeeeFlagsSupport
427 
428 
429 version (FloatingPointControlSupport)
430 {
431 
432 /** Control the Floating point hardware
433 
434   Change the IEEE754 floating-point rounding mode and the floating-point
435   hardware exceptions.
436 
437   By default, the rounding mode is roundToNearest and all hardware exceptions
438   are disabled. For most applications, debugging is easier if the $(I division
439   by zero), $(I overflow), and $(I invalid operation) exceptions are enabled.
440   These three are combined into a $(I severeExceptions) value for convenience.
441   Note in particular that if $(I invalidException) is enabled, a hardware trap
442   will be generated whenever an uninitialized floating-point variable is used.
443 
444   All changes are temporary. The previous state is restored at the
445   end of the scope.
446 
447 
448 Example:
449 ----
450 {
451     FloatingPointControl fpctrl;
452 
453     // Enable hardware exceptions for division by zero, overflow to infinity,
454     // invalid operations, and uninitialized floating-point variables.
455     fpctrl.enableExceptions(FloatingPointControl.severeExceptions);
456 
457     // This will generate a hardware exception, if x is a
458     // default-initialized floating point variable:
459     real x; // Add `= 0` or even `= real.nan` to not throw the exception.
460     real y = x * 3.0;
461 
462     // The exception is only thrown for default-uninitialized NaN-s.
463     // NaN-s with other payload are valid:
464     real z = y * real.nan; // ok
465 
466     // The set hardware exceptions and rounding modes will be disabled when
467     // leaving this scope.
468 }
469 ----
470 
471  */
472 struct FloatingPointControl
473 {
474 nothrow @nogc:
475 
476     alias RoundingMode = uint; ///
477 
478     version (StdDdoc)
479     {
480         enum : RoundingMode
481         {
482             /** IEEE rounding modes.
483              * The default mode is roundToNearest.
484              *
485              *  roundingMask = A mask of all rounding modes.
486              */
487             roundToNearest,
488             roundDown, /// ditto
489             roundUp, /// ditto
490             roundToZero, /// ditto
491             roundingMask, /// ditto
492         }
493     }
494     else version (CRuntime_Microsoft)
495     {
496         // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv).
497         enum : RoundingMode
498         {
499             roundToNearest = 0x0000,
500             roundDown      = 0x0400,
501             roundUp        = 0x0800,
502             roundToZero    = 0x0C00,
503             roundingMask   = roundToNearest | roundDown
504                              | roundUp | roundToZero,
505         }
506     }
507     else
508     {
509         enum : RoundingMode
510         {
511             roundToNearest = core.stdc.fenv.FE_TONEAREST,
512             roundDown      = core.stdc.fenv.FE_DOWNWARD,
513             roundUp        = core.stdc.fenv.FE_UPWARD,
514             roundToZero    = core.stdc.fenv.FE_TOWARDZERO,
515             roundingMask   = roundToNearest | roundDown
516                              | roundUp | roundToZero,
517         }
518     }
519 
520     /***
521      * Change the floating-point hardware rounding mode
522      *
523      * Changing the rounding mode in the middle of a function can interfere
524      * with optimizations of floating point expressions, as the optimizer assumes
525      * that the rounding mode does not change.
526      * It is best to change the rounding mode only at the
527      * beginning of the function, and keep it until the function returns.
528      * It is also best to add the line:
529      * ---
530      * pragma(inline, false);
531      * ---
532      * as the first line of the function so it will not get inlined.
533      * Params:
534      *    newMode = the new rounding mode
535      */
536     @property void rounding(RoundingMode newMode) @trusted
537     {
538         initialize();
539         setControlState((getControlState() & (-1 - roundingMask)) | (newMode & roundingMask));
540     }
541 
542     /// Returns: the currently active rounding mode
543     @property static RoundingMode rounding() @trusted pure
544     {
545         return cast(RoundingMode)(getControlState() & roundingMask);
546     }
547 
548     alias ExceptionMask = uint; ///
549 
550     version (StdDdoc)
551     {
552         enum : ExceptionMask
553         {
554             /** IEEE hardware exceptions.
555              *  By default, all exceptions are masked (disabled).
556              *
557              *  severeExceptions = The overflow, division by zero, and invalid
558              *  exceptions.
559              */
560             subnormalException,
561             inexactException, /// ditto
562             underflowException, /// ditto
563             overflowException, /// ditto
564             divByZeroException, /// ditto
565             invalidException, /// ditto
566             severeExceptions, /// ditto
567             allExceptions, /// ditto
568         }
569     }
570     else version (ARM_Any)
571     {
572         enum : ExceptionMask
573         {
574             subnormalException    = 0x8000,
575             inexactException      = 0x1000,
576             underflowException    = 0x0800,
577             overflowException     = 0x0400,
578             divByZeroException    = 0x0200,
579             invalidException      = 0x0100,
580             severeExceptions   = overflowException | divByZeroException
581                                  | invalidException,
582             allExceptions      = severeExceptions | underflowException
583                                  | inexactException | subnormalException,
584         }
585     }
586     else version (PPC_Any)
587     {
588         enum : ExceptionMask
589         {
590             inexactException      = 0x0008,
591             divByZeroException    = 0x0010,
592             underflowException    = 0x0020,
593             overflowException     = 0x0040,
594             invalidException      = 0x0080,
595             severeExceptions   = overflowException | divByZeroException
596                                  | invalidException,
597             allExceptions      = severeExceptions | underflowException
598                                  | inexactException,
599         }
600     }
601     else version (RISCV_Any)
602     {
603         enum : ExceptionMask
604         {
605             inexactException      = 0x01,
606             divByZeroException    = 0x08,
607             underflowException    = 0x02,
608             overflowException     = 0x04,
609             invalidException      = 0x10,
610             severeExceptions   = overflowException | divByZeroException
611                                  | invalidException,
612             allExceptions      = severeExceptions | underflowException
613                                  | inexactException,
614         }
615     }
616     else version (HPPA)
617     {
618         enum : ExceptionMask
619         {
620             inexactException      = 0x01,
621             underflowException    = 0x02,
622             overflowException     = 0x04,
623             divByZeroException    = 0x08,
624             invalidException      = 0x10,
625             severeExceptions   = overflowException | divByZeroException
626                                  | invalidException,
627             allExceptions      = severeExceptions | underflowException
628                                  | inexactException,
629         }
630     }
631     else version (LoongArch_Any)
632     {
633         enum : ExceptionMask
634         {
635             inexactException      = 0x00,
636             divByZeroException    = 0x01,
637             overflowException     = 0x02,
638             underflowException    = 0x04,
639             invalidException      = 0x08,
640             severeExceptions   = overflowException | divByZeroException
641                                  | invalidException,
642             allExceptions      = severeExceptions | underflowException
643                                  | inexactException,
644         }
645     }
646     else version (MIPS_Any)
647     {
648         enum : ExceptionMask
649         {
650             inexactException      = 0x0080,
651             divByZeroException    = 0x0400,
652             overflowException     = 0x0200,
653             underflowException    = 0x0100,
654             invalidException      = 0x0800,
655             severeExceptions   = overflowException | divByZeroException
656                                  | invalidException,
657             allExceptions      = severeExceptions | underflowException
658                                  | inexactException,
659         }
660     }
661     else version (SPARC_Any)
662     {
663         enum : ExceptionMask
664         {
665             inexactException      = 0x0800000,
666             divByZeroException    = 0x1000000,
667             overflowException     = 0x4000000,
668             underflowException    = 0x2000000,
669             invalidException      = 0x8000000,
670             severeExceptions   = overflowException | divByZeroException
671                                  | invalidException,
672             allExceptions      = severeExceptions | underflowException
673                                  | inexactException,
674         }
675     }
676     else version (IBMZ_Any)
677     {
678         enum : ExceptionMask
679         {
680             inexactException      = 0x08000000,
681             divByZeroException    = 0x40000000,
682             overflowException     = 0x20000000,
683             underflowException    = 0x10000000,
684             invalidException      = 0x80000000,
685             severeExceptions   = overflowException | divByZeroException
686                                  | invalidException,
687             allExceptions      = severeExceptions | underflowException
688                                  | inexactException,
689         }
690     }
691     else version (X86_Any)
692     {
693         enum : ExceptionMask
694         {
695             inexactException      = 0x20,
696             underflowException    = 0x10,
697             overflowException     = 0x08,
698             divByZeroException    = 0x04,
699             subnormalException    = 0x02,
700             invalidException      = 0x01,
701             severeExceptions   = overflowException | divByZeroException
702                                  | invalidException,
703             allExceptions      = severeExceptions | underflowException
704                                  | inexactException | subnormalException,
705         }
706     }
707     else
708         static assert(false, "Not implemented for this architecture");
709 
710     version (ARM_Any)
711     {
712         static bool hasExceptionTraps_impl() @safe
713         {
714             auto oldState = getControlState();
715             // If exceptions are not supported, we set the bit but read it back as zero
716             // https://sourceware.org/ml/libc-ports/2012-06/msg00091.html
717             setControlState(oldState | divByZeroException);
718             immutable result = (getControlState() & allExceptions) != 0;
719             setControlState(oldState);
720             return result;
721         }
722     }
723 
724     /// Returns: true if the current FPU supports exception trapping
725     @property static bool hasExceptionTraps() @safe pure
726     {
727         version (X86_Any)
728             return true;
729         else version (PPC_Any)
730             return true;
731         else version (MIPS_Any)
732             return true;
733         else version (LoongArch_Any)
734             return true;
735         else version (ARM_Any)
736         {
737             // The hasExceptionTraps_impl function is basically pure,
738             // as it restores all global state
739             auto fptr = ( () @trusted => cast(bool function() @safe
740                 pure nothrow @nogc)&hasExceptionTraps_impl)();
741             return fptr();
742         }
743         else
744             assert(0, "Not yet supported");
745     }
746 
747     /// Enable (unmask) specific hardware exceptions. Multiple exceptions may be ORed together.
748     void enableExceptions(ExceptionMask exceptions) @trusted
749     {
750         assert(hasExceptionTraps);
751         initialize();
752         version (X86_Any)
753             setControlState(getControlState() & ~(exceptions & allExceptions));
754         else
755             setControlState(getControlState() | (exceptions & allExceptions));
756     }
757 
758     /// Disable (mask) specific hardware exceptions. Multiple exceptions may be ORed together.
759     void disableExceptions(ExceptionMask exceptions) @trusted
760     {
761         assert(hasExceptionTraps);
762         initialize();
763         version (X86_Any)
764             setControlState(getControlState() | (exceptions & allExceptions));
765         else
766             setControlState(getControlState() & ~(exceptions & allExceptions));
767     }
768 
769     /// Returns: the exceptions which are currently enabled (unmasked)
770     @property static ExceptionMask enabledExceptions() @trusted pure
771     {
772         assert(hasExceptionTraps);
773         version (X86_Any)
774             return (getControlState() & allExceptions) ^ allExceptions;
775         else
776             return (getControlState() & allExceptions);
777     }
778 
779     ///  Clear all pending exceptions, then restore the original exception state and rounding mode.
780     ~this() @trusted
781     {
782         clearExceptions();
783         if (initialized)
784             setControlState(savedState);
785     }
786 
787 private:
788     ControlState savedState;
789 
790     bool initialized = false;
791 
792     version (ARM_Any)
793     {
794         alias ControlState = uint;
795     }
796     else version (HPPA)
797     {
798         alias ControlState = uint;
799     }
800     else version (PPC_Any)
801     {
802         alias ControlState = uint;
803     }
804     else version (RISCV_Any)
805     {
806         alias ControlState = uint;
807     }
808     else version (LoongArch_Any)
809     {
810         alias ControlState = uint;
811     }
812     else version (MIPS_Any)
813     {
814         alias ControlState = uint;
815     }
816     else version (SPARC_Any)
817     {
818         alias ControlState = ulong;
819     }
820     else version (IBMZ_Any)
821     {
822         alias ControlState = uint;
823     }
824     else version (X86_Any)
825     {
826         alias ControlState = ushort;
827     }
828     else
829         static assert(false, "Not implemented for this architecture");
830 
831     void initialize() @safe
832     {
833         // BUG: This works around the absence of this() constructors.
834         if (initialized) return;
835         clearExceptions();
836         savedState = getControlState();
837         initialized = true;
838     }
839 
840     // Clear all pending exceptions
841     static void clearExceptions() @safe
842     {
843         version (IeeeFlagsSupport)
844             resetIeeeFlags();
845         else
846             static assert(false, "Not implemented for this architecture");
847     }
848 
849     // Read from the control register
850     package(std.math) static ControlState getControlState() @trusted pure
851     {
852         version (D_InlineAsm_X86)
853         {
854             short cont;
855             asm pure nothrow @nogc
856             {
857                 xor EAX, EAX;
858                 fstcw cont;
859             }
860             return cont;
861         }
862         else version (D_InlineAsm_X86_64)
863         {
864             short cont;
865             asm pure nothrow @nogc
866             {
867                 xor RAX, RAX;
868                 fstcw cont;
869             }
870             return cont;
871         }
872         else version (RISCV_Any)
873         {
874             ControlState cont;
875             asm pure nothrow @nogc
876             {
877                 "frcsr %0" : "=r" (cont);
878             }
879             return cont;
880         }
881         else version (LoongArch_Any)
882         {
883             ControlState cont;
884             asm pure nothrow @nogc
885             {
886                 "movfcsr2gr %0, $fcsr0" : "=r" (cont);
887             }
888             cont &= (roundingMask | allExceptions);
889             return cont;
890         }
891         else
892             assert(0, "Not yet supported");
893     }
894 
895     // Set the control register
896     package(std.math) static void setControlState(ControlState newState) @trusted
897     {
898         version (InlineAsm_X86_Any)
899         {
900             asm nothrow @nogc
901             {
902                 fclex;
903                 fldcw newState;
904             }
905 
906             // Also update MXCSR, SSE's control register.
907             if (haveSSE)
908             {
909                 uint mxcsr;
910                 asm nothrow @nogc { stmxcsr mxcsr; }
911 
912                 /* In the FPU control register, rounding mode is in bits 10 and
913                 11. In MXCSR it's in bits 13 and 14. */
914                 mxcsr &= ~(roundingMask << 3);             // delete old rounding mode
915                 mxcsr |= (newState & roundingMask) << 3;   // write new rounding mode
916 
917                 /* In the FPU control register, masks are bits 0 through 5.
918                 In MXCSR they're 7 through 12. */
919                 mxcsr &= ~(allExceptions << 7);            // delete old masks
920                 mxcsr |= (newState & allExceptions) << 7;  // write new exception masks
921 
922                 asm nothrow @nogc { ldmxcsr mxcsr; }
923             }
924         }
925         else version (RISCV_Any)
926         {
927             asm pure nothrow @nogc
928             {
929                 "fscsr %0" : : "r" (newState);
930             }
931         }
932         else version (LoongArch_Any)
933         {
934             asm nothrow @nogc
935             {
936                 "movgr2fcsr $fcsr0,%0" :
937                 : "r" (newState & (roundingMask | allExceptions));
938             }
939         }
940         else
941             assert(0, "Not yet supported");
942     }
943 }
944 
///
@safe unittest
{
    import std.math.rounding : lrint;

    // The previous rounding mode is restored when `control` goes out of scope.
    FloatingPointControl control;

    // Towards negative infinity: 1.5 becomes 1.
    control.rounding = FloatingPointControl.roundDown;
    assert(lrint(1.5) == 1.0);

    // Towards positive infinity: 1.4 becomes 2.
    control.rounding = FloatingPointControl.roundUp;
    assert(lrint(1.4) == 2.0);

    // To nearest: 1.5 becomes 2.
    control.rounding = FloatingPointControl.roundToNearest;
    assert(lrint(1.5) == 2.0);
}
961 
@safe unittest
{
    alias FPC = FloatingPointControl;

    // Checks the default state: round-to-nearest and, where traps are
    // supported, no exception enabled.
    void assertDefaultState()
    {
        assert(FPC.rounding == FPC.roundToNearest);
        if (FPC.hasExceptionTraps)
            assert(FPC.enabledExceptions == 0);
    }

    // An instance that is never used must not disturb anything.
    {
        FPC ctrl;
    }
    assertDefaultState();

    // A rounding-mode change is undone at the end of the scope.
    {
        FPC ctrl;
        ctrl.rounding = FPC.roundDown;
        assert(FPC.rounding == FPC.roundDown);
    }
    assertDefaultState();

    // Enabled exceptions and a rounding-mode change are both undone.
    if (FPC.hasExceptionTraps)
    {
        FPC ctrl;
        ctrl.enableExceptions(FPC.divByZeroException | FPC.overflowException);
        assert(ctrl.enabledExceptions ==
               (FPC.divByZeroException | FPC.overflowException));

        ctrl.rounding = FPC.roundUp;
        assert(FPC.rounding == FPC.roundUp);
    }
    assertDefaultState();
}
998 
@safe unittest // rounding
{
    import std.meta : AliasSeq;

    // Computes 1 + 0.1 under rounding mode `rm`. Kept out of line so the
    // mode change cannot interact with the caller's expressions.
    static T addRound(T)(uint rm)
    {
        pragma(inline, false);
        FloatingPointControl control;
        control.rounding = rm;
        T sum = 1;
        sum = forceAddOp(sum, 0.1L);
        return sum;
    }

    // Computes -1 - 0.1 under rounding mode `rm`; see addRound.
    static T subRound(T)(uint rm)
    {
        pragma(inline, false);
        FloatingPointControl control;
        control.rounding = rm;
        T difference = -1;
        difference = forceSubOp(difference, 0.1L);
        return difference;
    }

    static foreach (T; AliasSeq!(float, double, real))
    {{
        /* Be careful with changing the rounding mode, it interferes
         * with common subexpressions. Changing rounding modes should
         * be done with separate functions that are not inlined.
         */

        // Positive result: rounding towards zero agrees with rounding down.
        {
            T up = addRound!(T)(FloatingPointControl.roundUp);
            T down = addRound!(T)(FloatingPointControl.roundDown);
            T towardZero = addRound!(T)(FloatingPointControl.roundToZero);

            assert(up > down);
            assert(towardZero == down);
        }

        // Negative result: rounding towards zero agrees with rounding up.
        {
            T up = subRound!(T)(FloatingPointControl.roundUp);
            T down = subRound!(T)(FloatingPointControl.roundDown);
            T towardZero = subRound!(T)(FloatingPointControl.roundToZero);

            assert(up > down);
            assert(towardZero == up);
        }
    }}
}
1049 
1050 } // FloatingPointControlSupport
1051 
version (StdUnittest)
{
    // Out-of-line arithmetic helpers for the unittests in this module. They
    // are intended to avoid constant propagation by the optimizer, so that
    // the operation really runs and can raise hardware flags.

    // Returns x + y.
    private T forceAddOp(T)(T x, T y) @safe
    {
        pragma(inline, false);
        return x + y;
    }

    // Returns x - y.
    private T forceSubOp(T)(T x, T y) @safe
    {
        pragma(inline, false);
        return x - y;
    }

    // Returns x * y.
    private T forceMulOp(T)(T x, T y) @safe
    {
        pragma(inline, false);
        return x * y;
    }

    // Returns x / y.
    private T forceDivOp(T)(T x, T y) @safe
    {
        pragma(inline, false);
        return x / y;
    }
}