// Written in the D programming language.

/**
This is a submodule of $(MREF std, math).

It contains hardware support for floating point numbers.

Copyright: Copyright The D Language Foundation 2000 - 2011.
License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors:   $(HTTP digitalmars.com, Walter Bright), Don Clugston,
           Conversion of CEPHES math library to D by Iain Buclaw and David Nadlinger
Source: $(PHOBOSSRC std/math/hardware.d)
 */

module std.math.hardware;

static import core.stdc.fenv;

// Collapse the 32/64-bit variants of each architecture into one identifier.
version (X86)       version = X86_Any;
version (X86_64)    version = X86_Any;
version (PPC)       version = PPC_Any;
version (PPC64)     version = PPC_Any;
version (MIPS32)    version = MIPS_Any;
version (MIPS64)    version = MIPS_Any;
version (AArch64)   version = ARM_Any;
version (ARM)       version = ARM_Any;
version (S390)      version = IBMZ_Any;
version (SPARC)     version = SPARC_Any;
version (SPARC64)   version = SPARC_Any;
version (SystemZ)   version = IBMZ_Any;
version (RISCV32)   version = RISCV_Any;
version (RISCV64)   version = RISCV_Any;
version (LoongArch64)   version = LoongArch_Any;

version (D_InlineAsm_X86)    version = InlineAsm_X86_Any;
version (D_InlineAsm_X86_64) version = InlineAsm_X86_Any;

version (X86_64) version = StaticallyHaveSSE;
version (X86) version (OSX) version = StaticallyHaveSSE;

version (StaticallyHaveSSE)
{
    private enum bool haveSSE = true;
}
else version (X86)
{
    // SSE availability has to be queried at run time on generic 32-bit x86.
    static import core.cpuid;
    private alias haveSSE = core.cpuid.sse;
}

version (D_SoftFloat)
{
    // Some soft float implementations may support IEEE floating flags.
    // The implementation here supports hardware flags only and is so currently
    // only available for supported targets.
}
else version (X86_Any)   version = IeeeFlagsSupport;
else version (PPC_Any)   version = IeeeFlagsSupport;
else version (RISCV_Any) version = IeeeFlagsSupport;
else version (MIPS_Any)  version = IeeeFlagsSupport;
else version (LoongArch_Any) version = IeeeFlagsSupport;
else version (ARM_Any)   version = IeeeFlagsSupport;

// Struct FloatingPointControl is only available if hardware FP units are available.
version (D_HardFloat)
{
    // FloatingPointControl.clearExceptions() depends on version IeeeFlagsSupport
    version (IeeeFlagsSupport) version = FloatingPointControlSupport;
}

version (IeeeFlagsSupport)
{

/** IEEE exception status flags ('sticky bits')

 These flags indicate that an exceptional floating-point condition has occurred.
 They indicate that a NaN or an infinity has been generated, that a result
 is inexact, or that a signalling NaN has been encountered. If floating-point
 exceptions are enabled (unmasked), a hardware exception will be generated
 instead of setting these flags.
 */
struct IeeeFlags
{
nothrow @nogc:

private:
    // The x87 FPU status register is 16 bits.
    // The Pentium SSE2 status register is 32 bits.
    // The ARM and PowerPC FPSCR is a 32-bit register.
    // The SPARC FSR is a 32-bit register (64 bits for SPARC 7 & 8, but high bits are uninteresting).
    // The RISC-V (32 & 64 bit) fcsr is 32-bit register.
    // The LoongArch fcsr (fcsr0) is a 32-bit register.
    uint flags;

    version (CRuntime_Microsoft)
    {
        // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv).
        // Applies to both x87 status word (16 bits) and SSE2 status word(32 bits).
        enum : int
        {
            INEXACT_MASK   = 0x20,
            UNDERFLOW_MASK = 0x10,
            OVERFLOW_MASK  = 0x08,
            DIVBYZERO_MASK = 0x04,
            INVALID_MASK   = 0x01,

            EXCEPTIONS_MASK = 0b11_1111
        }
        // Don't bother about subnormals, they are not supported on most CPUs.
        //  SUBNORMAL_MASK = 0x02;
    }
    else
    {
        enum : int
        {
            INEXACT_MASK    = core.stdc.fenv.FE_INEXACT,
            UNDERFLOW_MASK  = core.stdc.fenv.FE_UNDERFLOW,
            OVERFLOW_MASK   = core.stdc.fenv.FE_OVERFLOW,
            DIVBYZERO_MASK  = core.stdc.fenv.FE_DIVBYZERO,
            INVALID_MASK    = core.stdc.fenv.FE_INVALID,
            EXCEPTIONS_MASK = core.stdc.fenv.FE_ALL_EXCEPT,
        }
    }

    // Reads the raw exception status bits from the hardware.
    // Targets without an implementation fail with assert(0) at run time.
    static uint getIeeeFlags() @trusted pure
    {
        version (InlineAsm_X86_Any)
        {
            ushort sw;
            asm pure nothrow @nogc { fstsw sw; }

            // OR the result with the SSE2 status register (MXCSR).
            if (haveSSE)
            {
                uint mxcsr;
                asm pure nothrow @nogc { stmxcsr mxcsr; }
                return (sw | mxcsr) & EXCEPTIONS_MASK;
            }
            else return sw & EXCEPTIONS_MASK;
        }
        else version (SPARC)
        {
            /*
               int retval;
               asm pure nothrow @nogc { st %fsr, retval; }
               return retval;
            */
            assert(0, "Not yet supported");
        }
        else version (ARM)
        {
            assert(false, "Not yet supported.");
        }
        else version (RISCV_Any)
        {
            uint result = void;
            asm pure nothrow @nogc
            {
                "frflags %0" : "=r" (result);
            }
            return result;
        }
        else version (LoongArch_Any)
        {
            uint result = void;
            asm pure nothrow @nogc
            {
                "movfcsr2gr %0, $fcsr2" : "=r" (result);
            }
            return result & EXCEPTIONS_MASK;
        }
        else
            assert(0, "Not yet supported");
    }

    // Clears the hardware exception status bits.
    // Targets without an implementation fail with assert(0) at run time.
    static void resetIeeeFlags() @trusted
    {
        version (InlineAsm_X86_Any)
        {
            asm nothrow @nogc
            {
                fnclex;
            }

            // Also clear exception flags in MXCSR, SSE's control register.
            if (haveSSE)
            {
                uint mxcsr;
                asm nothrow @nogc { stmxcsr mxcsr; }
                mxcsr &= ~EXCEPTIONS_MASK;
                asm nothrow @nogc { ldmxcsr mxcsr; }
            }
        }
        else version (RISCV_Any)
        {
            uint newValues = 0x0;
            asm pure nothrow @nogc
            {
                "fsflags %0" : : "r" (newValues);
            }
        }
        else version (LoongArch_Any)
        {
            asm nothrow @nogc
            {
                "movgr2fcsr $fcsr2,$r0";
            }
        }
        else
        {
            /* SPARC:
              int tmpval;
              asm pure nothrow @nogc { st %fsr, tmpval; }
              tmpval &=0xFFFF_FC00;
              asm pure nothrow @nogc { ld tmpval, %fsr; }
            */
            assert(0, "Not yet supported");
        }
    }

public:
    /**
     * The result cannot be represented exactly, so rounding occurred.
     * Example: `x = sin(0.1);`
     */
    @property bool inexact() @safe const { return (flags & INEXACT_MASK) != 0; }

    /**
     * A zero was generated by underflow
     * Example: `x = real.min*real.epsilon/2;`
     */
    @property bool underflow() @safe const { return (flags & UNDERFLOW_MASK) != 0; }

    /**
     * An infinity was generated by overflow
     * Example: `x = real.max*2;`
     */
    @property bool overflow() @safe const { return (flags & OVERFLOW_MASK) != 0; }

    /**
     * An infinity was generated by division by zero
     * Example: `x = 3/0.0;`
     */
    @property bool divByZero() @safe const { return (flags & DIVBYZERO_MASK) != 0; }

    /**
     * A machine NaN was generated.
     * Example: `x = real.infinity * 0.0;`
     */
    @property bool invalid() @safe const { return (flags & INVALID_MASK) != 0; }
}

///
version (StdDdoc)
@safe unittest
{
    import std.math.traits : isNaN;

    static void func() {
        int a = 10 * 10;
    }
    real a = 3.5;
    // Set all the flags to zero
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
    // Perform a division by zero.
    a /= 0.0L;
    assert(a == real.infinity);
    assert(ieeeFlags.divByZero);
    // Create a NaN
    a *= 0.0L;
    assert(ieeeFlags.invalid);
    assert(isNaN(a));

    // Check that calling func() has no effect on the
    // status flags.
    IeeeFlags f = ieeeFlags;
    func();
    assert(ieeeFlags == f);
}

@safe unittest
{
    import std.math.traits : isNaN;

    static void func() {
        int a = 10 * 10;
    }
    real a = 3.5;
    // Set all the flags to zero
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
    // Perform a division by zero.
    a = forceDivOp(a, 0.0L);
    assert(a == real.infinity);
    assert(ieeeFlags.divByZero);
    // Create a NaN
    a = forceMulOp(a, 0.0L);
    assert(ieeeFlags.invalid);
    assert(isNaN(a));

    // Check that calling func() has no effect on the
    // status flags.
    IeeeFlags f = ieeeFlags;
    func();
    assert(ieeeFlags == f);
}

@safe unittest
{
    import std.meta : AliasSeq;

    static struct Test
    {
        void delegate() @trusted action;
        bool function() @trusted ieeeCheck;
    }

    static foreach (T; AliasSeq!(float, double, real))
    {{
        T x; // Needs to be here to avoid `call without side effects` warning.
        auto tests = [
            Test(
                () { x = forceAddOp!T(1, 0.1L); },
                () => ieeeFlags.inexact
            ),
            Test(
                () { x = forceDivOp!T(T.min_normal, T.max); },
                () => ieeeFlags.underflow
            ),
            Test(
                () { x = forceAddOp!T(T.max, T.max); },
                () => ieeeFlags.overflow
            ),
            Test(
                () { x = forceDivOp!T(1, 0); },
                () => ieeeFlags.divByZero
            ),
            Test(
                () { x = forceDivOp!T(0, 0); },
                () => ieeeFlags.invalid
            )
        ];
        foreach (test; tests)
        {
            resetIeeeFlags();
            assert(!test.ieeeCheck());
            test.action();
            assert(test.ieeeCheck());
        }
    }}
}

/// Set all of the floating-point status flags to false.
void resetIeeeFlags() @trusted nothrow @nogc
{
    IeeeFlags.resetIeeeFlags();
}

///
version (StdDdoc)
@safe unittest
{
    resetIeeeFlags();
    real a = 3.5;
    a /= 0.0L;
    assert(a == real.infinity);
    assert(ieeeFlags.divByZero);

    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
}

@safe unittest
{
    resetIeeeFlags();
    real a = 3.5;
    a = forceDivOp(a, 0.0L);
    assert(a == real.infinity);
    assert(ieeeFlags.divByZero);

    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
}

/// Returns: snapshot of the current state of the floating-point status flags
@property IeeeFlags ieeeFlags() @trusted pure nothrow @nogc
{
    return IeeeFlags(IeeeFlags.getIeeeFlags());
}

///
version (StdDdoc)
@safe nothrow unittest
{
    import std.math.traits : isNaN;

    resetIeeeFlags();
    real a = 3.5;

    a /= 0.0L;
    assert(a == real.infinity);
    assert(ieeeFlags.divByZero);

    a *= 0.0L;
    assert(isNaN(a));
    assert(ieeeFlags.invalid);
}

@safe nothrow unittest
{
    import std.math.traits : isNaN;

    resetIeeeFlags();
    real a = 3.5;

    a = forceDivOp(a, 0.0L);
    assert(a == real.infinity);
    assert(ieeeFlags.divByZero);

    a = forceMulOp(a, 0.0L);
    assert(isNaN(a));
    assert(ieeeFlags.invalid);
}

} // IeeeFlagsSupport


version (FloatingPointControlSupport)
{

/** Control the Floating point hardware

  Change the IEEE754 floating-point rounding mode and the floating-point
  hardware exceptions.

  By default, the rounding mode is roundToNearest and all hardware exceptions
  are disabled. For most applications, debugging is easier if the $(I division
  by zero), $(I overflow), and $(I invalid operation) exceptions are enabled.
  These three are combined into a $(I severeExceptions) value for convenience.
  Note in particular that if $(I invalidException) is enabled, a hardware trap
  will be generated whenever an uninitialized floating-point variable is used.

  All changes are temporary. The previous state is restored at the
  end of the scope.


Example:
----
{
    FloatingPointControl fpctrl;

    // Enable hardware exceptions for division by zero, overflow to infinity,
    // invalid operations, and uninitialized floating-point variables.
    fpctrl.enableExceptions(FloatingPointControl.severeExceptions);

    // This will generate a hardware exception, if x is a
    // default-initialized floating point variable:
    real x; // Add `= 0` or even `= real.nan` to not throw the exception.
    real y = x * 3.0;

    // The exception is only thrown for default-uninitialized NaN-s.
    // NaN-s with other payload are valid:
    real z = y * real.nan; // ok

    // The set hardware exceptions and rounding modes will be disabled when
    // leaving this scope.
}
----

 */
struct FloatingPointControl
{
nothrow @nogc:

    alias RoundingMode = uint; ///

    version (StdDdoc)
    {
        enum : RoundingMode
        {
            /** IEEE rounding modes.
             * The default mode is roundToNearest.
             *
             *  roundingMask = A mask of all rounding modes.
             */
            roundToNearest,
            roundDown, /// ditto
            roundUp, /// ditto
            roundToZero, /// ditto
            roundingMask, /// ditto
        }
    }
    else version (CRuntime_Microsoft)
    {
        // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv).
        enum : RoundingMode
        {
            roundToNearest = 0x0000,
            roundDown      = 0x0400,
            roundUp        = 0x0800,
            roundToZero    = 0x0C00,
            roundingMask   = roundToNearest | roundDown
                             | roundUp | roundToZero,
        }
    }
    else
    {
        enum : RoundingMode
        {
            roundToNearest = core.stdc.fenv.FE_TONEAREST,
            roundDown      = core.stdc.fenv.FE_DOWNWARD,
            roundUp        = core.stdc.fenv.FE_UPWARD,
            roundToZero    = core.stdc.fenv.FE_TOWARDZERO,
            roundingMask   = roundToNearest | roundDown
                             | roundUp | roundToZero,
        }
    }

    /***
     * Change the floating-point hardware rounding mode
     *
     * Changing the rounding mode in the middle of a function can interfere
     * with optimizations of floating point expressions, as the optimizer assumes
     * that the rounding mode does not change.
     * It is best to change the rounding mode only at the
     * beginning of the function, and keep it until the function returns.
     * It is also best to add the line:
     * ---
     * pragma(inline, false);
     * ---
     * as the first line of the function so it will not get inlined.
     * Params:
     *    newMode = the new rounding mode
     */
    @property void rounding(RoundingMode newMode) @trusted
    {
        initialize();
        setControlState((getControlState() & (-1 - roundingMask)) | (newMode & roundingMask));
    }

    /// Returns: the currently active rounding mode
    @property static RoundingMode rounding() @trusted pure
    {
        return cast(RoundingMode)(getControlState() & roundingMask);
    }

    alias ExceptionMask = uint; ///

    version (StdDdoc)
    {
        enum : ExceptionMask
        {
            /** IEEE hardware exceptions.
             *  By default, all exceptions are masked (disabled).
             *
             *  severeExceptions = The overflow, division by zero, and invalid
             *  exceptions.
             */
            subnormalException,
            inexactException, /// ditto
            underflowException, /// ditto
            overflowException, /// ditto
            divByZeroException, /// ditto
            invalidException, /// ditto
            severeExceptions, /// ditto
            allExceptions, /// ditto
        }
    }
    else version (ARM_Any)
    {
        enum : ExceptionMask
        {
            subnormalException    = 0x8000,
            inexactException      = 0x1000,
            underflowException    = 0x0800,
            overflowException     = 0x0400,
            divByZeroException    = 0x0200,
            invalidException      = 0x0100,
            severeExceptions   = overflowException | divByZeroException
                                 | invalidException,
            allExceptions      = severeExceptions | underflowException
                                 | inexactException | subnormalException,
        }
    }
    else version (PPC_Any)
    {
        enum : ExceptionMask
        {
            inexactException      = 0x0008,
            divByZeroException    = 0x0010,
            underflowException    = 0x0020,
            overflowException     = 0x0040,
            invalidException      = 0x0080,
            severeExceptions   = overflowException | divByZeroException
                                 | invalidException,
            allExceptions      = severeExceptions | underflowException
                                 | inexactException,
        }
    }
    else version (RISCV_Any)
    {
        enum : ExceptionMask
        {
            inexactException      = 0x01,
            divByZeroException    = 0x08,
            underflowException    = 0x02,
            overflowException     = 0x04,
            invalidException      = 0x10,
            severeExceptions   = overflowException | divByZeroException
                                 | invalidException,
            allExceptions      = severeExceptions | underflowException
                                 | inexactException,
        }
    }
    else version (HPPA)
    {
        enum : ExceptionMask
        {
            inexactException      = 0x01,
            underflowException    = 0x02,
            overflowException     = 0x04,
            divByZeroException    = 0x08,
            invalidException      = 0x10,
            severeExceptions   = overflowException | divByZeroException
                                 | invalidException,
            allExceptions      = severeExceptions | underflowException
                                 | inexactException,
        }
    }
    else version (LoongArch_Any)
    {
        // Trap-enable bits live in FCSR0[4:0] in the order I, U, O, Z, V
        // (the FE_* status flags shifted down by 16). The previous values
        // started at 0x00, which made inexactException an empty mask and left
        // the V enable bit (0x10) outside allExceptions.
        enum : ExceptionMask
        {
            inexactException      = 0x01,
            underflowException    = 0x02,
            overflowException     = 0x04,
            divByZeroException    = 0x08,
            invalidException      = 0x10,
            severeExceptions   = overflowException | divByZeroException
                                 | invalidException,
            allExceptions      = severeExceptions | underflowException
                                 | inexactException,
        }
    }
    else version (MIPS_Any)
    {
        enum : ExceptionMask
        {
            inexactException      = 0x0080,
            divByZeroException    = 0x0400,
            overflowException     = 0x0200,
            underflowException    = 0x0100,
            invalidException      = 0x0800,
            severeExceptions   = overflowException | divByZeroException
                                 | invalidException,
            allExceptions      = severeExceptions | underflowException
                                 | inexactException,
        }
    }
    else version (SPARC_Any)
    {
        enum : ExceptionMask
        {
            inexactException      = 0x0800000,
            divByZeroException    = 0x1000000,
            overflowException     = 0x4000000,
            underflowException    = 0x2000000,
            invalidException      = 0x8000000,
            severeExceptions   = overflowException | divByZeroException
                                 | invalidException,
            allExceptions      = severeExceptions | underflowException
                                 | inexactException,
        }
    }
    else version (IBMZ_Any)
    {
        enum : ExceptionMask
        {
            inexactException      = 0x08000000,
            divByZeroException    = 0x40000000,
            overflowException     = 0x20000000,
            underflowException    = 0x10000000,
            invalidException      = 0x80000000,
            severeExceptions   = overflowException | divByZeroException
                                 | invalidException,
            allExceptions      = severeExceptions | underflowException
                                 | inexactException,
        }
    }
    else version (X86_Any)
    {
        enum : ExceptionMask
        {
            inexactException      = 0x20,
            underflowException    = 0x10,
            overflowException     = 0x08,
            divByZeroException    = 0x04,
            subnormalException    = 0x02,
            invalidException      = 0x01,
            severeExceptions   = overflowException | divByZeroException
                                 | invalidException,
            allExceptions      = severeExceptions | underflowException
                                 | inexactException | subnormalException,
        }
    }
    else
        static assert(false, "Not implemented for this architecture");

    version (ARM_Any)
    {
        // Probes for trap support by setting an enable bit and reading it back.
        // The previous control state is restored before returning.
        static bool hasExceptionTraps_impl() @safe
        {
            auto oldState = getControlState();
            // If exceptions are not supported, we set the bit but read it back as zero
            // https://sourceware.org/ml/libc-ports/2012-06/msg00091.html
            setControlState(oldState | divByZeroException);
            immutable result = (getControlState() & allExceptions) != 0;
            setControlState(oldState);
            return result;
        }
    }

    /// Returns: true if the current FPU supports exception trapping
    @property static bool hasExceptionTraps() @safe pure
    {
        version (X86_Any)
            return true;
        else version (PPC_Any)
            return true;
        else version (MIPS_Any)
            return true;
        else version (LoongArch_Any)
            return true;
        else version (RISCV_Any)
        {
            // RISC-V floating-point exceptions only set the accrued flags in
            // fcsr; they never trap. Report that instead of hitting assert(0),
            // so that callers guarding on this property keep working.
            return false;
        }
        else version (ARM_Any)
        {
            // The hasExceptionTraps_impl function is basically pure,
            // as it restores all global state
            auto fptr = ( () @trusted => cast(bool function() @safe
                pure nothrow @nogc)&hasExceptionTraps_impl)();
            return fptr();
        }
        else
            assert(0, "Not yet supported");
    }

    /// Enable (unmask) specific hardware exceptions. Multiple exceptions may be ORed together.
    void enableExceptions(ExceptionMask exceptions) @trusted
    {
        assert(hasExceptionTraps);
        initialize();
        // On x86 a set bit masks (disables) the exception, elsewhere it enables it.
        version (X86_Any)
            setControlState(getControlState() & ~(exceptions & allExceptions));
        else
            setControlState(getControlState() | (exceptions & allExceptions));
    }

    /// Disable (mask) specific hardware exceptions. Multiple exceptions may be ORed together.
    void disableExceptions(ExceptionMask exceptions) @trusted
    {
        assert(hasExceptionTraps);
        initialize();
        // On x86 a set bit masks (disables) the exception, elsewhere it enables it.
        version (X86_Any)
            setControlState(getControlState() | (exceptions & allExceptions));
        else
            setControlState(getControlState() & ~(exceptions & allExceptions));
    }

    /// Returns: the exceptions which are currently enabled (unmasked)
    @property static ExceptionMask enabledExceptions() @trusted pure
    {
        assert(hasExceptionTraps);
        version (X86_Any)
            return (getControlState() & allExceptions) ^ allExceptions;
        else
            return (getControlState() & allExceptions);
    }

    ///  Clear all pending exceptions, then restore the original exception state and rounding mode.
    ~this() @trusted
    {
        clearExceptions();
        if (initialized)
            setControlState(savedState);
    }

private:
    // Control state captured by initialize(); written back by the destructor.
    ControlState savedState;

    // True once initialize() has captured savedState.
    bool initialized = false;

    version (ARM_Any)
    {
        alias ControlState = uint;
    }
    else version (HPPA)
    {
        alias ControlState = uint;
    }
    else version (PPC_Any)
    {
        alias ControlState = uint;
    }
    else version (RISCV_Any)
    {
        alias ControlState = uint;
    }
    else version (LoongArch_Any)
    {
        alias ControlState = uint;
    }
    else version (MIPS_Any)
    {
        alias ControlState = uint;
    }
    else version (SPARC_Any)
    {
        alias ControlState = ulong;
    }
    else version (IBMZ_Any)
    {
        alias ControlState = uint;
    }
    else version (X86_Any)
    {
        alias ControlState = ushort;
    }
    else
        static assert(false, "Not implemented for this architecture");

    // Captures the current control state on first use; later calls are no-ops.
    void initialize() @safe
    {
        // BUG: This works around the absence of this() constructors.
        if (initialized) return;
        clearExceptions();
        savedState = getControlState();
        initialized = true;
    }

    // Clear all pending exceptions
    static void clearExceptions() @safe
    {
        version (IeeeFlagsSupport)
            resetIeeeFlags();
        else
            static assert(false, "Not implemented for this architecture");
    }

    // Read from the control register
    package(std.math) static ControlState getControlState() @trusted pure
    {
        version (D_InlineAsm_X86)
        {
            short cont;
            asm pure nothrow @nogc
            {
                xor EAX, EAX;
                fstcw cont;
            }
            return cont;
        }
        else version (D_InlineAsm_X86_64)
        {
            short cont;
            asm pure nothrow @nogc
            {
                xor RAX, RAX;
                fstcw cont;
            }
            return cont;
        }
        else version (RISCV_Any)
        {
            ControlState cont;
            asm pure nothrow @nogc
            {
                "frcsr %0" : "=r" (cont);
            }
            return cont;
        }
        else version (LoongArch_Any)
        {
            ControlState cont;
            asm pure nothrow @nogc
            {
                "movfcsr2gr %0, $fcsr0" : "=r" (cont);
            }
            // Keep only the rounding-mode and trap-enable fields.
            cont &= (roundingMask | allExceptions);
            return cont;
        }
        else
            assert(0, "Not yet supported");
    }

    // Set the control register
    package(std.math) static void setControlState(ControlState newState) @trusted
    {
        version (InlineAsm_X86_Any)
        {
            asm nothrow @nogc
            {
                fclex;
                fldcw newState;
            }

            // Also update MXCSR, SSE's control register.
            if (haveSSE)
            {
                uint mxcsr;
                asm nothrow @nogc { stmxcsr mxcsr; }

                /* In the FPU control register, rounding mode is in bits 10 and
                11. In MXCSR it's in bits 13 and 14. */
                mxcsr &= ~(roundingMask << 3);             // delete old rounding mode
                mxcsr |= (newState & roundingMask) << 3;   // write new rounding mode

                /* In the FPU control register, masks are bits 0 through 5.
                In MXCSR they're 7 through 12. */
                mxcsr &= ~(allExceptions << 7);            // delete old masks
                mxcsr |= (newState & allExceptions) << 7;  // write new exception masks

                asm nothrow @nogc { ldmxcsr mxcsr; }
            }
        }
        else version (RISCV_Any)
        {
            asm pure nothrow @nogc
            {
                "fscsr %0" : : "r" (newState);
            }
        }
        else version (LoongArch_Any)
        {
            asm nothrow @nogc
            {
                "movgr2fcsr $fcsr0,%0" :
                : "r" (newState & (roundingMask | allExceptions));
            }
        }
        else
            assert(0, "Not yet supported");
    }
}

///
@safe unittest
{
    import std.math.rounding : lrint;

    FloatingPointControl fpctrl;

    fpctrl.rounding = FloatingPointControl.roundDown;
    assert(lrint(1.5) == 1.0);

    fpctrl.rounding = FloatingPointControl.roundUp;
    assert(lrint(1.4) == 2.0);

    fpctrl.rounding = FloatingPointControl.roundToNearest;
    assert(lrint(1.5) == 2.0);
}

@safe unittest
{
    void ensureDefaults()
    {
        assert(FloatingPointControl.rounding
               == FloatingPointControl.roundToNearest);
        if (FloatingPointControl.hasExceptionTraps)
            assert(FloatingPointControl.enabledExceptions == 0);
    }

    {
        FloatingPointControl ctrl;
    }
    ensureDefaults();

    {
        FloatingPointControl ctrl;
        ctrl.rounding = FloatingPointControl.roundDown;
        assert(FloatingPointControl.rounding == FloatingPointControl.roundDown);
    }
    ensureDefaults();

    if (FloatingPointControl.hasExceptionTraps)
    {
        FloatingPointControl ctrl;
        ctrl.enableExceptions(FloatingPointControl.divByZeroException
                              | FloatingPointControl.overflowException);
        assert(ctrl.enabledExceptions ==
               (FloatingPointControl.divByZeroException
                | FloatingPointControl.overflowException));

        ctrl.rounding = FloatingPointControl.roundUp;
        assert(FloatingPointControl.rounding == FloatingPointControl.roundUp);
    }
    ensureDefaults();
}

@safe unittest // rounding
{
    import std.meta : AliasSeq;

    static T addRound(T)(uint rm)
    {
        pragma(inline, false);
        FloatingPointControl fpctrl;
        fpctrl.rounding = rm;
        T x = 1;
        x = forceAddOp(x, 0.1L);
        return x;
    }

    static T subRound(T)(uint rm)
    {
        pragma(inline, false);
        FloatingPointControl fpctrl;
        fpctrl.rounding = rm;
        T x = -1;
        x = forceSubOp(x, 0.1L);
        return x;
    }

    static foreach (T; AliasSeq!(float, double, real))
    {{
        /* Be careful with changing the rounding mode, it interferes
         * with common subexpressions. Changing rounding modes should
         * be done with separate functions that are not inlined.
         */

        {
            T u = addRound!(T)(FloatingPointControl.roundUp);
            T d = addRound!(T)(FloatingPointControl.roundDown);
            T z = addRound!(T)(FloatingPointControl.roundToZero);

            assert(u > d);
            assert(z == d);
        }

        {
            T u = subRound!(T)(FloatingPointControl.roundUp);
            T d = subRound!(T)(FloatingPointControl.roundDown);
            T z = subRound!(T)(FloatingPointControl.roundToZero);

            assert(u > d);
            assert(z == u);
        }
    }}
}

} // FloatingPointControlSupport

version (StdUnittest)
{
    // These helpers are intended to avoid constant propagation by the optimizer.
    pragma(inline, false) private @safe
    {
        T forceAddOp(T)(T x, T y) { return x + y; }
        T forceSubOp(T)(T x, T y) { return x - y; }
        T forceMulOp(T)(T x, T y) { return x * y; }
        T forceDivOp(T)(T x, T y) { return x / y; }
    }
}