1 /** Fundamental operations for arbitrary-precision arithmetic 2 * 3 * These functions are for internal use only. 4 */ 5 /* Copyright Don Clugston 2008 - 2010. 6 * Distributed under the Boost Software License, Version 1.0. 7 * (See accompanying file LICENSE_1_0.txt or copy at 8 * http://www.boost.org/LICENSE_1_0.txt) 9 */ 10 /* References: 11 "Modern Computer Arithmetic" (MCA) is the primary reference for all 12 algorithms used in this library. 13 - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic", 14 Version 0.5.9, (Oct 2010). 15 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 16 Max-Planck Institute fuer Informatik, (Oct 1998). 17 - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.", 18 INRIA 4664, (Dec 2002). 19 - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?", 20 http://bodrato.it/papers (2006). 21 - A. Fog, "Optimizing subroutines in assembly language", 22 www.agner.org/optimize (2008). 23 - A. Fog, "The microarchitecture of Intel and AMD CPU's", 24 www.agner.org/optimize (2008). 25 - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs 26 and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008). 27 28 Idioms: 29 Many functions in this module use 30 'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)' 31 in order to disable implicit conversion. 32 33 */ 34 module std.internal.math.biguintcore; 35 36 version (D_InlineAsm_X86) 37 { 38 static import std.internal.math.biguintx86; 39 } 40 static import std.internal.math.biguintnoasm; 41 42 import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT, 43 KARATSUBASQUARELIMIT; 44 45 alias multibyteAdd = multibyteAddSub!('+'); 46 alias multibyteSub = multibyteAddSub!('-'); 47 48 private import std.traits; 49 private import std.range.primitives; 50 public import std.ascii : LetterCase; 51 import std.range.primitives; 52 import std.traits; 53 54 private: 55 56 // dipatchers to the right low-level primitives. Added to allow BigInt CTFE for 57 // 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's 58 // used by the other architectures too. 59 // See comments below in case it has to be refactored. 60 version (X86) 61 uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry) 62 { 63 // must be checked before, otherwise D_InlineAsm_X86 is true. 64 if (__ctfe) 65 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry); 66 // Runtime. 67 else version (D_InlineAsm_X86) 68 return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry); 69 // Runtime if no asm available. 70 else 71 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry); 72 } 73 // Any other architecture 74 else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub; 75 76 version (X86) 77 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry) 78 { 79 if (__ctfe) 80 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry); 81 else version (D_InlineAsm_X86) 82 return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry); 83 else 84 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry); 85 } 86 else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign; 87 88 version (X86) 89 uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits) 90 { 91 if (__ctfe) 92 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits); 93 else version (D_InlineAsm_X86) 94 return std.internal.math.biguintx86.multibyteShl(dest, src, numbits); 95 else 96 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits); 97 } 98 else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl; 99 100 version (X86) 101 void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits) 102 { 103 if (__ctfe) 104 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits); 105 else version (D_InlineAsm_X86) 106 std.internal.math.biguintx86.multibyteShr(dest, src, numbits); 107 else 108 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits); 109 } 110 else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr; 111 112 version (X86) 113 uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry) 114 { 115 if (__ctfe) 116 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry); 117 else version (D_InlineAsm_X86) 118 return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry); 119 else 120 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry); 121 } 122 else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul; 123 124 version (X86) 125 uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry) 126 { 127 if (__ctfe) 128 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry); 129 else version (D_InlineAsm_X86) 130 return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry); 131 else 132 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry); 133 } 134 else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd; 135 136 version (X86) 137 void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right) 138 { 139 if (__ctfe) 140 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right); 141 else version (D_InlineAsm_X86) 142 std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right); 143 else 144 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right); 145 } 146 else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate; 147 148 version (X86) 149 uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow) 150 { 151 if (__ctfe) 152 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow); 153 else version (D_InlineAsm_X86) 154 return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow); 155 else 156 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow); 157 } 158 else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign; 159 160 version (X86) 161 void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src) 162 { 163 if (__ctfe) 164 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src); 165 else version (D_InlineAsm_X86) 166 std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src); 167 else 168 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src); 169 } 170 else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares; 171 172 version (X86) 173 void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x) 174 { 175 if (__ctfe) 176 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x); 177 else version (D_InlineAsm_X86) 178 std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x); 179 else 180 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x); 181 } 182 else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate; 183 184 version (X86) 185 void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x) 186 { 187 if (__ctfe) 188 std.internal.math.biguintnoasm.multibyteSquare(result, x); 189 else version (D_InlineAsm_X86) 190 std.internal.math.biguintx86.multibyteSquare(result, x); 191 else 192 std.internal.math.biguintnoasm.multibyteSquare(result, x); 193 } 194 else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare; 195 196 // Limits for when to switch between algorithms. 197 // Half the size of the data cache. 198 @nogc nothrow pure @safe size_t getCacheLimit() 199 { 200 import core.cpuid : dataCaches; 201 return dataCaches[0].size * 1024 / 2; 202 } 203 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division 204 205 206 // These constants are used by shift operations 207 static if (BigDigit.sizeof == int.sizeof) 208 { 209 enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 } 210 alias BIGHALFDIGIT = ushort; 211 } 212 else static if (BigDigit.sizeof == long.sizeof) 213 { 214 alias BIGHALFDIGIT = uint; 215 enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 } 216 } 217 else static assert(0, "Unsupported BigDigit size"); 218 219 import std.exception : assumeUnique; 220 import std.traits : isIntegral; 221 enum BigDigitBits = BigDigit.sizeof*8; 222 template maxBigDigits(T) 223 if (isIntegral!T) 224 { 225 enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof; 226 } 227 228 static immutable BigDigit[] ZERO = [0]; 229 static immutable BigDigit[] ONE = [1]; 230 static immutable BigDigit[] TWO = [2]; 231 static immutable BigDigit[] TEN = [10]; 232 233 234 public: 235 236 /// BigUint performs memory management and wraps the low-level calls. 237 struct BigUint 238 { 239 private: 240 pure invariant() 241 { 242 assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ), 243 "Invariant requires data to not empty or zero"); 244 } 245 246 immutable(BigDigit) [] data = ZERO; 247 248 this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe 249 { 250 data = x; 251 } 252 package(std) // used from: std.bigint 253 this(T)(T x) pure nothrow @safe scope 254 if (isIntegral!T) 255 { 256 opAssign(x); 257 } 258 259 enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc { 260 return assumeUnique(input); 261 }; 262 public: 263 // Length in uints 264 @property size_t uintLength() pure nothrow const @safe @nogc scope 265 { 266 static if (BigDigit.sizeof == uint.sizeof) 267 { 268 return data.length; 269 } 270 else static if (BigDigit.sizeof == ulong.sizeof) 271 { 272 return data.length * 2 - 273 ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0); 274 } 275 } 276 @property size_t ulongLength() pure nothrow const @safe @nogc scope 277 { 278 static if (BigDigit.sizeof == uint.sizeof) 279 { 280 return (data.length + 1) >> 1; 281 } 282 else static if (BigDigit.sizeof == ulong.sizeof) 283 { 284 return data.length; 285 } 286 } 287 288 // The value at (cast(ulong[]) data)[n] 289 ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope 290 { 291 static if (BigDigit.sizeof == int.sizeof) 292 { 293 if (data.length == n*2 + 1) return data[n*2]; 294 return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 ); 295 } 296 else static if (BigDigit.sizeof == long.sizeof) 297 { 298 return data[n]; 299 } 300 } 301 302 uint peekUint(size_t n) pure nothrow const @safe @nogc scope 303 { 304 static if (BigDigit.sizeof == int.sizeof) 305 { 306 return data[n]; 307 } 308 else 309 { 310 immutable x = data[n >> 1]; 311 return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x; 312 } 313 } 314 315 /// 316 void opAssign(Tulong)(Tulong u) pure nothrow @safe scope 317 if (is (Tulong == ulong)) 318 { 319 if (u == 0) data = ZERO; 320 else if (u == 1) data = ONE; 321 else if (u == 2) data = TWO; 322 else if (u == 10) data = TEN; 323 else 324 { 325 static if (BigDigit.sizeof == int.sizeof) 326 { 327 uint ulo = cast(uint)(u & 0xFFFF_FFFF); 328 uint uhi = cast(uint)(u >> 32); 329 if (uhi == 0) 330 { 331 data = [ulo]; 332 } 333 else 334 { 335 data = [ulo, uhi]; 336 } 337 } 338 else static if (BigDigit.sizeof == long.sizeof) 339 { 340 data = [u]; 341 } 342 } 343 } 344 void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope 345 { 346 this.data = y.data; 347 } 348 349 /// 350 int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope 351 { 352 if (data.length != y.data.length) 353 return (data.length > y.data.length) ? 1 : -1; 354 size_t k = highestDifferentDigit(data, y.data); 355 if (data[k] == y.data[k]) 356 return 0; 357 return data[k] > y.data[k] ? 1 : -1; 358 } 359 360 /// 361 int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope 362 if (is (Tulong == ulong)) 363 { 364 if (data.length > maxBigDigits!Tulong) 365 return 1; 366 367 foreach_reverse (i; 0 .. maxBigDigits!Tulong) 368 { 369 BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits)); 370 if (tmp == 0) 371 if (data.length >= i+1) 372 { 373 // Since ZERO is [0], so we cannot simply return 1 here, as 374 // data[i] would be 0 for i == 0 in that case. 375 return (data[i] > 0) ? 1 : 0; 376 } 377 else 378 continue; 379 else 380 if (i+1 > data.length) 381 return -1; 382 else if (tmp != data[i]) 383 return data[i] > tmp ? 1 : -1; 384 } 385 return 0; 386 } 387 388 bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope 389 { 390 return y.data[] == data[]; 391 } 392 393 bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope 394 { 395 if (data.length > 2) 396 return false; 397 uint ylo = cast(uint)(y & 0xFFFF_FFFF); 398 uint yhi = cast(uint)(y >> 32); 399 if (data.length == 2 && data[1]!=yhi) 400 return false; 401 if (data.length == 1 && yhi != 0) 402 return false; 403 return (data[0] == ylo); 404 } 405 406 bool isZero() pure const nothrow @safe @nogc scope 407 { 408 return data.length == 1 && data[0] == 0; 409 } 410 411 size_t numBytes() pure nothrow const @safe @nogc scope 412 { 413 return data.length * BigDigit.sizeof; 414 } 415 416 // the extra bytes are added to the start of the string 417 char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope 418 { 419 immutable predictlength = 20+20*(data.length/2); // just over 19 420 char [] buff = new char[frontExtraBytes + predictlength]; 421 ptrdiff_t sofar = biguintToDecimal(buff, data.dup); 422 return buff[sofar-frontExtraBytes..$]; 423 } 424 425 /** Convert to a hex string, printing a minimum number of digits 'minPadding', 426 * allocating an additional 'frontExtraBytes' at the start of the string. 427 * Padding is done with padChar, which may be '0' or ' '. 428 * 'separator' is a digit separation character. If non-zero, it is inserted 429 * between every 8 digits. 430 * Separator characters do not contribute to the minPadding. 431 */ 432 char [] toHexString(int frontExtraBytes, char separator = 0, 433 int minPadding=0, char padChar = '0', 434 LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope 435 { 436 // Calculate number of extra padding bytes 437 size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof) 438 ? minPadding - data.length * 2 * BigDigit.sizeof : 0; 439 440 // Length not including separator bytes 441 size_t lenBytes = data.length * 2 * BigDigit.sizeof; 442 443 // Calculate number of separator bytes 444 size_t mainSeparatorBytes = separator ? (lenBytes / 8) - 1 : 0; 445 immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0; 446 447 char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes]; 448 biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase); 449 if (extraPad > 0) 450 { 451 if (separator) 452 { 453 size_t start = frontExtraBytes; // first index to pad 454 if (extraPad &7) 455 { 456 // Do 1 to 7 extra zeros. 457 buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar; 458 buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator); 459 start += (extraPad & 7) + 1; 460 } 461 for (int i=0; i< (extraPad >> 3); ++i) 462 { 463 buff[start .. start + 8] = padChar; 464 buff[start + 8] = (padChar == ' ' ? ' ' : separator); 465 start += 9; 466 } 467 } 468 else 469 { 470 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar; 471 } 472 } 473 int z = frontExtraBytes; 474 if (lenBytes > minPadding) 475 { 476 // Strip leading zeros. 477 ptrdiff_t maxStrip = lenBytes - minPadding; 478 while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0) 479 { 480 ++z; 481 --maxStrip; 482 } 483 } 484 if (padChar!='0') 485 { 486 // Convert leading zeros into padChars. 487 for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k) 488 { 489 if (buff[k]=='0') buff[k]=padChar; 490 } 491 } 492 return buff[z-frontExtraBytes..$]; 493 } 494 495 /** 496 * Convert to an octal string. 497 */ 498 char[] toOctalString() pure nothrow @safe const scope 499 { 500 auto predictLength = 1 + data.length*BigDigitBits / 3; 501 char[] buff = new char[predictLength]; 502 size_t firstNonZero = biguintToOctal(buff, data); 503 return buff[firstNonZero .. $]; 504 } 505 506 // return false if invalid character found 507 bool fromHexString(Range)(Range s) scope 508 if (isBidirectionalRange!Range && isSomeChar!(ElementType!Range)) 509 { 510 import std.range : walkLength; 511 512 //Strip leading zeros 513 while (!s.empty && s.front == '0') 514 s.popFront; 515 516 if (s.empty) 517 { 518 data = ZERO; 519 return true; 520 } 521 522 immutable len = (s.save.walkLength + 15) / 4; 523 auto tmp = new BigDigit[len + 1]; 524 uint part, sofar, partcount; 525 526 foreach_reverse (character; s) 527 { 528 if (character == '_') 529 continue; 530 531 uint x; 532 if (character >= '0' && character <= '9') 533 { 534 x = character - '0'; 535 } 536 else if (character >= 'A' && character <= 'F') 537 { 538 x = character - 'A' + 10; 539 } 540 else if (character >= 'a' && character <= 'f') 541 { 542 x = character - 'a' + 10; 543 } 544 else 545 { 546 return false; 547 } 548 549 part >>= 4; 550 part |= (x << (32 - 4)); 551 ++partcount; 552 553 if (partcount == 8) 554 { 555 tmp[sofar] = part; 556 ++sofar; 557 partcount = 0; 558 part = 0; 559 } 560 } 561 if (part) 562 { 563 for ( ; partcount != 8; ++partcount) part >>= 4; 564 tmp[sofar] = part; 565 ++sofar; 566 } 567 if (sofar == 0) 568 data = ZERO; 569 else 570 data = trustedAssumeUnique(tmp[0 .. sofar]); 571 572 return true; 573 } 574 575 // return true if OK; false if erroneous characters found 576 bool fromDecimalString(Range)(Range s) scope 577 if (isForwardRange!Range && isSomeChar!(ElementType!Range)) 578 { 579 import std.range : walkLength; 580 581 while (!s.empty && s.front == '0') 582 { 583 s.popFront; 584 } 585 586 if (s.empty) 587 { 588 data = ZERO; 589 return true; 590 } 591 592 auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19; 593 auto tmp = new BigDigit[predict_length]; 594 595 tmp.length = biguintFromDecimal(tmp, s); 596 597 data = trustedAssumeUnique(tmp); 598 return true; 599 } 600 601 void fromMagnitude(Range)(Range magnitude) scope 602 if (isInputRange!Range 603 && (isForwardRange!Range || hasLength!Range) 604 && isUnsigned!(ElementType!Range)) 605 { 606 while (!magnitude.empty && magnitude.front == 0) 607 magnitude.popFront; 608 static if (hasLength!Range) 609 immutable inputLen = magnitude.length; 610 else 611 immutable inputLen = magnitude.save.walkLength; 612 if (!inputLen) 613 { 614 this.data = ZERO; 615 return; 616 } 617 // `magnitude` has its most significant element first but BigUint.data 618 // stores the most significant last. 619 BigDigit[] newDigits; 620 alias E = ElementType!Range; 621 static if (E.sizeof == BigDigit.sizeof) 622 { 623 newDigits = new BigDigit[inputLen]; 624 foreach_reverse (ref digit; newDigits) 625 { 626 digit = magnitude.front; 627 magnitude.popFront(); 628 } 629 } 630 else static if (E.sizeof < BigDigit.sizeof) 631 { 632 enum elementsPerDigit = BigDigit.sizeof / E.sizeof; 633 newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit]; 634 immutable remainder = inputLen % elementsPerDigit; 635 // If there is a remainder specially assemble the most significant digit. 636 if (remainder) 637 { 638 BigDigit tmp = magnitude.front; 639 magnitude.popFront(); 640 foreach (_; 1 .. remainder) 641 { 642 tmp = (tmp << (E.sizeof * 8)) | magnitude.front; 643 magnitude.popFront(); 644 } 645 newDigits[$-1] = tmp; 646 } 647 // Assemble full digits from most to least significant. 648 foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)]) 649 { 650 BigDigit tmp; 651 static foreach (n; 0 .. elementsPerDigit) 652 { 653 tmp |= cast(BigDigit) magnitude.front << 654 ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8); 655 magnitude.popFront(); 656 } 657 digit = tmp; 658 } 659 } 660 else static if (E.sizeof > BigDigit.sizeof) 661 { 662 enum digitsPerElement = E.sizeof / BigDigit.sizeof; 663 newDigits = new BigDigit[inputLen * digitsPerElement]; 664 size_t i = newDigits.length - 1; 665 foreach (element; magnitude) 666 { 667 static foreach (n; 0 .. digitsPerElement) 668 newDigits[i - n] = 669 cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8)); 670 i -= digitsPerElement; 671 } 672 while (newDigits[$-1] == 0) 673 newDigits = newDigits[0 .. $-1]; 674 } 675 else 676 static assert(0); 677 this.data = trustedAssumeUnique(newDigits); 678 return; 679 } 680 681 nothrow pure @safe unittest 682 { 683 immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD]; 684 // Internal representation is most-significant-last but `fromMagnitude` 685 // argument is most-significant-first. 686 immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005]; 687 BigUint b; 688 // Test with reference magnitude. 689 b.fromMagnitude(referenceMagnitude); 690 assert(b.data == referenceData); 691 // Test ubyte array. 692 import std.bitmanip : nativeToBigEndian; 693 ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~ 694 nativeToBigEndian(referenceMagnitude[1]) ~ 695 nativeToBigEndian(referenceMagnitude[2]); 696 b.data = ZERO; 697 b.fromMagnitude(ubyteMagnitude); 698 assert(b.data == referenceData); 699 // Test ulong array. 700 static if (BigDigit.sizeof == uint.sizeof) 701 immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]), 702 ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2], 703 ]; 704 else static if (BigDigit.sizeof == ulong.sizeof) 705 alias ulongMagnitude = referenceMagnitude; 706 b.data = ZERO; 707 b.fromMagnitude(ulongMagnitude); 708 assert(b.data == referenceData); 709 } 710 711 //////////////////////// 712 // 713 // All of these member functions create a new BigUint. 714 715 // return x >> y 716 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope 717 if (op == ">>" && is (Tulong == ulong)) 718 { 719 assert(y > 0, "Can not right shift BigUint by 0"); 720 uint bits = cast(uint) y & BIGDIGITSHIFTMASK; 721 if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO); 722 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 723 if (bits == 0) 724 { 725 return BigUint(data[words..$]); 726 } 727 else 728 { 729 uint [] result = new BigDigit[data.length - words]; 730 multibyteShr(result, data[words..$], bits); 731 732 if (result.length > 1 && result[$-1] == 0) 733 return BigUint(trustedAssumeUnique(result[0 .. $-1])); 734 else 735 return BigUint(trustedAssumeUnique(result)); 736 } 737 } 738 739 // return x << y 740 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope 741 if (op == "<<" && is (Tulong == ulong)) 742 { 743 assert(y > 0, "Can not left shift BigUint by 0"); 744 if (isZero()) return this; 745 uint bits = cast(uint) y & BIGDIGITSHIFTMASK; 746 assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max), 747 "Shift result exceeds temporary store"); 748 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 749 BigDigit [] result = new BigDigit[data.length + words+1]; 750 result[0 .. words] = 0; 751 if (bits == 0) 752 { 753 result[words .. words+data.length] = data[]; 754 return BigUint(trustedAssumeUnique(result[0 .. words+data.length])); 755 } 756 else 757 { 758 immutable c = multibyteShl(result[words .. words+data.length], data, bits); 759 if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length])); 760 result[$-1] = c; 761 return BigUint(trustedAssumeUnique(result)); 762 } 763 } 764 765 // If wantSub is false, return x + y, leaving sign unchanged 766 // If wantSub is true, return abs(x - y), negating sign if x < y 767 static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y, bool wantSub, ref bool sign) pure nothrow @safe 768 if (is(Tulong == ulong)) 769 { 770 BigUint r; 771 if (wantSub) 772 { // perform a subtraction 773 if (x.data.length > 2) 774 { 775 // subInt returns GC allocated array, can be safely cast to immutable 776 r.data = (() @trusted => cast(immutable) subInt(x.data, y))(); 777 } 778 else 779 { // could change sign! 780 ulong xx = x.data[0]; 781 if (x.data.length > 1) 782 xx += (cast(ulong) x.data[1]) << 32; 783 ulong d; 784 if (xx <= y) 785 { 786 d = y - xx; 787 sign = !sign; 788 } 789 else 790 { 791 d = xx - y; 792 } 793 if (d == 0) 794 { 795 r = 0UL; 796 sign = false; 797 return r; 798 } 799 if (d > uint.max) 800 { 801 r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)]; 802 } 803 else 804 { 805 r.data = [cast(uint)(d & 0xFFFF_FFFF)]; 806 } 807 } 808 } 809 else 810 { 811 // addInt returns GC allocated array, can be safely cast to immutable 812 r.data = (() @trusted => cast(immutable) addInt(x.data, y))(); 813 } 814 return r; 815 } 816 817 // If wantSub is false, return x + y, leaving sign unchanged. 818 // If wantSub is true, return abs(x - y), negating sign if x<y 819 static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign) 820 pure nothrow @safe 821 { 822 BigUint r; 823 if (wantSub) 824 { // perform a subtraction 825 bool negative; 826 // sub returns GC allocated array, can be safely cast to immutable 827 r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))(); 828 sign ^= negative; 829 if (r.isZero()) 830 { 831 sign = false; 832 } 833 } 834 else 835 { 836 // add returns GC allocated array, can be safely cast to immutable 837 r.data = (() @trusted => cast(immutable) add(x.data, y.data))(); 838 } 839 return r; 840 } 841 842 843 // return x*y. 844 static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe 845 { 846 if (y == 0 || x == 0) return BigUint(ZERO); 847 static if (T.sizeof * 8 <= 32) 848 uint hi = 0; 849 else 850 uint hi = cast(uint) (y >>> 32); 851 uint lo = cast(uint) (y & 0xFFFF_FFFF); 852 uint [] result = new BigDigit[x.data.length+1+(hi != 0)]; 853 result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0); 854 if (hi != 0) 855 { 856 result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1], 857 x.data, hi, 0); 858 } 859 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 860 } 861 862 /* return x * y. 863 */ 864 static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe 865 { 866 if (y == 0 || x == 0) 867 return BigUint(ZERO); 868 auto len = x.data.length + y.data.length; 869 BigDigit [] result = new BigDigit[len]; 870 if (y.data.length > x.data.length) 871 { 872 mulInternal(result, y.data, x.data); 873 } 874 else 875 { 876 if (x.data[]==y.data[]) squareInternal(result, x.data); 877 else mulInternal(result, x.data, y.data); 878 } 879 // the highest element could be zero, 880 // in which case we need to reduce the length 881 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 882 } 883 884 // return x / y 885 static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe 886 if ( is(immutable T == immutable uint) ) 887 { 888 uint y = y_; 889 if (y == 1) 890 return x; 891 uint [] result = new BigDigit[x.data.length]; 892 if ((y&(-y))==y) 893 { 894 assert(y != 0, "BigUint division by zero"); 895 // perfect power of 2 896 uint b = 0; 897 for (;y != 1; y>>=1) 898 { 899 ++b; 900 } 901 multibyteShr(result, x.data, b); 902 } 903 else 904 { 905 result[] = x.data[]; 906 cast(void) multibyteDivAssign(result, y, 0); 907 } 908 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 909 } 910 911 static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe 912 if ( is(immutable T == immutable ulong) ) 913 { 914 if (y <= uint.max) 915 return divInt!uint(x, cast(uint) y); 916 if (x.data.length < 2) 917 return BigUint(ZERO); 918 uint hi = cast(uint)(y >>> 32); 919 uint lo = cast(uint)(y & 0xFFFF_FFFF); 920 immutable uint[2] z = [lo, hi]; 921 BigDigit[] result = new BigDigit[x.data.length - z.length + 1]; 922 divModInternal(result, null, x.data, z[]); 923 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 924 } 925 926 // return x % y 927 static uint modInt(T)(scope BigUint x, T y_) pure 928 if ( is(immutable T == immutable uint) ) 929 { 930 import core.memory : GC; 931 uint y = y_; 932 assert(y != 0, "% 0 not allowed"); 933 if ((y&(-y)) == y) 934 { // perfect power of 2 935 return x.data[0] & (y-1); 936 } 937 else 938 { 939 // horribly inefficient - malloc, copy, & store are unnecessary. 940 uint [] wasteful = new BigDigit[x.data.length]; 941 wasteful[] = x.data[]; 942 immutable rem = multibyteDivAssign(wasteful, y, 0); 943 if (!__ctfe) 944 { 945 //use free only at runtime 946 () @trusted { GC.free(wasteful.ptr); } (); 947 } 948 return rem; 949 } 950 } 951 952 // return x / y 953 static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe 954 { 955 if (y.data.length > x.data.length) 956 return BigUint(ZERO); 957 if (y.data.length == 1) 958 return divInt(x, y.data[0]); 959 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 960 divModInternal(result, null, x.data, y.data); 961 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 962 } 963 964 // return x % y 965 static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe 966 { 967 if (y.data.length > x.data.length) return x; 968 if (y.data.length == 1) 969 { 970 return BigUint([modInt(x, y.data[0])]); 971 } 972 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 973 BigDigit [] rem = new BigDigit[y.data.length]; 974 divModInternal(result, rem, x.data, y.data); 975 return BigUint(removeLeadingZeros(trustedAssumeUnique(rem))); 976 } 977 978 // Return x / y in quotient, x % y in remainder 979 static void divMod(BigUint x, scope BigUint y, 980 out BigUint quotient, out BigUint remainder) pure nothrow @safe 981 { 982 /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */ 983 if (y.data.length > x.data.length) 984 { 985 quotient = 0uL; 986 remainder = x; 987 } 988 else if (y.data.length == 1) 989 { 990 quotient = divInt(x, y.data[0]); 991 remainder = BigUint([modInt(x, y.data[0])]); 992 } 993 else 994 { 995 BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1]; 996 BigDigit[] rem = new BigDigit[y.data.length]; 997 divModInternal(quot, rem, x.data, y.data); 998 quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot))); 999 remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem))); 1000 } 1001 } 1002 1003 // return x op y 1004 static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign) 1005 pure nothrow @safe 1006 if (op == "|" || op == "^" || op == "&") 1007 { 1008 auto d1 = includeSign(x.data, y.uintLength, xSign); 1009 auto d2 = includeSign(y.data, x.uintLength, ySign); 1010 1011 foreach (i; 0 .. d1.length) 1012 { 1013 mixin("d1[i] " ~ op ~ "= d2[i];"); 1014 } 1015 1016 mixin("resultSign = xSign " ~ op ~ " ySign;"); 1017 1018 if (resultSign) 1019 { 1020 twosComplement(d1, d1); 1021 } 1022 1023 return BigUint(removeLeadingZeros(trustedAssumeUnique(d1))); 1024 } 1025 1026 /** 1027 * Return a BigUint which is x raised to the power of y. 1028 * Method: Powers of 2 are removed from x, then left-to-right binary 1029 * exponentiation is used. 1030 * Memory allocation is minimized: at most one temporary BigUint is used. 1031 */ 1032 static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe 1033 { 1034 // Deal with the degenerate cases first. 1035 if (y == 0) return BigUint(ONE); 1036 if (y == 1) return x; 1037 if (x == 0 || x == 1) return x; 1038 1039 BigUint result; 1040 1041 // Simplify, step 1: Remove all powers of 2. 1042 uint firstnonzero = firstNonZeroDigit(x.data); 1043 // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits)) 1044 // where BigDigitBits = BigDigit.sizeof * 8 1045 1046 // See if x[firstnonzero..$] can now fit into a single digit. 1047 bool singledigit = ((x.data.length - firstnonzero) == 1); 1048 // If true, then x0 is that digit 1049 // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits)) 1050 BigDigit x0 = x.data[firstnonzero]; 1051 assert(x0 != 0, "pow(0, y) not allowed"); 1052 // Length of the non-zero portion 1053 size_t nonzerolength = x.data.length - firstnonzero; 1054 ulong y0; 1055 uint evenbits = 0; // number of even bits in the bottom of x 1056 while (!(x0 & 1)) 1057 { 1058 x0 >>= 1; 1059 ++evenbits; 1060 } 1061 1062 if (x.data.length- firstnonzero == 2) 1063 { 1064 // Check for a single digit straddling a digit boundary 1065 const BigDigit x1 = x.data[firstnonzero+1]; 1066 if ((x1 >> evenbits) == 0) 1067 { 1068 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits)); 1069 singledigit = true; 1070 } 1071 } 1072 // Now if (singledigit), x^^y = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1073 1074 uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end 1075 1076 // Simplify, step 2: For singledigits, see if we can trivially reduce y 1077 1078 BigDigit finalMultiplier = 1UL; 1079 1080 if (singledigit) 1081 { 1082 // x fits into a single digit. Raise it to the highest power we can 1083 // that still fits into a single digit, then reduce the exponent accordingly. 1084 // We're quite likely to have a residual multiply at the end. 1085 // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100. 1086 // and 5^^13 still fits into a uint. 1087 evenshiftbits = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK); 1088 if (x0 == 1) 1089 { // Perfect power of 2 1090 result = 1UL; 1091 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y; 1092 } 1093 immutable p = highestPowerBelowUintMax(x0); 1094 if (y <= p) 1095 { // Just do it with pow 1096 result = cast(ulong) intpow(x0, y); 1097 if (evenbits + firstnonzero == 0) 1098 return result; 1099 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y; 1100 } 1101 y0 = y / p; 1102 finalMultiplier = intpow(x0, y - y0*p); 1103 x0 = intpow(x0, p); 1104 // Result is x0 1105 nonzerolength = 1; 1106 } 1107 // Now if (singledigit), x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1108 1109 // Perform a crude check for overflow and allocate result buffer. 1110 // The length required is y * lg2(x) bits. 1111 // which will always fit into y*x.length digits. But this is 1112 // a gross overestimate if x is small (length 1 or 2) and the highest 1113 // digit is nearly empty. 1114 // A better estimate is: 1115 // y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits, 1116 // and the first term is always between 1117 // y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and 1118 // y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS 1119 // For single digit payloads, we already have 1120 // x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1121 // and x0 is almost a full digit, so it's a tight estimate. 1122 // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y 1123 // Note that the divisions must be rounded up. 1124 1125 // Estimated length in BigDigits 1126 immutable estimatelength = singledigit 1127 ? 1 + y0 + ((evenbits*y + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y 1128 : x.data.length * y; 1129 // Imprecise check for overflow. Makes the extreme cases easier to debug 1130 // (less extreme overflow will result in an out of memory error). 1131 if (estimatelength > uint.max/(4*BigDigit.sizeof)) 1132 assert(0, "Overflow in BigInt.pow"); 1133 1134 // The result buffer includes space for all the trailing zeros 1135 BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength]; 1136 1137 // Do all the powers of 2! 1138 size_t result_start = cast(size_t)( firstnonzero * y 1139 + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0)); 1140 1141 resultBuffer[0 .. result_start] = 0; 1142 BigDigit [] t1 = resultBuffer[result_start..$]; 1143 BigDigit [] r1; 1144 1145 if (singledigit) 1146 { 1147 r1 = t1[0 .. 1]; 1148 r1[0] = x0; 1149 y = y0; 1150 } 1151 else 1152 { 1153 // It's not worth right shifting by evenbits unless we also shrink the length after each 1154 // multiply or squaring operation. That might still be worthwhile for large y. 1155 r1 = t1[0 .. x.data.length - firstnonzero]; 1156 r1[0..$] = x.data[firstnonzero..$]; 1157 } 1158 1159 if (y>1) 1160 { // Set r1 = r1 ^^ y. 1161 // The secondary buffer only needs space for the multiplication results 1162 BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start]; 1163 BigDigit [] r2; 1164 1165 int shifts = 63; // num bits in a long 1166 while (!(y & 0x8000_0000_0000_0000L)) 1167 { 1168 y <<= 1; 1169 --shifts; 1170 } 1171 y <<=1; 1172 1173 while (y != 0) 1174 { 1175 // For each bit of y: Set r1 = r1 * r1 1176 // If the bit is 1, set r1 = r1 * x 1177 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5. 1178 // Optimization opportunity: if more than 2 bits in y are set, 1179 // it's usually possible to reduce the number of multiplies 1180 // by caching odd powers of x. eg for y = 54, 1181 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2 1182 r2 = t2[0 .. r1.length*2]; 1183 squareInternal(r2, r1); 1184 if (y & 0x8000_0000_0000_0000L) 1185 { 1186 r1 = t1[0 .. r2.length + nonzerolength]; 1187 if (singledigit) 1188 { 1189 r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0); 1190 } 1191 else 1192 { 1193 mulInternal(r1, r2, x.data[firstnonzero..$]); 1194 } 1195 } 1196 else 1197 { 1198 r1 = t1[0 .. r2.length]; 1199 r1[] = r2[]; 1200 } 1201 y <<=1; 1202 shifts--; 1203 } 1204 while (shifts>0) 1205 { 1206 r2 = t2[0 .. r1.length * 2]; 1207 squareInternal(r2, r1); 1208 r1 = t1[0 .. r2.length]; 1209 r1[] = r2[]; 1210 --shifts; 1211 } 1212 } 1213 1214 if (finalMultiplier != 1) 1215 { 1216 const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0); 1217 if (carry) 1218 { 1219 r1 = t1[0 .. r1.length + 1]; 1220 r1[$-1] = carry; 1221 } 1222 } 1223 if (evenshiftbits) 1224 { 1225 const BigDigit carry = multibyteShl(r1, r1, evenshiftbits); 1226 if (carry != 0) 1227 { 1228 r1 = t1[0 .. r1.length + 1]; 1229 r1[$ - 1] = carry; 1230 } 1231 } 1232 while (r1[$ - 1]==0) 1233 { 1234 r1=r1[0 .. $ - 1]; 1235 } 1236 return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length])); 1237 } 1238 1239 // Implement toHash so that BigUint works properly as an AA key. 1240 size_t toHash() const @nogc nothrow pure @safe scope 1241 { 1242 return .hashOf(data); 1243 } 1244 1245 } // end BigUint 1246 1247 @safe pure nothrow unittest 1248 { 1249 // ulong comparison test 1250 BigUint a = [1]; 1251 assert(a == 1); 1252 // https://issues.dlang.org/show_bug.cgi?id=9548 1253 assert(a < 0x8000_0000_0000_0000UL); 1254 1255 // https://issues.dlang.org/show_bug.cgi?id=12234 1256 BigUint z = [0]; 1257 assert(z == 0UL); 1258 assert(!(z > 0UL)); 1259 assert(!(z < 0UL)); 1260 } 1261 1262 // https://issues.dlang.org/show_bug.cgi?id=16223 1263 @system pure nothrow unittest 1264 { 1265 BigUint a = [3]; 1266 int b = 5; 1267 assert(BigUint.mulInt(a,b) == 15); 1268 } 1269 1270 // Remove leading zeros from x, to restore the BigUint invariant 1271 inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe 1272 { 1273 size_t k = x.length; 1274 while (k>1 && x[k - 1]==0) --k; 1275 return x[0 .. k]; 1276 } 1277 1278 pure @safe unittest 1279 { 1280 BigUint r = BigUint([5]); 1281 BigUint t = BigUint([7]); 1282 BigUint s = BigUint.mod(r, t); 1283 assert(s == 5); 1284 } 1285 1286 pure @safe unittest 1287 { //evaluate mod at compile time 1288 enum BigUint r = BigUint([5]); 1289 enum BigUint t = BigUint([7]); 1290 enum BigUint s = BigUint.mod(r, t); 1291 static assert(s == 5); 1292 } 1293 1294 @safe pure unittest 1295 { 1296 BigUint r; 1297 r = 5UL; 1298 assert(r.peekUlong(0) == 5UL); 1299 assert(r.peekUint(0) == 5U); 1300 r = 0x1234_5678_9ABC_DEF0UL; 1301 assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL); 1302 assert(r.peekUint(0) == 0x9ABC_DEF0U); 1303 } 1304 1305 1306 // Pow tests 1307 pure @safe unittest 1308 { 1309 BigUint r, s; 1310 r.fromHexString("80000000_00000001"); 1311 s = BigUint.pow(r, 5); 1312 r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000" 1313 ~ "_00000002_80000000_00000001"); 1314 assert(s == r); 1315 s = 10UL; 1316 s = BigUint.pow(s, 39); 1317 r.fromDecimalString("1000000000000000000000000000000000000000"); 1318 assert(s == r); 1319 r.fromHexString("1_E1178E81_00000000"); 1320 s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds 1321 1322 r.fromDecimalString("000_000_00"); 1323 assert(r == 0); 1324 r.fromDecimalString("0007"); 1325 assert(r == 7); 1326 r.fromDecimalString("0"); 1327 assert(r == 0); 1328 } 1329 1330 // Radix conversion tests 1331 @safe pure unittest 1332 { 1333 BigUint r; 1334 r.fromHexString("1_E1178E81_00000000"); 1335 assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000"); 1336 assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000"); 1337 assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000"); 1338 assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000"); 1339 assert(r.toHexString(0, '_', 16+8+8) == "00000000_00000001_E1178E81_00000000"); 1340 assert(r.toHexString(0, '_', 16+8+8+1) == "0_00000000_00000001_E1178E81_00000000"); 1341 assert(r.toHexString(0, '_', 16+8+8+1, ' ') == " 1_E1178E81_00000000"); 1342 assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000"); 1343 r = 0UL; 1344 assert(r.toHexString(0, '_', 0) == "0"); 1345 assert(r.toHexString(0, '_', 7) == "0000000"); 1346 assert(r.toHexString(0, '_', 7, ' ') == " 0"); 1347 assert(r.toHexString(0, '#', 9) == "0#00000000"); 1348 assert(r.toHexString(0, 0, 9) == "000000000"); 1349 } 1350 1351 // 1352 @safe pure unittest 1353 { 1354 BigUint r; 1355 r.fromHexString("1_E1178E81_00000000"); 1356 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000"); 1357 assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000"); 1358 assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000"); 1359 assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000"); 1360 assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) == "00000000_00000001_e1178e81_00000000"); 1361 assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000"); 1362 assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == " 1_e1178e81_00000000"); 1363 assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000"); 1364 r = 0UL; 1365 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0"); 1366 assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000"); 1367 assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == " 0"); 1368 assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000"); 1369 assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000"); 1370 assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000"); 1371 } 1372 1373 1374 private: 1375 void twosComplement(const(BigDigit) [] x, BigDigit[] result) 1376 pure nothrow @safe 1377 { 1378 foreach (i; 0 .. x.length) 1379 { 1380 result[i] = ~x[i]; 1381 } 1382 result[x.length..$] = BigDigit.max; 1383 1384 foreach (i; 0 .. result.length) 1385 { 1386 if (result[i] == BigDigit.max) 1387 { 1388 result[i] = 0; 1389 } 1390 else 1391 { 1392 result[i] += 1; 1393 break; 1394 } 1395 } 1396 } 1397 1398 // Encode BigInt as BigDigit array (sign and 2's complement) 1399 BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign) 1400 pure nothrow @safe 1401 { 1402 size_t length = (x.length > minSize) ? x.length : minSize; 1403 BigDigit [] result = new BigDigit[length]; 1404 if (sign) 1405 { 1406 twosComplement(x, result); 1407 } 1408 else 1409 { 1410 result[0 .. x.length] = x; 1411 } 1412 return result; 1413 } 1414 1415 // works for any type 1416 T intpow(T)(T x, ulong n) pure nothrow @safe 1417 { 1418 T p; 1419 1420 switch (n) 1421 { 1422 case 0: 1423 p = 1; 1424 break; 1425 1426 case 1: 1427 p = x; 1428 break; 1429 1430 case 2: 1431 p = x * x; 1432 break; 1433 1434 default: 1435 p = 1; 1436 while (1) 1437 { 1438 if (n & 1) 1439 p *= x; 1440 n >>= 1; 1441 if (!n) 1442 break; 1443 x *= x; 1444 } 1445 break; 1446 } 1447 return p; 1448 } 1449 1450 1451 // returns the maximum power of x that will fit in a uint. 1452 int highestPowerBelowUintMax(uint x) pure nothrow @safe 1453 { 1454 assert(x > 1, "x must be greater than 1"); 1455 static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 1456 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7]; 1457 if (x<24) return maxpwr[x-2]; 1458 if (x<41) return 6; 1459 if (x<85) return 5; 1460 if (x<256) return 4; 1461 if (x<1626) return 3; 1462 if (x<65_536) return 2; 1463 return 1; 1464 } 1465 1466 // returns the maximum power of x that will fit in a ulong. 1467 int highestPowerBelowUlongMax(uint x) pure nothrow @safe 1468 { 1469 assert(x > 1, "x must be greater than 1"); 1470 static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 1471 17, 17, 16, 16, 15, 15, 15, 15, 14, 14, 1472 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 1473 12, 12, 12, 12, 12, 12, 12, 12, 12]; 1474 if (x<41) return maxpwr[x-2]; 1475 if (x<57) return 11; 1476 if (x<85) return 10; 1477 if (x<139) return 9; 1478 if (x<256) return 8; 1479 if (x<566) return 7; 1480 if (x<1626) return 6; 1481 if (x<7132) return 5; 1482 if (x<65_536) return 4; 1483 if (x<2_642_246) return 3; 1484 return 2; 1485 } 1486 1487 version (StdUnittest) 1488 { 1489 1490 private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe 1491 { 1492 int pwr = 1; 1493 for (ulong q = x;x*q < cast(ulong) uint.max; ) 1494 { 1495 q*=x; ++pwr; 1496 } 1497 return pwr; 1498 } 1499 1500 @safe pure unittest 1501 { 1502 assert(highestPowerBelowUintMax(10)==9); 1503 for (int k=82; k<88; ++k) 1504 { 1505 assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k)); 1506 } 1507 } 1508 1509 } 1510 1511 1512 /* General unsigned subtraction routine for bigints. 1513 * Sets result = x - y. If the result is negative, negative will be true. 1514 * Returns: 1515 * unique memory 1516 */ 1517 BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative) 1518 pure nothrow @safe 1519 { 1520 if (x.length == y.length) 1521 { 1522 // There's a possibility of cancellation, if x and y are almost equal. 1523 ptrdiff_t last = highestDifferentDigit(x, y); 1524 BigDigit [] result = new BigDigit[last+1]; 1525 if (x[last] < y[last]) 1526 { // we know result is negative 1527 multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0); 1528 *negative = true; 1529 } 1530 else 1531 { // positive or zero result 1532 multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0); 1533 *negative = false; 1534 } 1535 while (result.length > 1 && result[$-1] == 0) 1536 { 1537 result = result[0..$-1]; 1538 } 1539 // if (result.length >1 && result[$-1]==0) return result[0..$-1]; 1540 return result; 1541 } 1542 // Lengths are different 1543 const(BigDigit) [] large, small; 1544 if (x.length < y.length) 1545 { 1546 *negative = true; 1547 large = y; small = x; 1548 } 1549 else 1550 { 1551 *negative = false; 1552 large = x; small = y; 1553 } 1554 // result.length will be equal to larger length, or could decrease by 1. 1555 1556 1557 BigDigit [] result = new BigDigit[large.length]; 1558 BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0); 1559 result[small.length..$] = large[small.length..$]; 1560 if (carry) 1561 { 1562 multibyteIncrementAssign!('-')(result[small.length..$], carry); 1563 } 1564 while (result.length > 1 && result[$-1] == 0) 1565 { 1566 result = result[0..$-1]; 1567 } 1568 return result; 1569 } 1570 1571 1572 /* 1573 * return a + b 1574 * Returns: 1575 * unique memory 1576 */ 1577 BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe 1578 { 1579 const(BigDigit) [] x, y; 1580 if (a.length < b.length) 1581 { 1582 x = b; y = a; 1583 } 1584 else 1585 { 1586 x = a; y = b; 1587 } 1588 // now we know x.length > y.length 1589 // create result. add 1 in case it overflows 1590 BigDigit [] result = new BigDigit[x.length + 1]; 1591 1592 BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0); 1593 if (x.length != y.length) 1594 { 1595 result[y.length..$-1]= x[y.length..$]; 1596 carry = multibyteIncrementAssign!('+')(result[y.length..$-1], carry); 1597 } 1598 if (carry) 1599 { 1600 result[$-1] = carry; 1601 return result; 1602 } 1603 else 1604 return result[0..$-1]; 1605 } 1606 1607 /** return x + y 1608 */ 1609 BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow 1610 { 1611 uint hi = cast(uint)(y >>> 32); 1612 uint lo = cast(uint)(y& 0xFFFF_FFFF); 1613 auto len = x.length; 1614 if (x.length < 2 && hi != 0) ++len; 1615 BigDigit [] result = new BigDigit[len+1]; 1616 result[0 .. x.length] = x[]; 1617 if (x.length < 2 && hi != 0) 1618 { 1619 result[1]=hi; 1620 hi=0; 1621 } 1622 uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo); 1623 if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi); 1624 if (carry) 1625 { 1626 result[$-1] = carry; 1627 return result; 1628 } 1629 else 1630 return result[0..$-1]; 1631 } 1632 1633 /** Return x - y. 1634 * x must be greater than y. 1635 */ 1636 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe 1637 { 1638 uint hi = cast(uint)(y >>> 32); 1639 uint lo = cast(uint)(y & 0xFFFF_FFFF); 1640 BigDigit [] result = new BigDigit[x.length]; 1641 result[] = x[]; 1642 multibyteIncrementAssign!('-')(result[], lo); 1643 if (hi) 1644 multibyteIncrementAssign!('-')(result[1..$], hi); 1645 if (result[$-1] == 0) 1646 return result[0..$-1]; 1647 else 1648 return result; 1649 } 1650 1651 /** General unsigned multiply routine for bigints. 1652 * Sets result = x * y. 1653 * 1654 * The length of y must not be larger than the length of x. 1655 * Different algorithms are used, depending on the lengths of x and y. 1656 * TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the 1657 * unbalanced case. (But I doubt it would be faster in practice). 1658 * 1659 */ 1660 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y) 1661 pure nothrow @safe 1662 { 1663 import core.memory : GC; 1664 assert( result.length == x.length + y.length, 1665 "result array must have enough space to store computed result"); 1666 assert( y.length > 0, "y must not be empty"); 1667 assert( x.length >= y.length, "x must be greater or equal than y"); 1668 if (y.length <= KARATSUBALIMIT) 1669 { 1670 // Small multiplier, we'll just use the asm classic multiply. 1671 if (y.length == 1) 1672 { // Trivial case, no cache effects to worry about 1673 result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0); 1674 return; 1675 } 1676 1677 immutable CACHELIMIT = getCacheLimit; 1678 if (x.length + y.length < CACHELIMIT) 1679 return mulSimple(result, x, y); 1680 1681 // If x is so big that it won't fit into the cache, we divide it into chunks 1682 // Every chunk must be greater than y.length. 1683 // We make the first chunk shorter, if necessary, to ensure this. 1684 1685 auto chunksize = CACHELIMIT / y.length; 1686 immutable residual = x.length % chunksize; 1687 if (residual < y.length) 1688 { 1689 chunksize -= y.length; 1690 } 1691 1692 // Use schoolbook multiply. 1693 mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y); 1694 auto done = chunksize; 1695 1696 while (done < x.length) 1697 { 1698 // result[done .. done+ylength] already has a value. 1699 chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) : x.length - done; 1700 BigDigit [KARATSUBALIMIT] partial; 1701 partial[0 .. y.length] = result[done .. done+y.length]; 1702 mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y); 1703 addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]); 1704 done += chunksize; 1705 } 1706 return; 1707 } 1708 1709 immutable half = (x.length >> 1) + (x.length & 1); 1710 if (2*y.length*y.length <= x.length*x.length) 1711 { 1712 // UNBALANCED MULTIPLY 1713 // Use school multiply to cut into quasi-squares of Karatsuba-size 1714 // or larger. The ratio of the two sides of the 'square' must be 1715 // between 1.414:1 and 1:1. Use Karatsuba on each chunk. 1716 // 1717 // For maximum performance, we want the ratio to be as close to 1718 // 1:1 as possible. To achieve this, we can either pad x or y. 1719 // The best choice depends on the modulus x%y. 1720 auto numchunks = x.length / y.length; 1721 auto chunksize = y.length; 1722 auto extra = x.length % y.length; 1723 auto maxchunk = chunksize + extra; 1724 bool paddingY; // true = we're padding Y, false = we're padding X. 1725 bool isExtraSmall = extra * extra * 2 < y.length * y.length; 1726 if (numchunks == 1 && isExtraSmall) 1727 { 1728 // We divide (x_first_half * y) and (x_last_half * y) 1729 // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)). 1730 // (1.414 ~ 1.707)/2:1 is balanced. 1731 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length]; 1732 BigDigit [] partial = scratchbuff[$ - y.length .. $]; 1733 scratchbuff = scratchbuff[0 .. $ - y.length]; 1734 mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff); 1735 partial[] = result[half .. half + y.length]; 1736 mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff); 1737 BigDigit c = addAssignSimple(result[half .. half + y.length], partial); 1738 if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c); 1739 if (!__ctfe) 1740 { 1741 //use free only at runtime 1742 () @trusted { GC.free(scratchbuff.ptr); } (); 1743 } 1744 } 1745 else 1746 { 1747 if (isExtraSmall) 1748 { 1749 // The leftover bit is small enough that it should be incorporated 1750 // in the existing chunks. 1751 // Make all the chunks a tiny bit bigger 1752 // (We're padding y with zeros) 1753 chunksize += extra / numchunks; 1754 extra = x.length - chunksize*numchunks; 1755 // there will probably be a few left over. 1756 // Every chunk will either have size chunksize, or chunksize+1. 1757 maxchunk = chunksize + 1; 1758 paddingY = true; 1759 assert(chunksize + extra + chunksize *(numchunks-1) == x.length, 1760 "Unexpected size"); 1761 } 1762 else 1763 { 1764 // the extra bit is large enough that it's worth making a new chunk. 1765 // (This means we're padding x with zeros, when doing the first one). 1766 maxchunk = chunksize; 1767 ++numchunks; 1768 paddingY = false; 1769 assert(extra + chunksize *(numchunks-1) == x.length, 1770 "Unexpected size"); 1771 } 1772 // We make the buffer a bit bigger so we have space for the partial sums. 1773 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length]; 1774 BigDigit [] partial = scratchbuff[$ - y.length .. $]; 1775 scratchbuff = scratchbuff[0 .. $ - y.length]; 1776 size_t done; // how much of X have we done so far? 1777 if (paddingY) 1778 { 1779 // If the first chunk is bigger, do it first. We're padding y. 1780 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )], 1781 x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff); 1782 done = chunksize + (extra > 0 ? 1 : 0); 1783 if (extra) --extra; 1784 } 1785 else 1786 { // We're padding X. Begin with the extra bit. 1787 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff); 1788 done = extra; 1789 extra = 0; 1790 } 1791 immutable basechunksize = chunksize; 1792 while (done < x.length) 1793 { 1794 chunksize = basechunksize + (extra > 0 ? 1 : 0); 1795 if (extra) --extra; 1796 partial[] = result[done .. done+y.length]; 1797 mulKaratsuba(result[done .. done + y.length + chunksize], 1798 x[done .. done+chunksize], y, scratchbuff); 1799 addAssignSimple(result[done .. done + y.length + chunksize], partial); 1800 done += chunksize; 1801 } 1802 if (!__ctfe) 1803 { 1804 //use free only at runtime 1805 () @trusted { GC.free(scratchbuff.ptr); } (); 1806 } 1807 } 1808 } 1809 else 1810 { 1811 // Balanced. Use Karatsuba directly. 1812 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1813 mulKaratsuba(result, x, y, scratchbuff); 1814 if (!__ctfe) 1815 { 1816 //use free only at runtime 1817 () @trusted { GC.free(scratchbuff.ptr); } (); 1818 } 1819 } 1820 } 1821 1822 // https://issues.dlang.org/show_bug.cgi?id=20493 1823 @safe unittest 1824 { 1825 // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300) 1826 // the number itself isn't important, only the amount of digits, so we do a simpler 1827 // multiplication of the same size, analogous to: 1828 // 11111111 * 11111111 = 0123456787654321 1829 // but instead of base 10, it's in base `BigDigit` 1830 1831 BigDigit[398] x = 1; 1832 BigDigit[236] y = 1; 1833 BigDigit[x.length + y.length] result; 1834 mulInternal(result[], x[], y[]); 1835 1836 // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0] 1837 BigDigit[x.length + y.length] expected = y.length; 1838 foreach (BigDigit i; 0 .. y.length) 1839 { 1840 expected[i] = i+1; 1841 expected[$-1-i] = i; 1842 } 1843 1844 assert(result == expected); 1845 } 1846 1847 /** General unsigned squaring routine for BigInts. 1848 * Sets result = x*x. 1849 * NOTE: If the highest half-digit of x is zero, the highest digit of result will 1850 * also be zero. 1851 */ 1852 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe 1853 { 1854 import core.memory : GC; 1855 // Squaring is potentially half a multiply, plus add the squares of 1856 // the diagonal elements. 1857 assert(result.length == 2*x.length, 1858 "result needs to have twice the capacity of x"); 1859 if (x.length <= KARATSUBASQUARELIMIT) 1860 { 1861 if (x.length == 1) 1862 { 1863 result[1] = multibyteMul(result[0 .. 1], x, x[0], 0); 1864 return; 1865 } 1866 return squareSimple(result, x); 1867 } 1868 // The nice thing about squaring is that it always stays balanced 1869 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1870 squareKaratsuba(result, x, scratchbuff); 1871 if (!__ctfe) 1872 { 1873 //use free only at runtime 1874 () @trusted { GC.free(scratchbuff.ptr); } (); 1875 } 1876 } 1877 1878 1879 import core.bitop : bsr; 1880 1881 /// if remainder is null, only calculate quotient. 1882 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u, 1883 const BigDigit [] v) pure nothrow @safe 1884 { 1885 import core.memory : GC; 1886 assert(quotient.length == u.length - v.length + 1, 1887 "Invalid quotient length"); 1888 assert(remainder == null || remainder.length == v.length, 1889 "Invalid remainder"); 1890 assert(v.length > 1, "v must have more than 1 element"); 1891 assert(u.length >= v.length, "u must be as longer or longer than v"); 1892 1893 // Normalize by shifting v left just enough so that 1894 // its high-order bit is on, and shift u left the 1895 // same amount. The highest bit of u will never be set. 1896 1897 BigDigit [] vn = new BigDigit[v.length]; 1898 BigDigit [] un = new BigDigit[u.length + 1]; 1899 // How much to left shift v, so that its MSB is set. 1900 uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]); 1901 if (s != 0) 1902 { 1903 multibyteShl(vn, v, s); 1904 un[$-1] = multibyteShl(un[0..$-1], u, s); 1905 } 1906 else 1907 { 1908 vn[] = v[]; 1909 un[0..$-1] = u[]; 1910 un[$-1] = 0; 1911 } 1912 if (quotient.length<FASTDIVLIMIT) 1913 { 1914 schoolbookDivMod(quotient, un, vn); 1915 } 1916 else 1917 { 1918 blockDivMod(quotient, un, vn); 1919 } 1920 1921 // Unnormalize remainder, if required. 1922 if (remainder != null) 1923 { 1924 if (s == 0) remainder[] = un[0 .. vn.length]; 1925 else multibyteShr(remainder, un[0 .. vn.length+1], s); 1926 } 1927 if (!__ctfe) 1928 { 1929 //use free only at runtime 1930 () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } (); 1931 } 1932 } 1933 1934 pure @safe unittest 1935 { 1936 immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000]; 1937 immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000]; 1938 uint [] q = new uint[u.length - v.length + 1]; 1939 uint [] r = new uint[2]; 1940 divModInternal(q, r, u, v); 1941 assert(q[]==[0xFFFF_FFFFu, 0]); 1942 assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]); 1943 u = [0, 0xFFFF_FFFE, 0x8000_0001]; 1944 v = [0xFFFF_FFFF, 0x8000_0000]; 1945 divModInternal(q, r, u, v); 1946 } 1947 1948 1949 // Converts a big uint to a hexadecimal string. 1950 // 1951 // Optionally, a separator character (eg, an underscore) may be added between 1952 // every 8 digits. 1953 // buff.length must be data.length*8 if separator is zero, 1954 // or data.length*9 if separator is non-zero. It will be completely filled. 1955 char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0, 1956 LetterCase letterCase = LetterCase.upper) pure nothrow @safe 1957 { 1958 int x=0; 1959 for (ptrdiff_t i=data.length - 1; i >= 0; --i) 1960 { 1961 toHexZeroPadded(buff[x .. x+8], data[i], letterCase); 1962 x+=8; 1963 if (separator) 1964 { 1965 if (i>0) buff[x] = separator; 1966 ++x; 1967 } 1968 } 1969 return buff; 1970 } 1971 1972 /** 1973 * Convert a big uint into an octal string. 1974 * 1975 * Params: 1976 * buff = The destination buffer for the octal string. Must be large enough to 1977 * store the result, including leading zeroes, which is 1978 * ceil(data.length * BigDigitBits / 3) characters. 1979 * The buffer is filled from back to front, starting from `buff[$-1]`. 1980 * data = The biguint to be converted. 1981 * 1982 * Returns: The index of the leading non-zero digit in `buff`. Will be 1983 * `buff.length - 1` if the entire big uint is zero. 1984 */ 1985 size_t biguintToOctal(char[] buff, const(BigDigit)[] data) 1986 pure nothrow @safe @nogc 1987 { 1988 ubyte carry = 0; 1989 int shift = 0; 1990 size_t penPos = buff.length - 1; 1991 size_t lastNonZero = buff.length - 1; 1992 1993 pragma(inline) void output(uint digit) @nogc nothrow 1994 { 1995 if (digit != 0) 1996 lastNonZero = penPos; 1997 buff[penPos--] = cast(char)('0' + digit); 1998 } 1999 2000 foreach (bigdigit; data) 2001 { 2002 if (shift < 0) 2003 { 2004 // Some bits were carried over from previous word. 2005 assert(shift > -3, "shift must be greater than -3"); 2006 output(((bigdigit << -shift) | carry) & 0b111); 2007 shift += 3; 2008 assert(shift > 0, "shift must be 1 or greater"); 2009 } 2010 2011 while (shift <= BigDigitBits - 3) 2012 { 2013 output((bigdigit >>> shift) & 0b111); 2014 shift += 3; 2015 } 2016 2017 if (shift < BigDigitBits) 2018 { 2019 // Some bits need to be carried forward. 2020 carry = (bigdigit >>> shift) & 0b11; 2021 } 2022 shift -= BigDigitBits; 2023 assert(shift >= -2 && shift <= 0, "shift must in [-2,0]"); 2024 } 2025 2026 if (shift < 0) 2027 { 2028 // Last word had bits that haven't been output yet. 2029 assert(shift > -3, "Shift must be greater than -3"); 2030 output(carry); 2031 } 2032 2033 return lastNonZero; 2034 } 2035 2036 /** Convert a big uint into a decimal string. 2037 * 2038 * Params: 2039 * data The biguint to be converted. Will be destroyed. 2040 * buff The destination buffer for the decimal string. Must be 2041 * large enough to store the result, including leading zeros. 2042 * Will be filled backwards, starting from buff[$-1]. 2043 * 2044 * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length. 2045 * Returns: 2046 * the lowest index of buff which was used. 2047 */ 2048 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe 2049 { 2050 ptrdiff_t sofar = buff.length; 2051 // Might be better to divide by (10^38/2^32) since that gives 38 digits for 2052 // the price of 3 divisions and a shr; this version only gives 27 digits 2053 // for 3 divisions. 2054 while (data.length>1) 2055 { 2056 uint rem = multibyteDivAssign(data, 10_0000_0000, 0); 2057 itoaZeroPadded(buff[sofar-9 .. sofar], rem); 2058 sofar -= 9; 2059 if (data[$-1] == 0 && data.length > 1) 2060 { 2061 data.length = data.length - 1; 2062 } 2063 } 2064 itoaZeroPadded(buff[sofar-10 .. sofar], data[0]); 2065 sofar -= 10; 2066 // and strip off the leading zeros 2067 while (sofar != buff.length-1 && buff[sofar] == '0') 2068 sofar++; 2069 return sofar; 2070 } 2071 2072 /** Convert a decimal string into a big uint. 2073 * 2074 * Params: 2075 * data The biguint to be receive the result. Must be large enough to 2076 * store the result. 2077 * s The decimal string. May contain _ or 0 .. 9 2078 * 2079 * The required length for the destination buffer is slightly less than 2080 * 1 + s.length/log2(10) = 1 + s.length/3.3219. 2081 * 2082 * Returns: 2083 * the highest index of data which was used. 2084 */ 2085 int biguintFromDecimal(Range)(BigDigit[] data, Range s) 2086 if ( 2087 isInputRange!Range && 2088 isSomeChar!(ElementType!Range) && 2089 !isInfinite!Range) 2090 in 2091 { 2092 static if (hasLength!Range) 2093 assert((data.length >= 2) || (data.length == 1 && s.length == 1), 2094 "data has a invalid length"); 2095 } 2096 do 2097 { 2098 import std.conv : ConvException; 2099 2100 // Convert to base 1e19 = 10_000_000_000_000_000_000. 2101 // (this is the largest power of 10 that will fit into a long). 2102 // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219. 2103 // 485 bits will only just fit into 146 decimal digits. 2104 // As we convert the string, we record the number of digits we've seen in base 19: 2105 // hi is the number of digits/19, lo is the extra digits (0 to 18). 2106 // TODO: This is inefficient for very large strings (it is O(n^^2)). 2107 // We should take advantage of fast multiplication once the numbers exceed 2108 // Karatsuba size. 2109 uint lo = 0; // number of powers of digits, 0 .. 18 2110 uint x = 0; 2111 ulong y = 0; 2112 uint hi = 0; // number of base 1e19 digits 2113 data[0] = 0; // initially number is 0. 2114 if (data.length > 1) 2115 data[1] = 0; 2116 2117 foreach (character; s) 2118 { 2119 if (character == '_') 2120 continue; 2121 2122 if (character < '0' || character > '9') 2123 throw new ConvException("invalid digit"); 2124 x *= 10; 2125 x += character - '0'; 2126 ++lo; 2127 if (lo == 9) 2128 { 2129 y = x; 2130 x = 0; 2131 } 2132 if (lo == 18) 2133 { 2134 y *= 10_0000_0000; 2135 y += x; 2136 x = 0; 2137 } 2138 if (lo == 19) 2139 { 2140 y *= 10; 2141 y += x; 2142 x = 0; 2143 // Multiply existing number by 10^19, then add y1. 2144 if (hi>0) 2145 { 2146 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A 2147 ++hi; 2148 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000 2149 ++hi; 2150 } 2151 else 2152 hi = 2; 2153 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF)); 2154 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32)); 2155 if (c != 0) 2156 { 2157 data[hi]=c; 2158 ++hi; 2159 } 2160 y = 0; 2161 lo = 0; 2162 } 2163 } 2164 // Now set y = all remaining digits. 2165 if (lo >= 18) 2166 { 2167 } 2168 else if (lo >= 9) 2169 { 2170 for (int k=9; k<lo; ++k) y*=10; 2171 y+=x; 2172 } 2173 else 2174 { 2175 for (int k=0; k<lo; ++k) y*=10; 2176 y+=x; 2177 } 2178 if (lo != 0) 2179 { 2180 if (hi == 0) 2181 { 2182 data[0] = cast(uint) y; 2183 if (data.length == 1) 2184 { 2185 hi = 1; 2186 } 2187 else 2188 { 2189 data[1] = cast(uint)(y >>> 32); 2190 hi=2; 2191 } 2192 } 2193 else 2194 { 2195 while (lo>0) 2196 { 2197 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0); 2198 if (c != 0) 2199 { 2200 data[hi]=c; 2201 ++hi; 2202 } 2203 --lo; 2204 } 2205 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF)); 2206 if (y > 0xFFFF_FFFFL) 2207 { 2208 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32)); 2209 } 2210 if (c != 0) 2211 { 2212 data[hi]=c; 2213 ++hi; 2214 } 2215 } 2216 } 2217 while (hi>1 && data[hi-1]==0) 2218 --hi; 2219 return hi; 2220 } 2221 2222 2223 // ------------------------ 2224 // These in-place functions are only for internal use; they are incompatible 2225 // with COW. 2226 2227 // Classic 'schoolbook' multiplication. 2228 void mulSimple(BigDigit[] result, const(BigDigit) [] left, 2229 const(BigDigit)[] right) pure nothrow @safe 2230 in 2231 { 2232 assert(result.length == left.length + right.length, 2233 "Result must be able to store left + right"); 2234 assert(right.length>1, "right must not be empty"); 2235 } 2236 do 2237 { 2238 result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0); 2239 multibyteMultiplyAccumulate(result[1..$], left, right[1..$]); 2240 } 2241 2242 // Classic 'schoolbook' squaring 2243 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe 2244 in 2245 { 2246 assert(result.length == 2*x.length, "result must be twice as long as x"); 2247 assert(x.length>1, "x must not be empty"); 2248 } 2249 do 2250 { 2251 multibyteSquare(result, x); 2252 } 2253 2254 2255 // add two uints of possibly different lengths. Result must be as long 2256 // as the larger length. 2257 // Returns carry (0 or 1). 2258 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right) 2259 pure nothrow @safe 2260 in 2261 { 2262 assert(result.length == left.length, 2263 "result and left must be of the same length"); 2264 assert(left.length >= right.length, 2265 "left must be longer or of equal length to right"); 2266 assert(right.length > 0, "right must not be empty"); 2267 } 2268 do 2269 { 2270 uint carry = multibyteAdd(result[0 .. right.length], 2271 left[0 .. right.length], right, 0); 2272 if (right.length < left.length) 2273 { 2274 result[right.length .. left.length] = left[right.length .. $]; 2275 carry = multibyteIncrementAssign!('+')(result[right.length..$], carry); 2276 } 2277 return carry; 2278 } 2279 2280 /* result = result - right 2281 * Returns carry = 1 if result was less than right. 2282 */ 2283 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right) 2284 pure nothrow @safe 2285 { 2286 assert(result.length >= right.length, 2287 "result must be longer or of equal length to right"); 2288 uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0); 2289 if (c && result.length > right.length) 2290 c = multibyteIncrementAssign!('-')(result[right.length .. $], c); 2291 return c; 2292 } 2293 2294 /* result = result + right 2295 */ 2296 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right) 2297 pure nothrow @safe 2298 { 2299 assert(result.length >= right.length, 2300 "result must be longer or of equal length to right"); 2301 uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0); 2302 if (c && result.length > right.length) 2303 c = multibyteIncrementAssign!('+')(result[right.length .. $], c); 2304 return c; 2305 } 2306 2307 /* performs result += wantSub? - right : right; 2308 */ 2309 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right, 2310 bool wantSub) pure nothrow @safe 2311 { 2312 if (wantSub) 2313 return subAssignSimple(result, right); 2314 else 2315 return addAssignSimple(result, right); 2316 } 2317 2318 2319 // return true if x<y, considering leading zeros 2320 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe 2321 { 2322 assert(x.length >= y.length, 2323 "x must be longer or of equal length to y"); 2324 auto k = x.length-1; 2325 while (x[k]==0 && k >= y.length) 2326 --k; 2327 if (k >= y.length) 2328 return false; 2329 while (k>0 && x[k]==y[k]) 2330 --k; 2331 return x[k] < y[k]; 2332 } 2333 2334 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y. 2335 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y) 2336 pure nothrow @safe 2337 { 2338 assert(result.length == ((x.length >= y.length) ? x.length : y.length), 2339 "result must capable to store the maximum of x and y"); 2340 2341 size_t minlen; 2342 bool negative; 2343 if (x.length >= y.length) 2344 { 2345 minlen = y.length; 2346 negative = less(x, y); 2347 } 2348 else 2349 { 2350 minlen = x.length; 2351 negative = !less(y, x); 2352 } 2353 const (BigDigit)[] large, small; 2354 if (negative) 2355 { 2356 large = y; small = x; 2357 } 2358 else 2359 { 2360 large = x; small = y; 2361 } 2362 2363 BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0); 2364 if (x.length != y.length) 2365 { 2366 result[minlen .. large.length]= large[minlen..$]; 2367 result[large.length..$] = 0; 2368 if (carry) 2369 multibyteIncrementAssign!('-')(result[minlen..$], carry); 2370 } 2371 return negative; 2372 } 2373 2374 /* Determine how much space is required for the temporaries 2375 * when performing a Karatsuba multiplication. 2376 * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see: 2377 * https://issues.dlang.org/show_bug.cgi?id=20493 2378 */ 2379 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe 2380 { 2381 return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4; 2382 } 2383 2384 /* Sets result = x*y, using Karatsuba multiplication. 2385 * x must be longer or equal to y. 2386 * Valid only for balanced multiplies, where x is not shorter than y. 2387 * It is superior to schoolbook multiplication if and only if 2388 * sqrt(2)*y.length > x.length > y.length. 2389 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2) 2390 * The maximum allowable length of x and y is uint.max; but better algorithms 2391 * should be used far before that length is reached. 2392 * Params: 2393 * scratchbuff An array long enough to store all the temporaries. Will be destroyed. 2394 */ 2395 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x, 2396 const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe 2397 { 2398 assert(x.length >= y.length, "x must be greater or equal to y"); 2399 assert(result.length < uint.max, "Operands too large"); 2400 assert(result.length == x.length + y.length, 2401 "result must be as large as x + y"); 2402 if (x.length <= KARATSUBALIMIT) 2403 { 2404 return mulSimple(result, x, y); 2405 } 2406 // Must be almost square (otherwise, a schoolbook iteration is better) 2407 assert(2L * y.length * y.length > (x.length-1) * (x.length-1), 2408 "Bigint Internal Error: Asymmetric Karatsuba"); 2409 2410 // The subtractive version of Karatsuba multiply uses the following result: 2411 // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid) 2412 // where mid = (x0-x1)*(y0-y1) 2413 // requiring 3 multiplies of length N, instead of 4. 2414 // The advantage of the subtractive over the additive version is that 2415 // the mid multiply cannot exceed length N. But there are subtleties: 2416 // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we 2417 // retain all of the leading zeros in the subtractions 2418 2419 // half length, round up. 2420 auto half = (x.length >> 1) + (x.length & 1); 2421 2422 const(BigDigit) [] x0 = x[0 .. half]; 2423 const(BigDigit) [] x1 = x[half .. $]; 2424 const(BigDigit) [] y0 = y[0 .. half]; 2425 const(BigDigit) [] y1 = y[half .. $]; 2426 BigDigit [] mid = scratchbuff[0 .. half*2]; 2427 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 2428 BigDigit [] resultLow = result[0 .. 2*half]; 2429 BigDigit [] resultHigh = result[2*half .. $]; 2430 // initially use result to store temporaries 2431 BigDigit [] xdiff= result[0 .. half]; 2432 BigDigit [] ydiff = result[half .. half*2]; 2433 2434 // First, we calculate mid, and sign of mid 2435 immutable bool midNegative = inplaceSub(xdiff, x0, x1) 2436 ^ inplaceSub(ydiff, y0, y1); 2437 mulKaratsuba(mid, xdiff, ydiff, newscratchbuff); 2438 2439 // Low half of result gets x0 * y0. High half gets x1 * y1 2440 2441 mulKaratsuba(resultLow, x0, y0, newscratchbuff); 2442 2443 if (2L * y1.length * y1.length < x1.length * x1.length) 2444 { 2445 // an asymmetric situation has been created. 2446 // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1. 2447 // Applying one schoolbook multiply gives us two pieces each 1.2:1 2448 if (y1.length <= KARATSUBALIMIT) 2449 mulSimple(resultHigh, x1, y1); 2450 else 2451 { 2452 // divide x1 in two, then use schoolbook multiply on the two pieces. 2453 auto quarter = (x1.length >> 1) + (x1.length & 1); 2454 immutable ysmaller = (quarter >= y1.length); 2455 mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1, 2456 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff); 2457 // Save the part which will be overwritten. 2458 immutable ysmaller2 = ((x1.length - quarter) >= y1.length); 2459 newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length]; 2460 mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1, 2461 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]); 2462 2463 resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]); 2464 } 2465 } 2466 else 2467 mulKaratsuba(resultHigh, x1, y1, newscratchbuff); 2468 2469 /* We now have result = x0y0 + (N*N)*x1y1 2470 Before adding or subtracting mid, we must calculate 2471 result += N * (x0y0 + x1y1) 2472 We can do this with three half-length additions. With a = x0y0, b = x1y1: 2473 aHI aLO 2474 + aHI aLO 2475 + bHI bLO 2476 + bHI bLO 2477 = R3 R2 R1 R0 2478 R1 = aHI + bLO + aLO 2479 R2 = aHI + bLO + aHI + carry_from_R1 2480 R3 = bHi + carry_from_R2 2481 2482 It might actually be quicker to do it in two full-length additions: 2483 newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]); 2484 addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]); 2485 */ 2486 BigDigit[] R1 = result[half .. half*2]; 2487 BigDigit[] R2 = result[half*2 .. half*3]; 2488 BigDigit[] R3 = result[half*3..$]; 2489 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 2490 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0 2491 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 2492 if (c1+c2) 2493 multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 2494 if (c1+c3) 2495 multibyteIncrementAssign!('+')(R3, c1+c3); 2496 2497 // And finally we subtract mid 2498 addOrSubAssignSimple(result[half..$], mid, !midNegative); 2499 } 2500 2501 void squareKaratsuba(BigDigit [] result, const BigDigit [] x, 2502 BigDigit [] scratchbuff) pure nothrow @safe 2503 { 2504 // See mulKaratsuba for implementation comments. 2505 // Squaring is simpler, since it never gets asymmetric. 2506 assert(result.length < uint.max, "Operands too large"); 2507 assert(result.length == 2*x.length, 2508 "result must be twice the length of x"); 2509 if (x.length <= KARATSUBASQUARELIMIT) 2510 { 2511 return squareSimple(result, x); 2512 } 2513 // half length, round up. 2514 auto half = (x.length >> 1) + (x.length & 1); 2515 2516 const(BigDigit)[] x0 = x[0 .. half]; 2517 const(BigDigit)[] x1 = x[half .. $]; 2518 BigDigit [] mid = scratchbuff[0 .. half*2]; 2519 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 2520 // initially use result to store temporaries 2521 BigDigit [] xdiff= result[0 .. half]; 2522 const BigDigit [] ydiff = result[half .. half*2]; 2523 2524 // First, we calculate mid. We don't need its sign 2525 inplaceSub(xdiff, x0, x1); 2526 squareKaratsuba(mid, xdiff, newscratchbuff); 2527 2528 // Set result = x0x0 + (N*N)*x1x1 2529 squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff); 2530 squareKaratsuba(result[2*half .. $], x1, newscratchbuff); 2531 2532 /* result += N * (x0x0 + x1x1) 2533 Do this with three half-length additions. With a = x0x0, b = x1x1: 2534 R1 = aHI + bLO + aLO 2535 R2 = aHI + bLO + aHI + carry_from_R1 2536 R3 = bHi + carry_from_R2 2537 */ 2538 BigDigit[] R1 = result[half .. half*2]; 2539 BigDigit[] R2 = result[half*2 .. half*3]; 2540 BigDigit[] R3 = result[half*3..$]; 2541 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 2542 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0 2543 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 2544 if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 2545 if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3); 2546 2547 // And finally we subtract mid, which is always positive 2548 subAssignSimple(result[half..$], mid); 2549 } 2550 2551 /* Knuth's Algorithm D, as presented in 2552 * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002). 2553 * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18. 2554 * Given u and v, calculates quotient = u / v, u = u % v. 2555 * v must be normalized (ie, the MSB of v must be 1). 2556 * The most significant words of quotient and u may be zero. 2557 * u[0 .. v.length] holds the remainder. 2558 */ 2559 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 2560 pure nothrow @safe 2561 { 2562 assert(quotient.length == u.length - v.length, 2563 "quotient has wrong length"); 2564 assert(v.length > 1, "v must not be empty"); 2565 assert(u.length >= v.length, "u must be larger or equal to v"); 2566 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]"); 2567 assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]"); 2568 // BUG: This code only works if BigDigit is uint. 2569 uint vhi = v[$-1]; 2570 uint vlo = v[$-2]; 2571 2572 for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--) 2573 { 2574 // Compute estimate of quotient[j], 2575 // qhat = (three most significant words of u)/(two most sig words of v). 2576 uint qhat; 2577 if (u[j + v.length] == vhi) 2578 { 2579 // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001) 2580 qhat = uint.max; 2581 } 2582 else 2583 { 2584 uint ulo = u[j + v.length - 2]; 2585 version (D_InlineAsm_X86) 2586 { 2587 // Note: On DMD, this is only ~10% faster than the non-asm code. 2588 uint *p = &u[j + v.length - 1]; 2589 asm pure nothrow @trusted 2590 { 2591 mov EAX, p; 2592 mov EDX, [EAX+4]; 2593 mov EAX, [EAX]; 2594 div dword ptr [vhi]; 2595 mov qhat, EAX; 2596 mov ECX, EDX; 2597 div3by2correction: 2598 mul dword ptr [vlo]; // EDX:EAX = qhat * vlo 2599 sub EAX, ulo; 2600 sbb EDX, ECX; 2601 jbe div3by2done; 2602 mov EAX, qhat; 2603 dec EAX; 2604 mov qhat, EAX; 2605 add ECX, dword ptr [vhi]; 2606 jnc div3by2correction; 2607 div3by2done: ; 2608 } 2609 } 2610 else 2611 { // version (InlineAsm) 2612 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1]; 2613 immutable bigqhat = uu / vhi; 2614 ulong rhat = uu - bigqhat * vhi; 2615 qhat = cast(uint) bigqhat; 2616 again: 2617 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo)) 2618 { 2619 --qhat; 2620 rhat += vhi; 2621 if (!(rhat & 0xFFFF_FFFF_0000_0000L)) 2622 goto again; 2623 } 2624 } // version (InlineAsm) 2625 } 2626 // Multiply and subtract. 2627 uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0); 2628 2629 if (u[j+v.length] < carry) 2630 { 2631 // If we subtracted too much, add back 2632 --qhat; 2633 carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0); 2634 } 2635 quotient[j] = qhat; 2636 u[j + v.length] = u[j + v.length] - carry; 2637 } 2638 } 2639 2640 // TODO: Replace with a library call 2641 void itoaZeroPadded(char[] output, uint value) 2642 pure nothrow @safe @nogc 2643 { 2644 for (auto i = output.length; i--;) 2645 { 2646 if (value < 10) 2647 { 2648 output[i] = cast(char)(value + '0'); 2649 value = 0; 2650 } 2651 else 2652 { 2653 output[i] = cast(char)(value % 10 + '0'); 2654 value /= 10; 2655 } 2656 } 2657 } 2658 2659 void toHexZeroPadded(char[] output, uint value, 2660 LetterCase letterCase = LetterCase.upper) pure nothrow @safe 2661 { 2662 ptrdiff_t x = output.length - 1; 2663 static immutable string upperHexDigits = "0123456789ABCDEF"; 2664 static immutable string lowerHexDigits = "0123456789abcdef"; 2665 for ( ; x >= 0; --x) 2666 { 2667 if (letterCase == LetterCase.upper) 2668 { 2669 output[x] = upperHexDigits[value & 0xF]; 2670 } 2671 else 2672 { 2673 output[x] = lowerHexDigits[value & 0xF]; 2674 } 2675 value >>= 4; 2676 } 2677 } 2678 2679 // Returns the highest value of i for which left[i]!=right[i], 2680 // or 0 if left[] == right[] 2681 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right) 2682 pure nothrow @nogc @safe 2683 { 2684 assert(left.length == right.length, 2685 "left have a length equal to that of right"); 2686 for (ptrdiff_t i = left.length - 1; i>0; --i) 2687 { 2688 if (left[i] != right[i]) 2689 return i; 2690 } 2691 return 0; 2692 } 2693 2694 // Returns the lowest value of i for which x[i]!=0. 2695 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe 2696 { 2697 int k = 0; 2698 while (x[k]==0) 2699 { 2700 ++k; 2701 assert(k < x.length, "k must be less than x.length"); 2702 } 2703 return k; 2704 } 2705 2706 /* 2707 Calculate quotient and remainder of u / v using fast recursive division. 2708 v must be normalised, and must be at least half as long as u. 2709 Given u and v, v normalised, calculates quotient = u/v, u = u%v. 2710 scratch is temporary storage space, length must be >= quotient + 1. 2711 2712 Returns: 2713 u[0 .. v.length] is the remainder. u[v.length..$] is corrupted. 2714 2715 Implements algorithm 1.8 from MCA. 2716 This algorithm has an annoying special case. After the first recursion, the 2717 highest bit of the quotient may be set. This means that in the second 2718 recursive call, the 'in' contract would be violated. (This happens only 2719 when the top quarter of u is equal to the top half of v. A base 10 2720 equivalent example of this situation is 5517/56; the first step gives 2721 55/5 = 11). To maintain the in contract, we pad a zero to the top of both 2722 u and the quotient. 'mayOverflow' indicates that that the special case 2723 has occurred. 2724 (In MCA, a different strategy is used: the in contract is weakened, and 2725 schoolbookDivMod is more general: it allows the high bit of u to be set). 2726 See also: 2727 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 2728 Max-Planck Institute fuer Informatik, (Oct 1998). 2729 */ 2730 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v, 2731 BigDigit[] scratch, bool mayOverflow = false) 2732 pure nothrow @safe 2733 in 2734 { 2735 // v must be normalized 2736 assert(v.length > 1, "v must not be empty"); 2737 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]"); 2738 assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]"); 2739 assert(quotient.length == u.length - v.length, 2740 "quotient must be of equal length of u - v"); 2741 if (mayOverflow) 2742 { 2743 assert(u[$-1] == 0, "Invalid value at u[$ - 1]"); 2744 assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]"); 2745 } 2746 2747 // Must be symmetric. Use block schoolbook division if not. 2748 assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length, 2749 "Invalid length of u"); 2750 assert((mayOverflow ? u.length-1 : u.length) >= v.length, 2751 "Invalid length of u"); 2752 assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1), 2753 "Invalid quotient length"); 2754 } 2755 do 2756 { 2757 if (quotient.length < FASTDIVLIMIT) 2758 { 2759 return schoolbookDivMod(quotient, u, v); 2760 } 2761 2762 // Split quotient into two halves, but keep padding in the top half 2763 auto k = (mayOverflow ? quotient.length - 1 : quotient.length) >> 1; 2764 2765 // RECURSION 1: Calculate the high half of the quotient 2766 2767 // Note that if u and quotient were padded, they remain padded during 2768 // this call, so in contract is satisfied. 2769 recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $], 2770 scratch, mayOverflow); 2771 2772 // quotient[k..$] is our guess at the high quotient. 2773 // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the 2774 // first remainder. u[0 .. 2*k] is the low part. 2775 2776 // Calculate the full first remainder to be 2777 // remainder - highQuotient * lowDivisor 2778 // reducing highQuotient until the remainder is positive. 2779 // The low part of the remainder, u[0 .. k], cannot be altered by this. 2780 2781 adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k, 2782 scratch[0 .. quotient.length], mayOverflow); 2783 2784 // RECURSION 2: Calculate the low half of the quotient 2785 // The full first remainder is now in u[0 .. k + v.length]. 2786 2787 if (u[k + v.length - 1] & 0x8000_0000) 2788 { 2789 // Special case. The high quotient is 0x1_00...000 or 0x1_00...001. 2790 // This means we need an extra quotient word for the next recursion. 2791 // We need to restore the invariant for the recursive calls. 2792 // We do this by padding both u and quotient. Extending u is trivial, 2793 // because the higher words will not be used again. But for the 2794 // quotient, we're clobbering the low word of the high quotient, 2795 // so we need save it, and add it back in after the recursive call. 2796 2797 auto clobberedQuotient = quotient[k]; 2798 u[k+v.length] = 0; 2799 2800 recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1], 2801 v[k .. $], scratch, true); 2802 adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k, 2803 scratch[0 .. 2 * k+1], true); 2804 2805 // Now add the quotient word that got clobbered earlier. 2806 multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient); 2807 } 2808 else 2809 { 2810 // The special case has NOT happened. 2811 recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $], 2812 scratch, false); 2813 2814 // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length] 2815 2816 adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k, 2817 scratch[0 .. 2 * k]); 2818 } 2819 } 2820 2821 // rem -= quot * v[0 .. k]. 2822 // If would make rem negative, decrease quot until rem is >= 0. 2823 // Needs (quot.length * k) scratch space to store the result of the multiply. 2824 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v, 2825 ptrdiff_t k, 2826 BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe 2827 { 2828 assert(rem.length == v.length, "rem must be as long as v"); 2829 mulInternal(scratch, quot, v[0 .. k]); 2830 uint carry = 0; 2831 if (mayOverflow) 2832 carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]); 2833 else 2834 carry = subAssignSimple(rem, scratch); 2835 while (carry) 2836 { 2837 multibyteIncrementAssign!('-')(quot, 1); // quot-- 2838 carry -= multibyteAdd(rem, rem, v, 0); 2839 } 2840 } 2841 2842 // Cope with unbalanced division by performing block schoolbook division. 2843 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 2844 pure nothrow @safe 2845 { 2846 import core.memory : GC; 2847 assert(quotient.length == u.length - v.length, 2848 "quotient must be of equal length of u - v"); 2849 assert(v.length > 1, "v must not be empty"); 2850 assert(u.length >= v.length, "u must be longer or of equal length as v"); 2851 assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]"); 2852 assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]"); 2853 BigDigit [] scratch = new BigDigit[v.length + 1]; 2854 2855 // Perform block schoolbook division, with 'v.length' blocks. 2856 auto m = u.length - v.length; 2857 while (m > v.length) 2858 { 2859 immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0; 2860 BigDigit saveq; 2861 if (mayOverflow) 2862 { 2863 u[m + v.length] = 0; 2864 saveq = quotient[m]; 2865 } 2866 recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)], 2867 u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow); 2868 if (mayOverflow) 2869 { 2870 assert(quotient[m] == 0, "quotient must not be 0"); 2871 quotient[m] = saveq; 2872 } 2873 m -= v.length; 2874 } 2875 recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch); 2876 if (!__ctfe) 2877 { 2878 //use free only at runtime 2879 () @trusted { GC.free(scratch.ptr); } (); 2880 } 2881 } 2882 2883 @system unittest 2884 { 2885 version (none) 2886 { 2887 import core.stdc.stdio; 2888 2889 void printBiguint(const uint [] data) 2890 { 2891 char [] buff = biguintToHex(new char[data.length*9], data, '_'); 2892 printf("%.*s\n", cast(int) buff.length, buff.ptr); 2893 } 2894 2895 void printDecimalBigUint(BigUint data) 2896 { 2897 auto str = data.toDecimalString(0); 2898 printf("%.*s\n", cast(int) str.length, str.ptr); 2899 } 2900 } 2901 2902 uint [] a, b; 2903 a = new uint[43]; 2904 b = new uint[179]; 2905 for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i; 2906 for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546; 2907 2908 a[$-1] |= 0x8000_0000; 2909 uint [] r = new uint[a.length]; 2910 uint [] q = new uint[b.length-a.length+1]; 2911 2912 divModInternal(q, r, b, a); 2913 q = q[0..$-1]; 2914 uint [] r1 = r.dup; 2915 uint [] q1 = q.dup; 2916 blockDivMod(q, b, a); 2917 r = b[0 .. a.length]; 2918 assert(r[] == r1[]); 2919 assert(q[] == q1[]); 2920 } 2921 2922 // biguintToOctal 2923 @safe unittest 2924 { 2925 enum bufSize = 5 * BigDigitBits / 3 + 1; 2926 auto buf = new char[bufSize]; 2927 size_t i; 2928 BigDigit[] data = [ 342391 ]; 2929 2930 // Basic functionality with single word 2931 i = biguintToOctal(buf, data); 2932 assert(i == bufSize - 7 && buf[i .. $] == "1234567"); 2933 2934 // Test carrying bits between words 2935 data = [ 0x77053977, 0x39770539, 0x00000005 ]; 2936 i = biguintToOctal(buf, data); 2937 assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567"); 2938 2939 // Test carried bits in the last word 2940 data = [ 0x80000000 ]; 2941 i = biguintToOctal(buf, data); 2942 assert(buf[i .. $] == "20000000000"); 2943 2944 // Test boundary between 3rd and 4th word where the number of bits is 2945 // divisible by 3 and no bits should be carried. 2946 // 2947 // The 0xC0000000's are for "poisoning" the carry to be non-zero when the 2948 // rollover happens, so that if any bugs happen in wrongly adding the carry 2949 // to the next word, non-zero bits will show up in the output. 2950 data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ]; 2951 i = biguintToOctal(buf, data); 2952 assert(buf[i .. $] == "2060000000001400000000030000000000"); 2953 2954 // Boundary case: 0 2955 data = [ 0 ]; 2956 i = biguintToOctal(buf, data); 2957 assert(buf[i .. $] == "0"); 2958 }