1 /** Fundamental operations for arbitrary-precision arithmetic 2 * 3 * These functions are for internal use only. 4 */ 5 /* Copyright Don Clugston 2008 - 2010. 6 * Distributed under the Boost Software License, Version 1.0. 7 * (See accompanying file LICENSE_1_0.txt or copy at 8 * http://www.boost.org/LICENSE_1_0.txt) 9 */ 10 /* References: 11 "Modern Computer Arithmetic" (MCA) is the primary reference for all 12 algorithms used in this library. 13 - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic", 14 Version 0.5.9, (Oct 2010). 15 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 16 Max-Planck Institute fuer Informatik, (Oct 1998). 17 - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.", 18 INRIA 4664, (Dec 2002). 19 - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?", 20 http://bodrato.it/papers (2006). 21 - A. Fog, "Optimizing subroutines in assembly language", 22 www.agner.org/optimize (2008). 23 - A. Fog, "The microarchitecture of Intel and AMD CPU's", 24 www.agner.org/optimize (2008). 25 - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs 26 and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008). 27 28 Idioms: 29 Many functions in this module use 30 'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)' 31 in order to disable implicit conversion. 32 33 */ 34 module std.internal.math.biguintcore; 35 36 version (D_InlineAsm_X86) 37 { 38 static import std.internal.math.biguintx86; 39 } 40 static import std.internal.math.biguintnoasm; 41 42 import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT, 43 KARATSUBASQUARELIMIT; 44 45 alias multibyteAdd = multibyteAddSub!('+'); 46 alias multibyteSub = multibyteAddSub!('-'); 47 48 private import std.traits; 49 private import std.range.primitives; 50 public import std.ascii : LetterCase; 51 import std.range.primitives; 52 import std.traits; 53 54 private: 55 56 // dipatchers to the right low-level primitives. Added to allow BigInt CTFE for 57 // 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's 58 // used by the other architectures too. 59 // See comments below in case it has to be refactored. 60 version (X86) 61 uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry) 62 { 63 // must be checked before, otherwise D_InlineAsm_X86 is true. 64 if (__ctfe) 65 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry); 66 // Runtime. 67 else version (D_InlineAsm_X86) 68 return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry); 69 // Runtime if no asm available. 70 else 71 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry); 72 } 73 // Any other architecture 74 else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub; 75 76 version (X86) 77 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry) 78 { 79 if (__ctfe) 80 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry); 81 else version (D_InlineAsm_X86) 82 return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry); 83 else 84 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry); 85 } 86 else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign; 87 88 version (X86) 89 uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits) 90 { 91 if (__ctfe) 92 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits); 93 else version (D_InlineAsm_X86) 94 return std.internal.math.biguintx86.multibyteShl(dest, src, numbits); 95 else 96 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits); 97 } 98 else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl; 99 100 version (X86) 101 void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits) 102 { 103 if (__ctfe) 104 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits); 105 else version (D_InlineAsm_X86) 106 std.internal.math.biguintx86.multibyteShr(dest, src, numbits); 107 else 108 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits); 109 } 110 else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr; 111 112 version (X86) 113 uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry) 114 { 115 if (__ctfe) 116 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry); 117 else version (D_InlineAsm_X86) 118 return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry); 119 else 120 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry); 121 } 122 else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul; 123 124 version (X86) 125 uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry) 126 { 127 if (__ctfe) 128 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry); 129 else version (D_InlineAsm_X86) 130 return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry); 131 else 132 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry); 133 } 134 else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd; 135 136 version (X86) 137 void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right) 138 { 139 if (__ctfe) 140 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right); 141 else version (D_InlineAsm_X86) 142 std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right); 143 else 144 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right); 145 } 146 else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate; 147 148 version (X86) 149 uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow) 150 { 151 if (__ctfe) 152 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow); 153 else version (D_InlineAsm_X86) 154 return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow); 155 else 156 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow); 157 } 158 else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign; 159 160 version (X86) 161 void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src) 162 { 163 if (__ctfe) 164 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src); 165 else version (D_InlineAsm_X86) 166 std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src); 167 else 168 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src); 169 } 170 else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares; 171 172 version (X86) 173 void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x) 174 { 175 if (__ctfe) 176 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x); 177 else version (D_InlineAsm_X86) 178 std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x); 179 else 180 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x); 181 } 182 else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate; 183 184 version (X86) 185 void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x) 186 { 187 if (__ctfe) 188 std.internal.math.biguintnoasm.multibyteSquare(result, x); 189 else version (D_InlineAsm_X86) 190 std.internal.math.biguintx86.multibyteSquare(result, x); 191 else 192 std.internal.math.biguintnoasm.multibyteSquare(result, x); 193 } 194 else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare; 195 196 // Limits for when to switch between algorithms. 197 // Half the size of the data cache. 198 @nogc nothrow pure @safe size_t getCacheLimit() 199 { 200 import core.cpuid : dataCaches; 201 return dataCaches[0].size * 1024 / 2; 202 } 203 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division 204 205 206 // These constants are used by shift operations 207 static if (BigDigit.sizeof == int.sizeof) 208 { 209 enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 } 210 alias BIGHALFDIGIT = ushort; 211 } 212 else static if (BigDigit.sizeof == long.sizeof) 213 { 214 alias BIGHALFDIGIT = uint; 215 enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 } 216 } 217 else static assert(0, "Unsupported BigDigit size"); 218 219 import std.exception : assumeUnique; 220 import std.traits : isIntegral; 221 enum BigDigitBits = BigDigit.sizeof*8; 222 template maxBigDigits(T) 223 if (isIntegral!T) 224 { 225 enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof; 226 } 227 228 static immutable BigDigit[] ZERO = [0]; 229 static immutable BigDigit[] ONE = [1]; 230 static immutable BigDigit[] TWO = [2]; 231 static immutable BigDigit[] TEN = [10]; 232 233 234 public: 235 236 /// BigUint performs memory management and wraps the low-level calls. 237 struct BigUint 238 { 239 private: 240 pure invariant() 241 { 242 assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ), 243 "Invariant requires data to not empty or zero"); 244 } 245 246 immutable(BigDigit) [] data = ZERO; 247 248 this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe 249 { 250 data = x; 251 } 252 package(std) // used from: std.bigint 253 this(T)(T x) pure nothrow @safe scope 254 if (isIntegral!T) 255 { 256 opAssign(x); 257 } 258 259 enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc { 260 return assumeUnique(input); 261 }; 262 public: 263 // Length in uints 264 @property size_t uintLength() pure nothrow const @safe @nogc scope 265 { 266 static if (BigDigit.sizeof == uint.sizeof) 267 { 268 return data.length; 269 } 270 else static if (BigDigit.sizeof == ulong.sizeof) 271 { 272 return data.length * 2 - 273 ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0); 274 } 275 } 276 @property size_t ulongLength() pure nothrow const @safe @nogc scope 277 { 278 static if (BigDigit.sizeof == uint.sizeof) 279 { 280 return (data.length + 1) >> 1; 281 } 282 else static if (BigDigit.sizeof == ulong.sizeof) 283 { 284 return data.length; 285 } 286 } 287 288 // The value at (cast(ulong[]) data)[n] 289 ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope 290 { 291 static if (BigDigit.sizeof == int.sizeof) 292 { 293 if (data.length == n*2 + 1) return data[n*2]; 294 return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 ); 295 } 296 else static if (BigDigit.sizeof == long.sizeof) 297 { 298 return data[n]; 299 } 300 } 301 302 uint peekUint(size_t n) pure nothrow const @safe @nogc scope 303 { 304 static if (BigDigit.sizeof == int.sizeof) 305 { 306 return data[n]; 307 } 308 else 309 { 310 immutable x = data[n >> 1]; 311 return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x; 312 } 313 } 314 315 /// 316 void opAssign(Tulong)(Tulong u) pure nothrow @safe scope 317 if (is (Tulong == ulong)) 318 { 319 if (u == 0) data = ZERO; 320 else if (u == 1) data = ONE; 321 else if (u == 2) data = TWO; 322 else if (u == 10) data = TEN; 323 else 324 { 325 static if (BigDigit.sizeof == int.sizeof) 326 { 327 uint ulo = cast(uint)(u & 0xFFFF_FFFF); 328 uint uhi = cast(uint)(u >> 32); 329 if (uhi == 0) 330 { 331 data = [ulo]; 332 } 333 else 334 { 335 data = [ulo, uhi]; 336 } 337 } 338 else static if (BigDigit.sizeof == long.sizeof) 339 { 340 data = [u]; 341 } 342 } 343 } 344 void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope 345 { 346 this.data = y.data; 347 } 348 349 /// 350 int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope 351 { 352 if (data.length != y.data.length) 353 return (data.length > y.data.length) ? 1 : -1; 354 size_t k = highestDifferentDigit(data, y.data); 355 if (data[k] == y.data[k]) 356 return 0; 357 return data[k] > y.data[k] ? 1 : -1; 358 } 359 360 /// 361 int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope 362 if (is (Tulong == ulong)) 363 { 364 if (data.length > maxBigDigits!Tulong) 365 return 1; 366 367 foreach_reverse (i; 0 .. maxBigDigits!Tulong) 368 { 369 BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits)); 370 if (tmp == 0) 371 if (data.length >= i+1) 372 { 373 // Since ZERO is [0], so we cannot simply return 1 here, as 374 // data[i] would be 0 for i == 0 in that case. 375 return (data[i] > 0) ? 1 : 0; 376 } 377 else 378 continue; 379 else 380 if (i+1 > data.length) 381 return -1; 382 else if (tmp != data[i]) 383 return data[i] > tmp ? 1 : -1; 384 } 385 return 0; 386 } 387 388 bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope 389 { 390 return y.data[] == data[]; 391 } 392 393 bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope 394 { 395 if (data.length > 2) 396 return false; 397 uint ylo = cast(uint)(y & 0xFFFF_FFFF); 398 uint yhi = cast(uint)(y >> 32); 399 if (data.length == 2 && data[1]!=yhi) 400 return false; 401 if (data.length == 1 && yhi != 0) 402 return false; 403 return (data[0] == ylo); 404 } 405 406 bool isZero() pure const nothrow @safe @nogc scope 407 { 408 return data.length == 1 && data[0] == 0; 409 } 410 411 size_t numBytes() pure nothrow const @safe @nogc scope 412 { 413 return data.length * BigDigit.sizeof; 414 } 415 416 // the extra bytes are added to the start of the string 417 char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope 418 { 419 immutable predictlength = 20+20*(data.length/2); // just over 19 420 char [] buff = new char[frontExtraBytes + predictlength]; 421 ptrdiff_t sofar = biguintToDecimal(buff, data.dup); 422 return buff[sofar-frontExtraBytes..$]; 423 } 424 425 /** Convert to a hex string, printing a minimum number of digits 'minPadding', 426 * allocating an additional 'frontExtraBytes' at the start of the string. 427 * Padding is done with padChar, which may be '0' or ' '. 428 * 'separator' is a digit separation character. If non-zero, it is inserted 429 * between every 8 digits. 430 * Separator characters do not contribute to the minPadding. 431 */ 432 char [] toHexString(int frontExtraBytes, char separator = 0, 433 int minPadding=0, char padChar = '0', 434 LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope 435 { 436 // Calculate number of extra padding bytes 437 size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof) 438 ? minPadding - data.length * 2 * BigDigit.sizeof : 0; 439 440 // Length not including separator bytes 441 size_t lenBytes = data.length * 2 * BigDigit.sizeof; 442 443 // Calculate number of separator bytes 444 size_t mainSeparatorBytes = separator ? (lenBytes / 8) - 1 : 0; 445 immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0; 446 447 char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes]; 448 biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase); 449 if (extraPad > 0) 450 { 451 if (separator) 452 { 453 size_t start = frontExtraBytes; // first index to pad 454 if (extraPad &7) 455 { 456 // Do 1 to 7 extra zeros. 457 buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar; 458 buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator); 459 start += (extraPad & 7) + 1; 460 } 461 for (int i=0; i< (extraPad >> 3); ++i) 462 { 463 buff[start .. start + 8] = padChar; 464 buff[start + 8] = (padChar == ' ' ? ' ' : separator); 465 start += 9; 466 } 467 } 468 else 469 { 470 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar; 471 } 472 } 473 int z = frontExtraBytes; 474 if (lenBytes > minPadding) 475 { 476 // Strip leading zeros. 477 ptrdiff_t maxStrip = lenBytes - minPadding; 478 while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0) 479 { 480 ++z; 481 --maxStrip; 482 } 483 } 484 if (padChar!='0') 485 { 486 // Convert leading zeros into padChars. 487 for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k) 488 { 489 if (buff[k]=='0') buff[k]=padChar; 490 } 491 } 492 return buff[z-frontExtraBytes..$]; 493 } 494 495 /** 496 * Convert to an octal string. 497 */ 498 char[] toOctalString() pure nothrow @safe const scope 499 { 500 auto predictLength = 1 + data.length*BigDigitBits / 3; 501 char[] buff = new char[predictLength]; 502 size_t firstNonZero = biguintToOctal(buff, data); 503 return buff[firstNonZero .. $]; 504 } 505 506 // return false if invalid character found 507 bool fromHexString(Range)(Range s) scope 508 if (isBidirectionalRange!Range && isSomeChar!(ElementType!Range)) 509 { 510 import std.range : walkLength; 511 512 //Strip leading zeros 513 while (!s.empty && s.front == '0') 514 s.popFront; 515 516 if (s.empty) 517 { 518 data = ZERO; 519 return true; 520 } 521 522 immutable len = (s.save.walkLength + 15) / 4; 523 auto tmp = new BigDigit[len + 1]; 524 uint part, sofar, partcount; 525 526 foreach_reverse (character; s) 527 { 528 if (character == '_') 529 continue; 530 531 uint x; 532 if (character >= '0' && character <= '9') 533 { 534 x = character - '0'; 535 } 536 else if (character >= 'A' && character <= 'F') 537 { 538 x = character - 'A' + 10; 539 } 540 else if (character >= 'a' && character <= 'f') 541 { 542 x = character - 'a' + 10; 543 } 544 else 545 { 546 return false; 547 } 548 549 part >>= 4; 550 part |= (x << (32 - 4)); 551 ++partcount; 552 553 if (partcount == 8) 554 { 555 tmp[sofar] = part; 556 ++sofar; 557 partcount = 0; 558 part = 0; 559 } 560 } 561 if (part) 562 { 563 for ( ; partcount != 8; ++partcount) part >>= 4; 564 tmp[sofar] = part; 565 ++sofar; 566 } 567 if (sofar == 0) 568 data = ZERO; 569 else 570 data = trustedAssumeUnique(tmp[0 .. sofar]); 571 572 return true; 573 } 574 575 // return true if OK; false if erroneous characters found 576 bool fromDecimalString(Range)(Range s) scope 577 if (isForwardRange!Range && isSomeChar!(ElementType!Range)) 578 { 579 import std.range : walkLength; 580 581 while (!s.empty && s.front == '0') 582 { 583 s.popFront; 584 } 585 586 if (s.empty) 587 { 588 data = ZERO; 589 return true; 590 } 591 592 auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19; 593 auto tmp = new BigDigit[predict_length]; 594 595 tmp.length = biguintFromDecimal(tmp, s); 596 597 data = trustedAssumeUnique(tmp); 598 return true; 599 } 600 601 void fromMagnitude(Range)(Range magnitude) scope 602 if (isInputRange!Range 603 && (isForwardRange!Range || hasLength!Range) 604 && isUnsigned!(ElementType!Range)) 605 { 606 while (!magnitude.empty && magnitude.front == 0) 607 magnitude.popFront; 608 static if (hasLength!Range) 609 immutable inputLen = magnitude.length; 610 else 611 immutable inputLen = magnitude.save.walkLength; 612 if (!inputLen) 613 { 614 this.data = ZERO; 615 return; 616 } 617 // `magnitude` has its most significant element first but BigUint.data 618 // stores the most significant last. 619 BigDigit[] newDigits; 620 alias E = ElementType!Range; 621 static if (E.sizeof == BigDigit.sizeof) 622 { 623 newDigits = new BigDigit[inputLen]; 624 foreach_reverse (ref digit; newDigits) 625 { 626 digit = magnitude.front; 627 magnitude.popFront(); 628 } 629 } 630 else static if (E.sizeof < BigDigit.sizeof) 631 { 632 enum elementsPerDigit = BigDigit.sizeof / E.sizeof; 633 newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit]; 634 immutable remainder = inputLen % elementsPerDigit; 635 // If there is a remainder specially assemble the most significant digit. 636 if (remainder) 637 { 638 BigDigit tmp = magnitude.front; 639 magnitude.popFront(); 640 foreach (_; 1 .. remainder) 641 { 642 tmp = (tmp << (E.sizeof * 8)) | magnitude.front; 643 magnitude.popFront(); 644 } 645 newDigits[$-1] = tmp; 646 } 647 // Assemble full digits from most to least significant. 648 foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)]) 649 { 650 BigDigit tmp; 651 static foreach (n; 0 .. elementsPerDigit) 652 { 653 tmp |= cast(BigDigit) magnitude.front << 654 ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8); 655 magnitude.popFront(); 656 } 657 digit = tmp; 658 } 659 } 660 else static if (E.sizeof > BigDigit.sizeof) 661 { 662 enum digitsPerElement = E.sizeof / BigDigit.sizeof; 663 newDigits = new BigDigit[inputLen * digitsPerElement]; 664 size_t i = newDigits.length - 1; 665 foreach (element; magnitude) 666 { 667 static foreach (n; 0 .. digitsPerElement) 668 newDigits[i - n] = 669 cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8)); 670 i -= digitsPerElement; 671 } 672 while (newDigits[$-1] == 0) 673 newDigits = newDigits[0 .. $-1]; 674 } 675 else 676 static assert(0); 677 this.data = trustedAssumeUnique(newDigits); 678 return; 679 } 680 681 nothrow pure @safe unittest 682 { 683 immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD]; 684 // Internal representation is most-significant-last but `fromMagnitude` 685 // argument is most-significant-first. 686 immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005]; 687 BigUint b; 688 // Test with reference magnitude. 689 b.fromMagnitude(referenceMagnitude); 690 assert(b.data == referenceData); 691 // Test ubyte array. 692 import std.bitmanip : nativeToBigEndian; 693 ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~ 694 nativeToBigEndian(referenceMagnitude[1]) ~ 695 nativeToBigEndian(referenceMagnitude[2]); 696 b.data = ZERO; 697 b.fromMagnitude(ubyteMagnitude); 698 assert(b.data == referenceData); 699 // Test ulong array. 700 static if (BigDigit.sizeof == uint.sizeof) 701 immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]), 702 ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2], 703 ]; 704 else static if (BigDigit.sizeof == ulong.sizeof) 705 alias ulongMagnitude = referenceMagnitude; 706 b.data = ZERO; 707 b.fromMagnitude(ulongMagnitude); 708 assert(b.data == referenceData); 709 } 710 711 //////////////////////// 712 // 713 // All of these member functions create a new BigUint. 714 715 // return x >> y 716 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope 717 if (op == ">>" && is (Tulong == ulong)) 718 { 719 assert(y > 0, "Can not right shift BigUint by 0"); 720 uint bits = cast(uint) y & BIGDIGITSHIFTMASK; 721 if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO); 722 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 723 if (bits == 0) 724 { 725 return BigUint(data[words..$]); 726 } 727 else 728 { 729 uint [] result = new BigDigit[data.length - words]; 730 multibyteShr(result, data[words..$], bits); 731 732 if (result.length > 1 && result[$-1] == 0) 733 return BigUint(trustedAssumeUnique(result[0 .. $-1])); 734 else 735 return BigUint(trustedAssumeUnique(result)); 736 } 737 } 738 739 // return x << y 740 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope 741 if (op == "<<" && is (Tulong == ulong)) 742 { 743 assert(y > 0, "Can not left shift BigUint by 0"); 744 if (isZero()) return this; 745 uint bits = cast(uint) y & BIGDIGITSHIFTMASK; 746 assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max), 747 "Shift result exceeds temporary store"); 748 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 749 BigDigit [] result = new BigDigit[data.length + words+1]; 750 result[0 .. words] = 0; 751 if (bits == 0) 752 { 753 result[words .. words+data.length] = data[]; 754 return BigUint(trustedAssumeUnique(result[0 .. words+data.length])); 755 } 756 else 757 { 758 immutable c = multibyteShl(result[words .. words+data.length], data, bits); 759 if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length])); 760 result[$-1] = c; 761 return BigUint(trustedAssumeUnique(result)); 762 } 763 } 764 765 // If wantSub is false, return x + y, leaving sign unchanged 766 // If wantSub is true, return abs(x - y), negating sign if x < y 767 static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y, bool wantSub, ref bool sign) pure nothrow @safe 768 if (is(Tulong == ulong)) 769 { 770 BigUint r; 771 if (wantSub) 772 { // perform a subtraction 773 if (x.data.length > 2) 774 { 775 // subInt returns GC allocated array, can be safely cast to immutable 776 r.data = (() @trusted => cast(immutable) subInt(x.data, y))(); 777 } 778 else 779 { // could change sign! 780 ulong xx = x.data[0]; 781 if (x.data.length > 1) 782 xx += (cast(ulong) x.data[1]) << 32; 783 ulong d; 784 if (xx <= y) 785 { 786 d = y - xx; 787 sign = !sign; 788 } 789 else 790 { 791 d = xx - y; 792 } 793 if (d == 0) 794 { 795 r = 0UL; 796 sign = false; 797 return r; 798 } 799 if (d > uint.max) 800 { 801 r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)]; 802 } 803 else 804 { 805 r.data = [cast(uint)(d & 0xFFFF_FFFF)]; 806 } 807 } 808 } 809 else 810 { 811 // addInt returns GC allocated array, can be safely cast to immutable 812 r.data = (() @trusted => cast(immutable) addInt(x.data, y))(); 813 } 814 return r; 815 } 816 817 // If wantSub is false, return x + y, leaving sign unchanged. 818 // If wantSub is true, return abs(x - y), negating sign if x<y 819 static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign) 820 pure nothrow @safe 821 { 822 BigUint r; 823 if (wantSub) 824 { // perform a subtraction 825 bool negative; 826 // sub returns GC allocated array, can be safely cast to immutable 827 r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))(); 828 sign ^= negative; 829 if (r.isZero()) 830 { 831 sign = false; 832 } 833 } 834 else 835 { 836 // add returns GC allocated array, can be safely cast to immutable 837 r.data = (() @trusted => cast(immutable) add(x.data, y.data))(); 838 } 839 return r; 840 } 841 842 843 // return x*y. 844 static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe 845 { 846 if (y == 0 || x == 0) return BigUint(ZERO); 847 static if (T.sizeof * 8 <= 32) 848 uint hi = 0; 849 else 850 uint hi = cast(uint) (y >>> 32); 851 uint lo = cast(uint) (y & 0xFFFF_FFFF); 852 uint [] result = new BigDigit[x.data.length+1+(hi != 0)]; 853 result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0); 854 if (hi != 0) 855 { 856 result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1], 857 x.data, hi, 0); 858 } 859 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 860 } 861 862 /* return x * y. 863 */ 864 static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe 865 { 866 if (y == 0 || x == 0) 867 return BigUint(ZERO); 868 auto len = x.data.length + y.data.length; 869 BigDigit [] result = new BigDigit[len]; 870 if (y.data.length > x.data.length) 871 { 872 mulInternal(result, y.data, x.data); 873 } 874 else 875 { 876 if (x.data[]==y.data[]) squareInternal(result, x.data); 877 else mulInternal(result, x.data, y.data); 878 } 879 // the highest element could be zero, 880 // in which case we need to reduce the length 881 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 882 } 883 884 // return x / y 885 static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe 886 if ( is(immutable T == immutable uint) ) 887 { 888 uint y = y_; 889 if (y == 1) 890 return x; 891 uint [] result = new BigDigit[x.data.length]; 892 if ((y&(-y))==y) 893 { 894 assert(y != 0, "BigUint division by zero"); 895 // perfect power of 2 896 uint b = 0; 897 for (;y != 1; y>>=1) 898 { 899 ++b; 900 } 901 multibyteShr(result, x.data, b); 902 } 903 else 904 { 905 result[] = x.data[]; 906 cast(void) multibyteDivAssign(result, y, 0); 907 } 908 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 909 } 910 911 static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe 912 if ( is(immutable T == immutable ulong) ) 913 { 914 if (y <= uint.max) 915 return divInt!uint(x, cast(uint) y); 916 if (x.data.length < 2) 917 return BigUint(ZERO); 918 uint hi = cast(uint)(y >>> 32); 919 uint lo = cast(uint)(y & 0xFFFF_FFFF); 920 immutable uint[2] z = [lo, hi]; 921 BigDigit[] result = new BigDigit[x.data.length - z.length + 1]; 922 divModInternal(result, null, x.data, z[]); 923 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 924 } 925 926 // return x % y 927 static uint modInt(T)(scope BigUint x, T y_) pure 928 if ( is(immutable T == immutable uint) ) 929 { 930 import core.memory : GC; 931 uint y = y_; 932 assert(y != 0, "% 0 not allowed"); 933 if ((y&(-y)) == y) 934 { // perfect power of 2 935 return x.data[0] & (y-1); 936 } 937 else 938 { 939 // horribly inefficient - malloc, copy, & store are unnecessary. 940 uint [] wasteful = new BigDigit[x.data.length]; 941 wasteful[] = x.data[]; 942 immutable rem = multibyteDivAssign(wasteful, y, 0); 943 () @trusted { GC.free(wasteful.ptr); } (); 944 return rem; 945 } 946 } 947 948 // return x / y 949 static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe 950 { 951 if (y.data.length > x.data.length) 952 return BigUint(ZERO); 953 if (y.data.length == 1) 954 return divInt(x, y.data[0]); 955 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 956 divModInternal(result, null, x.data, y.data); 957 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 958 } 959 960 // return x % y 961 static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe 962 { 963 if (y.data.length > x.data.length) return x; 964 if (y.data.length == 1) 965 { 966 return BigUint([modInt(x, y.data[0])]); 967 } 968 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 969 BigDigit [] rem = new BigDigit[y.data.length]; 970 divModInternal(result, rem, x.data, y.data); 971 return BigUint(removeLeadingZeros(trustedAssumeUnique(rem))); 972 } 973 974 // Return x / y in quotient, x % y in remainder 975 static void divMod(BigUint x, scope BigUint y, 976 out BigUint quotient, out BigUint remainder) pure nothrow @safe 977 { 978 /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */ 979 if (y.data.length > x.data.length) 980 { 981 quotient = 0uL; 982 remainder = x; 983 } 984 else if (y.data.length == 1) 985 { 986 quotient = divInt(x, y.data[0]); 987 remainder = BigUint([modInt(x, y.data[0])]); 988 } 989 else 990 { 991 BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1]; 992 BigDigit[] rem = new BigDigit[y.data.length]; 993 divModInternal(quot, rem, x.data, y.data); 994 quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot))); 995 remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem))); 996 } 997 } 998 999 // return x op y 1000 static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign) 1001 pure nothrow @safe 1002 if (op == "|" || op == "^" || op == "&") 1003 { 1004 auto d1 = includeSign(x.data, y.uintLength, xSign); 1005 auto d2 = includeSign(y.data, x.uintLength, ySign); 1006 1007 foreach (i; 0 .. d1.length) 1008 { 1009 mixin("d1[i] " ~ op ~ "= d2[i];"); 1010 } 1011 1012 mixin("resultSign = xSign " ~ op ~ " ySign;"); 1013 1014 if (resultSign) 1015 { 1016 twosComplement(d1, d1); 1017 } 1018 1019 return BigUint(removeLeadingZeros(trustedAssumeUnique(d1))); 1020 } 1021 1022 /** 1023 * Return a BigUint which is x raised to the power of y. 1024 * Method: Powers of 2 are removed from x, then left-to-right binary 1025 * exponentiation is used. 1026 * Memory allocation is minimized: at most one temporary BigUint is used. 1027 */ 1028 static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe 1029 { 1030 // Deal with the degenerate cases first. 1031 if (y == 0) return BigUint(ONE); 1032 if (y == 1) return x; 1033 if (x == 0 || x == 1) return x; 1034 1035 BigUint result; 1036 1037 // Simplify, step 1: Remove all powers of 2. 1038 uint firstnonzero = firstNonZeroDigit(x.data); 1039 // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits)) 1040 // where BigDigitBits = BigDigit.sizeof * 8 1041 1042 // See if x[firstnonzero..$] can now fit into a single digit. 1043 bool singledigit = ((x.data.length - firstnonzero) == 1); 1044 // If true, then x0 is that digit 1045 // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits)) 1046 BigDigit x0 = x.data[firstnonzero]; 1047 assert(x0 != 0, "pow(0, y) not allowed"); 1048 // Length of the non-zero portion 1049 size_t nonzerolength = x.data.length - firstnonzero; 1050 ulong y0; 1051 uint evenbits = 0; // number of even bits in the bottom of x 1052 while (!(x0 & 1)) 1053 { 1054 x0 >>= 1; 1055 ++evenbits; 1056 } 1057 1058 if (x.data.length- firstnonzero == 2) 1059 { 1060 // Check for a single digit straddling a digit boundary 1061 const BigDigit x1 = x.data[firstnonzero+1]; 1062 if ((x1 >> evenbits) == 0) 1063 { 1064 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits)); 1065 singledigit = true; 1066 } 1067 } 1068 // Now if (singledigit), x^^y = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1069 1070 uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end 1071 1072 // Simplify, step 2: For singledigits, see if we can trivially reduce y 1073 1074 BigDigit finalMultiplier = 1UL; 1075 1076 if (singledigit) 1077 { 1078 // x fits into a single digit. Raise it to the highest power we can 1079 // that still fits into a single digit, then reduce the exponent accordingly. 1080 // We're quite likely to have a residual multiply at the end. 1081 // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100. 1082 // and 5^^13 still fits into a uint. 1083 evenshiftbits = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK); 1084 if (x0 == 1) 1085 { // Perfect power of 2 1086 result = 1UL; 1087 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y; 1088 } 1089 immutable p = highestPowerBelowUintMax(x0); 1090 if (y <= p) 1091 { // Just do it with pow 1092 result = cast(ulong) intpow(x0, y); 1093 if (evenbits + firstnonzero == 0) 1094 return result; 1095 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y; 1096 } 1097 y0 = y / p; 1098 finalMultiplier = intpow(x0, y - y0*p); 1099 x0 = intpow(x0, p); 1100 // Result is x0 1101 nonzerolength = 1; 1102 } 1103 // Now if (singledigit), x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1104 1105 // Perform a crude check for overflow and allocate result buffer. 1106 // The length required is y * lg2(x) bits. 1107 // which will always fit into y*x.length digits. But this is 1108 // a gross overestimate if x is small (length 1 or 2) and the highest 1109 // digit is nearly empty. 1110 // A better estimate is: 1111 // y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits, 1112 // and the first term is always between 1113 // y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and 1114 // y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS 1115 // For single digit payloads, we already have 1116 // x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1117 // and x0 is almost a full digit, so it's a tight estimate. 1118 // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y 1119 // Note that the divisions must be rounded up. 1120 1121 // Estimated length in BigDigits 1122 immutable estimatelength = singledigit 1123 ? 1 + y0 + ((evenbits*y + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y 1124 : x.data.length * y; 1125 // Imprecise check for overflow. Makes the extreme cases easier to debug 1126 // (less extreme overflow will result in an out of memory error). 1127 if (estimatelength > uint.max/(4*BigDigit.sizeof)) 1128 assert(0, "Overflow in BigInt.pow"); 1129 1130 // The result buffer includes space for all the trailing zeros 1131 BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength]; 1132 1133 // Do all the powers of 2! 1134 size_t result_start = cast(size_t)( firstnonzero * y 1135 + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0)); 1136 1137 resultBuffer[0 .. result_start] = 0; 1138 BigDigit [] t1 = resultBuffer[result_start..$]; 1139 BigDigit [] r1; 1140 1141 if (singledigit) 1142 { 1143 r1 = t1[0 .. 1]; 1144 r1[0] = x0; 1145 y = y0; 1146 } 1147 else 1148 { 1149 // It's not worth right shifting by evenbits unless we also shrink the length after each 1150 // multiply or squaring operation. That might still be worthwhile for large y. 1151 r1 = t1[0 .. x.data.length - firstnonzero]; 1152 r1[0..$] = x.data[firstnonzero..$]; 1153 } 1154 1155 if (y>1) 1156 { // Set r1 = r1 ^^ y. 1157 // The secondary buffer only needs space for the multiplication results 1158 BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start]; 1159 BigDigit [] r2; 1160 1161 int shifts = 63; // num bits in a long 1162 while (!(y & 0x8000_0000_0000_0000L)) 1163 { 1164 y <<= 1; 1165 --shifts; 1166 } 1167 y <<=1; 1168 1169 while (y != 0) 1170 { 1171 // For each bit of y: Set r1 = r1 * r1 1172 // If the bit is 1, set r1 = r1 * x 1173 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5. 1174 // Optimization opportunity: if more than 2 bits in y are set, 1175 // it's usually possible to reduce the number of multiplies 1176 // by caching odd powers of x. eg for y = 54, 1177 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2 1178 r2 = t2[0 .. r1.length*2]; 1179 squareInternal(r2, r1); 1180 if (y & 0x8000_0000_0000_0000L) 1181 { 1182 r1 = t1[0 .. r2.length + nonzerolength]; 1183 if (singledigit) 1184 { 1185 r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0); 1186 } 1187 else 1188 { 1189 mulInternal(r1, r2, x.data[firstnonzero..$]); 1190 } 1191 } 1192 else 1193 { 1194 r1 = t1[0 .. r2.length]; 1195 r1[] = r2[]; 1196 } 1197 y <<=1; 1198 shifts--; 1199 } 1200 while (shifts>0) 1201 { 1202 r2 = t2[0 .. r1.length * 2]; 1203 squareInternal(r2, r1); 1204 r1 = t1[0 .. r2.length]; 1205 r1[] = r2[]; 1206 --shifts; 1207 } 1208 } 1209 1210 if (finalMultiplier != 1) 1211 { 1212 const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0); 1213 if (carry) 1214 { 1215 r1 = t1[0 .. r1.length + 1]; 1216 r1[$-1] = carry; 1217 } 1218 } 1219 if (evenshiftbits) 1220 { 1221 const BigDigit carry = multibyteShl(r1, r1, evenshiftbits); 1222 if (carry != 0) 1223 { 1224 r1 = t1[0 .. r1.length + 1]; 1225 r1[$ - 1] = carry; 1226 } 1227 } 1228 while (r1[$ - 1]==0) 1229 { 1230 r1=r1[0 .. $ - 1]; 1231 } 1232 return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length])); 1233 } 1234 1235 // Implement toHash so that BigUint works properly as an AA key. 1236 size_t toHash() const @nogc nothrow pure @safe scope 1237 { 1238 return .hashOf(data); 1239 } 1240 1241 } // end BigUint 1242 1243 @safe pure nothrow unittest 1244 { 1245 // ulong comparison test 1246 BigUint a = [1]; 1247 assert(a == 1); 1248 // https://issues.dlang.org/show_bug.cgi?id=9548 1249 assert(a < 0x8000_0000_0000_0000UL); 1250 1251 // https://issues.dlang.org/show_bug.cgi?id=12234 1252 BigUint z = [0]; 1253 assert(z == 0UL); 1254 assert(!(z > 0UL)); 1255 assert(!(z < 0UL)); 1256 } 1257 1258 // https://issues.dlang.org/show_bug.cgi?id=16223 1259 @system pure nothrow unittest 1260 { 1261 BigUint a = [3]; 1262 int b = 5; 1263 assert(BigUint.mulInt(a,b) == 15); 1264 } 1265 1266 // Remove leading zeros from x, to restore the BigUint invariant 1267 inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe 1268 { 1269 size_t k = x.length; 1270 while (k>1 && x[k - 1]==0) --k; 1271 return x[0 .. k]; 1272 } 1273 1274 pure @safe unittest 1275 { 1276 BigUint r = BigUint([5]); 1277 BigUint t = BigUint([7]); 1278 BigUint s = BigUint.mod(r, t); 1279 assert(s == 5); 1280 } 1281 1282 1283 @safe pure unittest 1284 { 1285 BigUint r; 1286 r = 5UL; 1287 assert(r.peekUlong(0) == 5UL); 1288 assert(r.peekUint(0) == 5U); 1289 r = 0x1234_5678_9ABC_DEF0UL; 1290 assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL); 1291 assert(r.peekUint(0) == 0x9ABC_DEF0U); 1292 } 1293 1294 1295 // Pow tests 1296 pure @safe unittest 1297 { 1298 BigUint r, s; 1299 r.fromHexString("80000000_00000001"); 1300 s = BigUint.pow(r, 5); 1301 r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000" 1302 ~ "_00000002_80000000_00000001"); 1303 assert(s == r); 1304 s = 10UL; 1305 s = BigUint.pow(s, 39); 1306 r.fromDecimalString("1000000000000000000000000000000000000000"); 1307 assert(s == r); 1308 r.fromHexString("1_E1178E81_00000000"); 1309 s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds 1310 1311 r.fromDecimalString("000_000_00"); 1312 assert(r == 0); 1313 r.fromDecimalString("0007"); 1314 assert(r == 7); 1315 r.fromDecimalString("0"); 1316 assert(r == 0); 1317 } 1318 1319 // Radix conversion tests 1320 @safe pure unittest 1321 { 1322 BigUint r; 1323 r.fromHexString("1_E1178E81_00000000"); 1324 assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000"); 1325 assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000"); 1326 assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000"); 1327 assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000"); 1328 assert(r.toHexString(0, '_', 16+8+8) == "00000000_00000001_E1178E81_00000000"); 1329 assert(r.toHexString(0, '_', 16+8+8+1) == "0_00000000_00000001_E1178E81_00000000"); 1330 assert(r.toHexString(0, '_', 16+8+8+1, ' ') == " 1_E1178E81_00000000"); 1331 assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000"); 1332 r = 0UL; 1333 assert(r.toHexString(0, '_', 0) == "0"); 1334 assert(r.toHexString(0, '_', 7) == "0000000"); 1335 assert(r.toHexString(0, '_', 7, ' ') == " 0"); 1336 assert(r.toHexString(0, '#', 9) == "0#00000000"); 1337 assert(r.toHexString(0, 0, 9) == "000000000"); 1338 } 1339 1340 // 1341 @safe pure unittest 1342 { 1343 BigUint r; 1344 r.fromHexString("1_E1178E81_00000000"); 1345 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000"); 1346 assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000"); 1347 assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000"); 1348 assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000"); 1349 assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) == "00000000_00000001_e1178e81_00000000"); 1350 assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000"); 1351 assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == " 1_e1178e81_00000000"); 1352 assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000"); 1353 r = 0UL; 1354 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0"); 1355 assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000"); 1356 assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == " 0"); 1357 assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000"); 1358 assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000"); 1359 assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000"); 1360 } 1361 1362 1363 private: 1364 void twosComplement(const(BigDigit) [] x, BigDigit[] result) 1365 pure nothrow @safe 1366 { 1367 foreach (i; 0 .. x.length) 1368 { 1369 result[i] = ~x[i]; 1370 } 1371 result[x.length..$] = BigDigit.max; 1372 1373 foreach (i; 0 .. result.length) 1374 { 1375 if (result[i] == BigDigit.max) 1376 { 1377 result[i] = 0; 1378 } 1379 else 1380 { 1381 result[i] += 1; 1382 break; 1383 } 1384 } 1385 } 1386 1387 // Encode BigInt as BigDigit array (sign and 2's complement) 1388 BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign) 1389 pure nothrow @safe 1390 { 1391 size_t length = (x.length > minSize) ? x.length : minSize; 1392 BigDigit [] result = new BigDigit[length]; 1393 if (sign) 1394 { 1395 twosComplement(x, result); 1396 } 1397 else 1398 { 1399 result[0 .. x.length] = x; 1400 } 1401 return result; 1402 } 1403 1404 // works for any type 1405 T intpow(T)(T x, ulong n) pure nothrow @safe 1406 { 1407 T p; 1408 1409 switch (n) 1410 { 1411 case 0: 1412 p = 1; 1413 break; 1414 1415 case 1: 1416 p = x; 1417 break; 1418 1419 case 2: 1420 p = x * x; 1421 break; 1422 1423 default: 1424 p = 1; 1425 while (1) 1426 { 1427 if (n & 1) 1428 p *= x; 1429 n >>= 1; 1430 if (!n) 1431 break; 1432 x *= x; 1433 } 1434 break; 1435 } 1436 return p; 1437 } 1438 1439 1440 // returns the maximum power of x that will fit in a uint. 1441 int highestPowerBelowUintMax(uint x) pure nothrow @safe 1442 { 1443 assert(x > 1, "x must be greater than 1"); 1444 static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 1445 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7]; 1446 if (x<24) return maxpwr[x-2]; 1447 if (x<41) return 6; 1448 if (x<85) return 5; 1449 if (x<256) return 4; 1450 if (x<1626) return 3; 1451 if (x<65_536) return 2; 1452 return 1; 1453 } 1454 1455 // returns the maximum power of x that will fit in a ulong. 1456 int highestPowerBelowUlongMax(uint x) pure nothrow @safe 1457 { 1458 assert(x > 1, "x must be greater than 1"); 1459 static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 1460 17, 17, 16, 16, 15, 15, 15, 15, 14, 14, 1461 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 1462 12, 12, 12, 12, 12, 12, 12, 12, 12]; 1463 if (x<41) return maxpwr[x-2]; 1464 if (x<57) return 11; 1465 if (x<85) return 10; 1466 if (x<139) return 9; 1467 if (x<256) return 8; 1468 if (x<566) return 7; 1469 if (x<1626) return 6; 1470 if (x<7132) return 5; 1471 if (x<65_536) return 4; 1472 if (x<2_642_246) return 3; 1473 return 2; 1474 } 1475 1476 version (StdUnittest) 1477 { 1478 1479 private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe 1480 { 1481 int pwr = 1; 1482 for (ulong q = x;x*q < cast(ulong) uint.max; ) 1483 { 1484 q*=x; ++pwr; 1485 } 1486 return pwr; 1487 } 1488 1489 @safe pure unittest 1490 { 1491 assert(highestPowerBelowUintMax(10)==9); 1492 for (int k=82; k<88; ++k) 1493 { 1494 assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k)); 1495 } 1496 } 1497 1498 } 1499 1500 1501 /* General unsigned subtraction routine for bigints. 1502 * Sets result = x - y. If the result is negative, negative will be true. 1503 * Returns: 1504 * unique memory 1505 */ 1506 BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative) 1507 pure nothrow @safe 1508 { 1509 if (x.length == y.length) 1510 { 1511 // There's a possibility of cancellation, if x and y are almost equal. 1512 ptrdiff_t last = highestDifferentDigit(x, y); 1513 BigDigit [] result = new BigDigit[last+1]; 1514 if (x[last] < y[last]) 1515 { // we know result is negative 1516 multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0); 1517 *negative = true; 1518 } 1519 else 1520 { // positive or zero result 1521 multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0); 1522 *negative = false; 1523 } 1524 while (result.length > 1 && result[$-1] == 0) 1525 { 1526 result = result[0..$-1]; 1527 } 1528 // if (result.length >1 && result[$-1]==0) return result[0..$-1]; 1529 return result; 1530 } 1531 // Lengths are different 1532 const(BigDigit) [] large, small; 1533 if (x.length < y.length) 1534 { 1535 *negative = true; 1536 large = y; small = x; 1537 } 1538 else 1539 { 1540 *negative = false; 1541 large = x; small = y; 1542 } 1543 // result.length will be equal to larger length, or could decrease by 1. 1544 1545 1546 BigDigit [] result = new BigDigit[large.length]; 1547 BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0); 1548 result[small.length..$] = large[small.length..$]; 1549 if (carry) 1550 { 1551 multibyteIncrementAssign!('-')(result[small.length..$], carry); 1552 } 1553 while (result.length > 1 && result[$-1] == 0) 1554 { 1555 result = result[0..$-1]; 1556 } 1557 return result; 1558 } 1559 1560 1561 /* 1562 * return a + b 1563 * Returns: 1564 * unique memory 1565 */ 1566 BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe 1567 { 1568 const(BigDigit) [] x, y; 1569 if (a.length < b.length) 1570 { 1571 x = b; y = a; 1572 } 1573 else 1574 { 1575 x = a; y = b; 1576 } 1577 // now we know x.length > y.length 1578 // create result. add 1 in case it overflows 1579 BigDigit [] result = new BigDigit[x.length + 1]; 1580 1581 BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0); 1582 if (x.length != y.length) 1583 { 1584 result[y.length..$-1]= x[y.length..$]; 1585 carry = multibyteIncrementAssign!('+')(result[y.length..$-1], carry); 1586 } 1587 if (carry) 1588 { 1589 result[$-1] = carry; 1590 return result; 1591 } 1592 else 1593 return result[0..$-1]; 1594 } 1595 1596 /** return x + y 1597 */ 1598 BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow 1599 { 1600 uint hi = cast(uint)(y >>> 32); 1601 uint lo = cast(uint)(y& 0xFFFF_FFFF); 1602 auto len = x.length; 1603 if (x.length < 2 && hi != 0) ++len; 1604 BigDigit [] result = new BigDigit[len+1]; 1605 result[0 .. x.length] = x[]; 1606 if (x.length < 2 && hi != 0) 1607 { 1608 result[1]=hi; 1609 hi=0; 1610 } 1611 uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo); 1612 if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi); 1613 if (carry) 1614 { 1615 result[$-1] = carry; 1616 return result; 1617 } 1618 else 1619 return result[0..$-1]; 1620 } 1621 1622 /** Return x - y. 1623 * x must be greater than y. 1624 */ 1625 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe 1626 { 1627 uint hi = cast(uint)(y >>> 32); 1628 uint lo = cast(uint)(y & 0xFFFF_FFFF); 1629 BigDigit [] result = new BigDigit[x.length]; 1630 result[] = x[]; 1631 multibyteIncrementAssign!('-')(result[], lo); 1632 if (hi) 1633 multibyteIncrementAssign!('-')(result[1..$], hi); 1634 if (result[$-1] == 0) 1635 return result[0..$-1]; 1636 else 1637 return result; 1638 } 1639 1640 /** General unsigned multiply routine for bigints. 1641 * Sets result = x * y. 1642 * 1643 * The length of y must not be larger than the length of x. 1644 * Different algorithms are used, depending on the lengths of x and y. 1645 * TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the 1646 * unbalanced case. (But I doubt it would be faster in practice). 1647 * 1648 */ 1649 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y) 1650 pure nothrow @safe 1651 { 1652 import core.memory : GC; 1653 assert( result.length == x.length + y.length, 1654 "result array must have enough space to store computed result"); 1655 assert( y.length > 0, "y must not be empty"); 1656 assert( x.length >= y.length, "x must be greater or equal than y"); 1657 if (y.length <= KARATSUBALIMIT) 1658 { 1659 // Small multiplier, we'll just use the asm classic multiply. 1660 if (y.length == 1) 1661 { // Trivial case, no cache effects to worry about 1662 result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0); 1663 return; 1664 } 1665 1666 immutable CACHELIMIT = getCacheLimit; 1667 if (x.length + y.length < CACHELIMIT) 1668 return mulSimple(result, x, y); 1669 1670 // If x is so big that it won't fit into the cache, we divide it into chunks 1671 // Every chunk must be greater than y.length. 1672 // We make the first chunk shorter, if necessary, to ensure this. 1673 1674 auto chunksize = CACHELIMIT / y.length; 1675 immutable residual = x.length % chunksize; 1676 if (residual < y.length) 1677 { 1678 chunksize -= y.length; 1679 } 1680 1681 // Use schoolbook multiply. 1682 mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y); 1683 auto done = chunksize; 1684 1685 while (done < x.length) 1686 { 1687 // result[done .. done+ylength] already has a value. 1688 chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) : x.length - done; 1689 BigDigit [KARATSUBALIMIT] partial; 1690 partial[0 .. y.length] = result[done .. done+y.length]; 1691 mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y); 1692 addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]); 1693 done += chunksize; 1694 } 1695 return; 1696 } 1697 1698 immutable half = (x.length >> 1) + (x.length & 1); 1699 if (2*y.length*y.length <= x.length*x.length) 1700 { 1701 // UNBALANCED MULTIPLY 1702 // Use school multiply to cut into quasi-squares of Karatsuba-size 1703 // or larger. The ratio of the two sides of the 'square' must be 1704 // between 1.414:1 and 1:1. Use Karatsuba on each chunk. 1705 // 1706 // For maximum performance, we want the ratio to be as close to 1707 // 1:1 as possible. To achieve this, we can either pad x or y. 1708 // The best choice depends on the modulus x%y. 1709 auto numchunks = x.length / y.length; 1710 auto chunksize = y.length; 1711 auto extra = x.length % y.length; 1712 auto maxchunk = chunksize + extra; 1713 bool paddingY; // true = we're padding Y, false = we're padding X. 1714 bool isExtraSmall = extra * extra * 2 < y.length * y.length; 1715 if (numchunks == 1 && isExtraSmall) 1716 { 1717 // We divide (x_first_half * y) and (x_last_half * y) 1718 // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)). 1719 // (1.414 ~ 1.707)/2:1 is balanced. 1720 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length]; 1721 BigDigit [] partial = scratchbuff[$ - y.length .. $]; 1722 scratchbuff = scratchbuff[0 .. $ - y.length]; 1723 mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff); 1724 partial[] = result[half .. half + y.length]; 1725 mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff); 1726 BigDigit c = addAssignSimple(result[half .. half + y.length], partial); 1727 if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c); 1728 () @trusted { GC.free(scratchbuff.ptr); } (); 1729 } 1730 else 1731 { 1732 if (isExtraSmall) 1733 { 1734 // The leftover bit is small enough that it should be incorporated 1735 // in the existing chunks. 1736 // Make all the chunks a tiny bit bigger 1737 // (We're padding y with zeros) 1738 chunksize += extra / numchunks; 1739 extra = x.length - chunksize*numchunks; 1740 // there will probably be a few left over. 1741 // Every chunk will either have size chunksize, or chunksize+1. 1742 maxchunk = chunksize + 1; 1743 paddingY = true; 1744 assert(chunksize + extra + chunksize *(numchunks-1) == x.length, 1745 "Unexpected size"); 1746 } 1747 else 1748 { 1749 // the extra bit is large enough that it's worth making a new chunk. 1750 // (This means we're padding x with zeros, when doing the first one). 1751 maxchunk = chunksize; 1752 ++numchunks; 1753 paddingY = false; 1754 assert(extra + chunksize *(numchunks-1) == x.length, 1755 "Unexpected size"); 1756 } 1757 // We make the buffer a bit bigger so we have space for the partial sums. 1758 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length]; 1759 BigDigit [] partial = scratchbuff[$ - y.length .. $]; 1760 scratchbuff = scratchbuff[0 .. $ - y.length]; 1761 size_t done; // how much of X have we done so far? 1762 if (paddingY) 1763 { 1764 // If the first chunk is bigger, do it first. We're padding y. 1765 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )], 1766 x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff); 1767 done = chunksize + (extra > 0 ? 1 : 0); 1768 if (extra) --extra; 1769 } 1770 else 1771 { // We're padding X. Begin with the extra bit. 1772 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff); 1773 done = extra; 1774 extra = 0; 1775 } 1776 immutable basechunksize = chunksize; 1777 while (done < x.length) 1778 { 1779 chunksize = basechunksize + (extra > 0 ? 1 : 0); 1780 if (extra) --extra; 1781 partial[] = result[done .. done+y.length]; 1782 mulKaratsuba(result[done .. done + y.length + chunksize], 1783 x[done .. done+chunksize], y, scratchbuff); 1784 addAssignSimple(result[done .. done + y.length + chunksize], partial); 1785 done += chunksize; 1786 } 1787 () @trusted { GC.free(scratchbuff.ptr); } (); 1788 } 1789 } 1790 else 1791 { 1792 // Balanced. Use Karatsuba directly. 1793 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1794 mulKaratsuba(result, x, y, scratchbuff); 1795 () @trusted { GC.free(scratchbuff.ptr); } (); 1796 } 1797 } 1798 1799 // https://issues.dlang.org/show_bug.cgi?id=20493 1800 @safe unittest 1801 { 1802 // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300) 1803 // the number itself isn't important, only the amount of digits, so we do a simpler 1804 // multiplication of the same size, analogous to: 1805 // 11111111 * 11111111 = 0123456787654321 1806 // but instead of base 10, it's in base `BigDigit` 1807 1808 BigDigit[398] x = 1; 1809 BigDigit[236] y = 1; 1810 BigDigit[x.length + y.length] result; 1811 mulInternal(result[], x[], y[]); 1812 1813 // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0] 1814 BigDigit[x.length + y.length] expected = y.length; 1815 foreach (BigDigit i; 0 .. y.length) 1816 { 1817 expected[i] = i+1; 1818 expected[$-1-i] = i; 1819 } 1820 1821 assert(result == expected); 1822 } 1823 1824 /** General unsigned squaring routine for BigInts. 1825 * Sets result = x*x. 1826 * NOTE: If the highest half-digit of x is zero, the highest digit of result will 1827 * also be zero. 1828 */ 1829 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe 1830 { 1831 import core.memory : GC; 1832 // Squaring is potentially half a multiply, plus add the squares of 1833 // the diagonal elements. 1834 assert(result.length == 2*x.length, 1835 "result needs to have twice the capacity of x"); 1836 if (x.length <= KARATSUBASQUARELIMIT) 1837 { 1838 if (x.length == 1) 1839 { 1840 result[1] = multibyteMul(result[0 .. 1], x, x[0], 0); 1841 return; 1842 } 1843 return squareSimple(result, x); 1844 } 1845 // The nice thing about squaring is that it always stays balanced 1846 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1847 squareKaratsuba(result, x, scratchbuff); 1848 () @trusted { GC.free(scratchbuff.ptr); } (); 1849 } 1850 1851 1852 import core.bitop : bsr; 1853 1854 /// if remainder is null, only calculate quotient. 1855 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u, 1856 const BigDigit [] v) pure nothrow @safe 1857 { 1858 import core.memory : GC; 1859 assert(quotient.length == u.length - v.length + 1, 1860 "Invalid quotient length"); 1861 assert(remainder == null || remainder.length == v.length, 1862 "Invalid remainder"); 1863 assert(v.length > 1, "v must have more than 1 element"); 1864 assert(u.length >= v.length, "u must be as longer or longer than v"); 1865 1866 // Normalize by shifting v left just enough so that 1867 // its high-order bit is on, and shift u left the 1868 // same amount. The highest bit of u will never be set. 1869 1870 BigDigit [] vn = new BigDigit[v.length]; 1871 BigDigit [] un = new BigDigit[u.length + 1]; 1872 // How much to left shift v, so that its MSB is set. 1873 uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]); 1874 if (s != 0) 1875 { 1876 multibyteShl(vn, v, s); 1877 un[$-1] = multibyteShl(un[0..$-1], u, s); 1878 } 1879 else 1880 { 1881 vn[] = v[]; 1882 un[0..$-1] = u[]; 1883 un[$-1] = 0; 1884 } 1885 if (quotient.length<FASTDIVLIMIT) 1886 { 1887 schoolbookDivMod(quotient, un, vn); 1888 } 1889 else 1890 { 1891 blockDivMod(quotient, un, vn); 1892 } 1893 1894 // Unnormalize remainder, if required. 1895 if (remainder != null) 1896 { 1897 if (s == 0) remainder[] = un[0 .. vn.length]; 1898 else multibyteShr(remainder, un[0 .. vn.length+1], s); 1899 } 1900 () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } (); 1901 } 1902 1903 pure @safe unittest 1904 { 1905 immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000]; 1906 immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000]; 1907 uint [] q = new uint[u.length - v.length + 1]; 1908 uint [] r = new uint[2]; 1909 divModInternal(q, r, u, v); 1910 assert(q[]==[0xFFFF_FFFFu, 0]); 1911 assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]); 1912 u = [0, 0xFFFF_FFFE, 0x8000_0001]; 1913 v = [0xFFFF_FFFF, 0x8000_0000]; 1914 divModInternal(q, r, u, v); 1915 } 1916 1917 1918 // Converts a big uint to a hexadecimal string. 1919 // 1920 // Optionally, a separator character (eg, an underscore) may be added between 1921 // every 8 digits. 1922 // buff.length must be data.length*8 if separator is zero, 1923 // or data.length*9 if separator is non-zero. It will be completely filled. 1924 char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0, 1925 LetterCase letterCase = LetterCase.upper) pure nothrow @safe 1926 { 1927 int x=0; 1928 for (ptrdiff_t i=data.length - 1; i >= 0; --i) 1929 { 1930 toHexZeroPadded(buff[x .. x+8], data[i], letterCase); 1931 x+=8; 1932 if (separator) 1933 { 1934 if (i>0) buff[x] = separator; 1935 ++x; 1936 } 1937 } 1938 return buff; 1939 } 1940 1941 /** 1942 * Convert a big uint into an octal string. 1943 * 1944 * Params: 1945 * buff = The destination buffer for the octal string. Must be large enough to 1946 * store the result, including leading zeroes, which is 1947 * ceil(data.length * BigDigitBits / 3) characters. 1948 * The buffer is filled from back to front, starting from `buff[$-1]`. 1949 * data = The biguint to be converted. 1950 * 1951 * Returns: The index of the leading non-zero digit in `buff`. Will be 1952 * `buff.length - 1` if the entire big uint is zero. 1953 */ 1954 size_t biguintToOctal(char[] buff, const(BigDigit)[] data) 1955 pure nothrow @safe @nogc 1956 { 1957 ubyte carry = 0; 1958 int shift = 0; 1959 size_t penPos = buff.length - 1; 1960 size_t lastNonZero = buff.length - 1; 1961 1962 pragma(inline) void output(uint digit) @nogc nothrow 1963 { 1964 if (digit != 0) 1965 lastNonZero = penPos; 1966 buff[penPos--] = cast(char)('0' + digit); 1967 } 1968 1969 foreach (bigdigit; data) 1970 { 1971 if (shift < 0) 1972 { 1973 // Some bits were carried over from previous word. 1974 assert(shift > -3, "shift must be greater than -3"); 1975 output(((bigdigit << -shift) | carry) & 0b111); 1976 shift += 3; 1977 assert(shift > 0, "shift must be 1 or greater"); 1978 } 1979 1980 while (shift <= BigDigitBits - 3) 1981 { 1982 output((bigdigit >>> shift) & 0b111); 1983 shift += 3; 1984 } 1985 1986 if (shift < BigDigitBits) 1987 { 1988 // Some bits need to be carried forward. 1989 carry = (bigdigit >>> shift) & 0b11; 1990 } 1991 shift -= BigDigitBits; 1992 assert(shift >= -2 && shift <= 0, "shift must in [-2,0]"); 1993 } 1994 1995 if (shift < 0) 1996 { 1997 // Last word had bits that haven't been output yet. 1998 assert(shift > -3, "Shift must be greater than -3"); 1999 output(carry); 2000 } 2001 2002 return lastNonZero; 2003 } 2004 2005 /** Convert a big uint into a decimal string. 2006 * 2007 * Params: 2008 * data The biguint to be converted. Will be destroyed. 2009 * buff The destination buffer for the decimal string. Must be 2010 * large enough to store the result, including leading zeros. 2011 * Will be filled backwards, starting from buff[$-1]. 2012 * 2013 * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length. 2014 * Returns: 2015 * the lowest index of buff which was used. 2016 */ 2017 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe 2018 { 2019 ptrdiff_t sofar = buff.length; 2020 // Might be better to divide by (10^38/2^32) since that gives 38 digits for 2021 // the price of 3 divisions and a shr; this version only gives 27 digits 2022 // for 3 divisions. 2023 while (data.length>1) 2024 { 2025 uint rem = multibyteDivAssign(data, 10_0000_0000, 0); 2026 itoaZeroPadded(buff[sofar-9 .. sofar], rem); 2027 sofar -= 9; 2028 if (data[$-1] == 0 && data.length > 1) 2029 { 2030 data.length = data.length - 1; 2031 } 2032 } 2033 itoaZeroPadded(buff[sofar-10 .. sofar], data[0]); 2034 sofar -= 10; 2035 // and strip off the leading zeros 2036 while (sofar != buff.length-1 && buff[sofar] == '0') 2037 sofar++; 2038 return sofar; 2039 } 2040 2041 /** Convert a decimal string into a big uint. 2042 * 2043 * Params: 2044 * data The biguint to be receive the result. Must be large enough to 2045 * store the result. 2046 * s The decimal string. May contain _ or 0 .. 9 2047 * 2048 * The required length for the destination buffer is slightly less than 2049 * 1 + s.length/log2(10) = 1 + s.length/3.3219. 2050 * 2051 * Returns: 2052 * the highest index of data which was used. 2053 */ 2054 int biguintFromDecimal(Range)(BigDigit[] data, Range s) 2055 if ( 2056 isInputRange!Range && 2057 isSomeChar!(ElementType!Range) && 2058 !isInfinite!Range) 2059 in 2060 { 2061 static if (hasLength!Range) 2062 assert((data.length >= 2) || (data.length == 1 && s.length == 1), 2063 "data has a invalid length"); 2064 } 2065 do 2066 { 2067 import std.conv : ConvException; 2068 2069 // Convert to base 1e19 = 10_000_000_000_000_000_000. 2070 // (this is the largest power of 10 that will fit into a long). 2071 // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219. 2072 // 485 bits will only just fit into 146 decimal digits. 2073 // As we convert the string, we record the number of digits we've seen in base 19: 2074 // hi is the number of digits/19, lo is the extra digits (0 to 18). 2075 // TODO: This is inefficient for very large strings (it is O(n^^2)). 2076 // We should take advantage of fast multiplication once the numbers exceed 2077 // Karatsuba size. 2078 uint lo = 0; // number of powers of digits, 0 .. 18 2079 uint x = 0; 2080 ulong y = 0; 2081 uint hi = 0; // number of base 1e19 digits 2082 data[0] = 0; // initially number is 0. 2083 if (data.length > 1) 2084 data[1] = 0; 2085 2086 foreach (character; s) 2087 { 2088 if (character == '_') 2089 continue; 2090 2091 if (character < '0' || character > '9') 2092 throw new ConvException("invalid digit"); 2093 x *= 10; 2094 x += character - '0'; 2095 ++lo; 2096 if (lo == 9) 2097 { 2098 y = x; 2099 x = 0; 2100 } 2101 if (lo == 18) 2102 { 2103 y *= 10_0000_0000; 2104 y += x; 2105 x = 0; 2106 } 2107 if (lo == 19) 2108 { 2109 y *= 10; 2110 y += x; 2111 x = 0; 2112 // Multiply existing number by 10^19, then add y1. 2113 if (hi>0) 2114 { 2115 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A 2116 ++hi; 2117 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000 2118 ++hi; 2119 } 2120 else 2121 hi = 2; 2122 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF)); 2123 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32)); 2124 if (c != 0) 2125 { 2126 data[hi]=c; 2127 ++hi; 2128 } 2129 y = 0; 2130 lo = 0; 2131 } 2132 } 2133 // Now set y = all remaining digits. 2134 if (lo >= 18) 2135 { 2136 } 2137 else if (lo >= 9) 2138 { 2139 for (int k=9; k<lo; ++k) y*=10; 2140 y+=x; 2141 } 2142 else 2143 { 2144 for (int k=0; k<lo; ++k) y*=10; 2145 y+=x; 2146 } 2147 if (lo != 0) 2148 { 2149 if (hi == 0) 2150 { 2151 data[0] = cast(uint) y; 2152 if (data.length == 1) 2153 { 2154 hi = 1; 2155 } 2156 else 2157 { 2158 data[1] = cast(uint)(y >>> 32); 2159 hi=2; 2160 } 2161 } 2162 else 2163 { 2164 while (lo>0) 2165 { 2166 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0); 2167 if (c != 0) 2168 { 2169 data[hi]=c; 2170 ++hi; 2171 } 2172 --lo; 2173 } 2174 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF)); 2175 if (y > 0xFFFF_FFFFL) 2176 { 2177 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32)); 2178 } 2179 if (c != 0) 2180 { 2181 data[hi]=c; 2182 ++hi; 2183 } 2184 } 2185 } 2186 while (hi>1 && data[hi-1]==0) 2187 --hi; 2188 return hi; 2189 } 2190 2191 2192 // ------------------------ 2193 // These in-place functions are only for internal use; they are incompatible 2194 // with COW. 2195 2196 // Classic 'schoolbook' multiplication. 2197 void mulSimple(BigDigit[] result, const(BigDigit) [] left, 2198 const(BigDigit)[] right) pure nothrow @safe 2199 in 2200 { 2201 assert(result.length == left.length + right.length, 2202 "Result must be able to store left + right"); 2203 assert(right.length>1, "right must not be empty"); 2204 } 2205 do 2206 { 2207 result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0); 2208 multibyteMultiplyAccumulate(result[1..$], left, right[1..$]); 2209 } 2210 2211 // Classic 'schoolbook' squaring 2212 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe 2213 in 2214 { 2215 assert(result.length == 2*x.length, "result must be twice as long as x"); 2216 assert(x.length>1, "x must not be empty"); 2217 } 2218 do 2219 { 2220 multibyteSquare(result, x); 2221 } 2222 2223 2224 // add two uints of possibly different lengths. Result must be as long 2225 // as the larger length. 2226 // Returns carry (0 or 1). 2227 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right) 2228 pure nothrow @safe 2229 in 2230 { 2231 assert(result.length == left.length, 2232 "result and left must be of the same length"); 2233 assert(left.length >= right.length, 2234 "left must be longer or of equal length to right"); 2235 assert(right.length > 0, "right must not be empty"); 2236 } 2237 do 2238 { 2239 uint carry = multibyteAdd(result[0 .. right.length], 2240 left[0 .. right.length], right, 0); 2241 if (right.length < left.length) 2242 { 2243 result[right.length .. left.length] = left[right.length .. $]; 2244 carry = multibyteIncrementAssign!('+')(result[right.length..$], carry); 2245 } 2246 return carry; 2247 } 2248 2249 /* result = result - right 2250 * Returns carry = 1 if result was less than right. 2251 */ 2252 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right) 2253 pure nothrow @safe 2254 { 2255 assert(result.length >= right.length, 2256 "result must be longer or of equal length to right"); 2257 uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0); 2258 if (c && result.length > right.length) 2259 c = multibyteIncrementAssign!('-')(result[right.length .. $], c); 2260 return c; 2261 } 2262 2263 /* result = result + right 2264 */ 2265 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right) 2266 pure nothrow @safe 2267 { 2268 assert(result.length >= right.length, 2269 "result must be longer or of equal length to right"); 2270 uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0); 2271 if (c && result.length > right.length) 2272 c = multibyteIncrementAssign!('+')(result[right.length .. $], c); 2273 return c; 2274 } 2275 2276 /* performs result += wantSub? - right : right; 2277 */ 2278 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right, 2279 bool wantSub) pure nothrow @safe 2280 { 2281 if (wantSub) 2282 return subAssignSimple(result, right); 2283 else 2284 return addAssignSimple(result, right); 2285 } 2286 2287 2288 // return true if x<y, considering leading zeros 2289 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe 2290 { 2291 assert(x.length >= y.length, 2292 "x must be longer or of equal length to y"); 2293 auto k = x.length-1; 2294 while (x[k]==0 && k >= y.length) 2295 --k; 2296 if (k >= y.length) 2297 return false; 2298 while (k>0 && x[k]==y[k]) 2299 --k; 2300 return x[k] < y[k]; 2301 } 2302 2303 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y. 2304 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y) 2305 pure nothrow @safe 2306 { 2307 assert(result.length == ((x.length >= y.length) ? x.length : y.length), 2308 "result must capable to store the maximum of x and y"); 2309 2310 size_t minlen; 2311 bool negative; 2312 if (x.length >= y.length) 2313 { 2314 minlen = y.length; 2315 negative = less(x, y); 2316 } 2317 else 2318 { 2319 minlen = x.length; 2320 negative = !less(y, x); 2321 } 2322 const (BigDigit)[] large, small; 2323 if (negative) 2324 { 2325 large = y; small = x; 2326 } 2327 else 2328 { 2329 large = x; small = y; 2330 } 2331 2332 BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0); 2333 if (x.length != y.length) 2334 { 2335 result[minlen .. large.length]= large[minlen..$]; 2336 result[large.length..$] = 0; 2337 if (carry) 2338 multibyteIncrementAssign!('-')(result[minlen..$], carry); 2339 } 2340 return negative; 2341 } 2342 2343 /* Determine how much space is required for the temporaries 2344 * when performing a Karatsuba multiplication. 2345 * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see: 2346 * https://issues.dlang.org/show_bug.cgi?id=20493 2347 */ 2348 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe 2349 { 2350 return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4; 2351 } 2352 2353 /* Sets result = x*y, using Karatsuba multiplication. 2354 * x must be longer or equal to y. 2355 * Valid only for balanced multiplies, where x is not shorter than y. 2356 * It is superior to schoolbook multiplication if and only if 2357 * sqrt(2)*y.length > x.length > y.length. 2358 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2) 2359 * The maximum allowable length of x and y is uint.max; but better algorithms 2360 * should be used far before that length is reached. 2361 * Params: 2362 * scratchbuff An array long enough to store all the temporaries. Will be destroyed. 2363 */ 2364 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x, 2365 const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe 2366 { 2367 assert(x.length >= y.length, "x must be greater or equal to y"); 2368 assert(result.length < uint.max, "Operands too large"); 2369 assert(result.length == x.length + y.length, 2370 "result must be as large as x + y"); 2371 if (x.length <= KARATSUBALIMIT) 2372 { 2373 return mulSimple(result, x, y); 2374 } 2375 // Must be almost square (otherwise, a schoolbook iteration is better) 2376 assert(2L * y.length * y.length > (x.length-1) * (x.length-1), 2377 "Bigint Internal Error: Asymmetric Karatsuba"); 2378 2379 // The subtractive version of Karatsuba multiply uses the following result: 2380 // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid) 2381 // where mid = (x0-x1)*(y0-y1) 2382 // requiring 3 multiplies of length N, instead of 4. 2383 // The advantage of the subtractive over the additive version is that 2384 // the mid multiply cannot exceed length N. But there are subtleties: 2385 // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we 2386 // retain all of the leading zeros in the subtractions 2387 2388 // half length, round up. 2389 auto half = (x.length >> 1) + (x.length & 1); 2390 2391 const(BigDigit) [] x0 = x[0 .. half]; 2392 const(BigDigit) [] x1 = x[half .. $]; 2393 const(BigDigit) [] y0 = y[0 .. half]; 2394 const(BigDigit) [] y1 = y[half .. $]; 2395 BigDigit [] mid = scratchbuff[0 .. half*2]; 2396 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 2397 BigDigit [] resultLow = result[0 .. 2*half]; 2398 BigDigit [] resultHigh = result[2*half .. $]; 2399 // initially use result to store temporaries 2400 BigDigit [] xdiff= result[0 .. half]; 2401 BigDigit [] ydiff = result[half .. half*2]; 2402 2403 // First, we calculate mid, and sign of mid 2404 immutable bool midNegative = inplaceSub(xdiff, x0, x1) 2405 ^ inplaceSub(ydiff, y0, y1); 2406 mulKaratsuba(mid, xdiff, ydiff, newscratchbuff); 2407 2408 // Low half of result gets x0 * y0. High half gets x1 * y1 2409 2410 mulKaratsuba(resultLow, x0, y0, newscratchbuff); 2411 2412 if (2L * y1.length * y1.length < x1.length * x1.length) 2413 { 2414 // an asymmetric situation has been created. 2415 // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1. 2416 // Applying one schoolbook multiply gives us two pieces each 1.2:1 2417 if (y1.length <= KARATSUBALIMIT) 2418 mulSimple(resultHigh, x1, y1); 2419 else 2420 { 2421 // divide x1 in two, then use schoolbook multiply on the two pieces. 2422 auto quarter = (x1.length >> 1) + (x1.length & 1); 2423 immutable ysmaller = (quarter >= y1.length); 2424 mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1, 2425 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff); 2426 // Save the part which will be overwritten. 2427 immutable ysmaller2 = ((x1.length - quarter) >= y1.length); 2428 newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length]; 2429 mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1, 2430 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]); 2431 2432 resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]); 2433 } 2434 } 2435 else 2436 mulKaratsuba(resultHigh, x1, y1, newscratchbuff); 2437 2438 /* We now have result = x0y0 + (N*N)*x1y1 2439 Before adding or subtracting mid, we must calculate 2440 result += N * (x0y0 + x1y1) 2441 We can do this with three half-length additions. With a = x0y0, b = x1y1: 2442 aHI aLO 2443 + aHI aLO 2444 + bHI bLO 2445 + bHI bLO 2446 = R3 R2 R1 R0 2447 R1 = aHI + bLO + aLO 2448 R2 = aHI + bLO + aHI + carry_from_R1 2449 R3 = bHi + carry_from_R2 2450 2451 It might actually be quicker to do it in two full-length additions: 2452 newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]); 2453 addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]); 2454 */ 2455 BigDigit[] R1 = result[half .. half*2]; 2456 BigDigit[] R2 = result[half*2 .. half*3]; 2457 BigDigit[] R3 = result[half*3..$]; 2458 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 2459 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0 2460 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 2461 if (c1+c2) 2462 multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 2463 if (c1+c3) 2464 multibyteIncrementAssign!('+')(R3, c1+c3); 2465 2466 // And finally we subtract mid 2467 addOrSubAssignSimple(result[half..$], mid, !midNegative); 2468 } 2469 2470 void squareKaratsuba(BigDigit [] result, const BigDigit [] x, 2471 BigDigit [] scratchbuff) pure nothrow @safe 2472 { 2473 // See mulKaratsuba for implementation comments. 2474 // Squaring is simpler, since it never gets asymmetric. 2475 assert(result.length < uint.max, "Operands too large"); 2476 assert(result.length == 2*x.length, 2477 "result must be twice the length of x"); 2478 if (x.length <= KARATSUBASQUARELIMIT) 2479 { 2480 return squareSimple(result, x); 2481 } 2482 // half length, round up. 2483 auto half = (x.length >> 1) + (x.length & 1); 2484 2485 const(BigDigit)[] x0 = x[0 .. half]; 2486 const(BigDigit)[] x1 = x[half .. $]; 2487 BigDigit [] mid = scratchbuff[0 .. half*2]; 2488 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 2489 // initially use result to store temporaries 2490 BigDigit [] xdiff= result[0 .. half]; 2491 const BigDigit [] ydiff = result[half .. half*2]; 2492 2493 // First, we calculate mid. We don't need its sign 2494 inplaceSub(xdiff, x0, x1); 2495 squareKaratsuba(mid, xdiff, newscratchbuff); 2496 2497 // Set result = x0x0 + (N*N)*x1x1 2498 squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff); 2499 squareKaratsuba(result[2*half .. $], x1, newscratchbuff); 2500 2501 /* result += N * (x0x0 + x1x1) 2502 Do this with three half-length additions. With a = x0x0, b = x1x1: 2503 R1 = aHI + bLO + aLO 2504 R2 = aHI + bLO + aHI + carry_from_R1 2505 R3 = bHi + carry_from_R2 2506 */ 2507 BigDigit[] R1 = result[half .. half*2]; 2508 BigDigit[] R2 = result[half*2 .. half*3]; 2509 BigDigit[] R3 = result[half*3..$]; 2510 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 2511 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0 2512 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 2513 if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 2514 if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3); 2515 2516 // And finally we subtract mid, which is always positive 2517 subAssignSimple(result[half..$], mid); 2518 } 2519 2520 /* Knuth's Algorithm D, as presented in 2521 * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002). 2522 * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18. 2523 * Given u and v, calculates quotient = u / v, u = u % v. 2524 * v must be normalized (ie, the MSB of v must be 1). 2525 * The most significant words of quotient and u may be zero. 2526 * u[0 .. v.length] holds the remainder. 2527 */ 2528 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 2529 pure nothrow @safe 2530 { 2531 assert(quotient.length == u.length - v.length, 2532 "quotient has wrong length"); 2533 assert(v.length > 1, "v must not be empty"); 2534 assert(u.length >= v.length, "u must be larger or equal to v"); 2535 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]"); 2536 assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]"); 2537 // BUG: This code only works if BigDigit is uint. 2538 uint vhi = v[$-1]; 2539 uint vlo = v[$-2]; 2540 2541 for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--) 2542 { 2543 // Compute estimate of quotient[j], 2544 // qhat = (three most significant words of u)/(two most sig words of v). 2545 uint qhat; 2546 if (u[j + v.length] == vhi) 2547 { 2548 // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001) 2549 qhat = uint.max; 2550 } 2551 else 2552 { 2553 uint ulo = u[j + v.length - 2]; 2554 version (D_InlineAsm_X86) 2555 { 2556 // Note: On DMD, this is only ~10% faster than the non-asm code. 2557 uint *p = &u[j + v.length - 1]; 2558 asm pure nothrow @trusted 2559 { 2560 mov EAX, p; 2561 mov EDX, [EAX+4]; 2562 mov EAX, [EAX]; 2563 div dword ptr [vhi]; 2564 mov qhat, EAX; 2565 mov ECX, EDX; 2566 div3by2correction: 2567 mul dword ptr [vlo]; // EDX:EAX = qhat * vlo 2568 sub EAX, ulo; 2569 sbb EDX, ECX; 2570 jbe div3by2done; 2571 mov EAX, qhat; 2572 dec EAX; 2573 mov qhat, EAX; 2574 add ECX, dword ptr [vhi]; 2575 jnc div3by2correction; 2576 div3by2done: ; 2577 } 2578 } 2579 else 2580 { // version (InlineAsm) 2581 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1]; 2582 immutable bigqhat = uu / vhi; 2583 ulong rhat = uu - bigqhat * vhi; 2584 qhat = cast(uint) bigqhat; 2585 again: 2586 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo)) 2587 { 2588 --qhat; 2589 rhat += vhi; 2590 if (!(rhat & 0xFFFF_FFFF_0000_0000L)) 2591 goto again; 2592 } 2593 } // version (InlineAsm) 2594 } 2595 // Multiply and subtract. 2596 uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0); 2597 2598 if (u[j+v.length] < carry) 2599 { 2600 // If we subtracted too much, add back 2601 --qhat; 2602 carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0); 2603 } 2604 quotient[j] = qhat; 2605 u[j + v.length] = u[j + v.length] - carry; 2606 } 2607 } 2608 2609 // TODO: Replace with a library call 2610 void itoaZeroPadded(char[] output, uint value) 2611 pure nothrow @safe @nogc 2612 { 2613 for (auto i = output.length; i--;) 2614 { 2615 if (value < 10) 2616 { 2617 output[i] = cast(char)(value + '0'); 2618 value = 0; 2619 } 2620 else 2621 { 2622 output[i] = cast(char)(value % 10 + '0'); 2623 value /= 10; 2624 } 2625 } 2626 } 2627 2628 void toHexZeroPadded(char[] output, uint value, 2629 LetterCase letterCase = LetterCase.upper) pure nothrow @safe 2630 { 2631 ptrdiff_t x = output.length - 1; 2632 static immutable string upperHexDigits = "0123456789ABCDEF"; 2633 static immutable string lowerHexDigits = "0123456789abcdef"; 2634 for ( ; x >= 0; --x) 2635 { 2636 if (letterCase == LetterCase.upper) 2637 { 2638 output[x] = upperHexDigits[value & 0xF]; 2639 } 2640 else 2641 { 2642 output[x] = lowerHexDigits[value & 0xF]; 2643 } 2644 value >>= 4; 2645 } 2646 } 2647 2648 // Returns the highest value of i for which left[i]!=right[i], 2649 // or 0 if left[] == right[] 2650 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right) 2651 pure nothrow @nogc @safe 2652 { 2653 assert(left.length == right.length, 2654 "left have a length equal to that of right"); 2655 for (ptrdiff_t i = left.length - 1; i>0; --i) 2656 { 2657 if (left[i] != right[i]) 2658 return i; 2659 } 2660 return 0; 2661 } 2662 2663 // Returns the lowest value of i for which x[i]!=0. 2664 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe 2665 { 2666 int k = 0; 2667 while (x[k]==0) 2668 { 2669 ++k; 2670 assert(k < x.length, "k must be less than x.length"); 2671 } 2672 return k; 2673 } 2674 2675 /* 2676 Calculate quotient and remainder of u / v using fast recursive division. 2677 v must be normalised, and must be at least half as long as u. 2678 Given u and v, v normalised, calculates quotient = u/v, u = u%v. 2679 scratch is temporary storage space, length must be >= quotient + 1. 2680 2681 Returns: 2682 u[0 .. v.length] is the remainder. u[v.length..$] is corrupted. 2683 2684 Implements algorithm 1.8 from MCA. 2685 This algorithm has an annoying special case. After the first recursion, the 2686 highest bit of the quotient may be set. This means that in the second 2687 recursive call, the 'in' contract would be violated. (This happens only 2688 when the top quarter of u is equal to the top half of v. A base 10 2689 equivalent example of this situation is 5517/56; the first step gives 2690 55/5 = 11). To maintain the in contract, we pad a zero to the top of both 2691 u and the quotient. 'mayOverflow' indicates that that the special case 2692 has occurred. 2693 (In MCA, a different strategy is used: the in contract is weakened, and 2694 schoolbookDivMod is more general: it allows the high bit of u to be set). 2695 See also: 2696 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 2697 Max-Planck Institute fuer Informatik, (Oct 1998). 2698 */ 2699 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v, 2700 BigDigit[] scratch, bool mayOverflow = false) 2701 pure nothrow @safe 2702 in 2703 { 2704 // v must be normalized 2705 assert(v.length > 1, "v must not be empty"); 2706 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]"); 2707 assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]"); 2708 assert(quotient.length == u.length - v.length, 2709 "quotient must be of equal length of u - v"); 2710 if (mayOverflow) 2711 { 2712 assert(u[$-1] == 0, "Invalid value at u[$ - 1]"); 2713 assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]"); 2714 } 2715 2716 // Must be symmetric. Use block schoolbook division if not. 2717 assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length, 2718 "Invalid length of u"); 2719 assert((mayOverflow ? u.length-1 : u.length) >= v.length, 2720 "Invalid length of u"); 2721 assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1), 2722 "Invalid quotient length"); 2723 } 2724 do 2725 { 2726 if (quotient.length < FASTDIVLIMIT) 2727 { 2728 return schoolbookDivMod(quotient, u, v); 2729 } 2730 2731 // Split quotient into two halves, but keep padding in the top half 2732 auto k = (mayOverflow ? quotient.length - 1 : quotient.length) >> 1; 2733 2734 // RECURSION 1: Calculate the high half of the quotient 2735 2736 // Note that if u and quotient were padded, they remain padded during 2737 // this call, so in contract is satisfied. 2738 recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $], 2739 scratch, mayOverflow); 2740 2741 // quotient[k..$] is our guess at the high quotient. 2742 // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the 2743 // first remainder. u[0 .. 2*k] is the low part. 2744 2745 // Calculate the full first remainder to be 2746 // remainder - highQuotient * lowDivisor 2747 // reducing highQuotient until the remainder is positive. 2748 // The low part of the remainder, u[0 .. k], cannot be altered by this. 2749 2750 adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k, 2751 scratch[0 .. quotient.length], mayOverflow); 2752 2753 // RECURSION 2: Calculate the low half of the quotient 2754 // The full first remainder is now in u[0 .. k + v.length]. 2755 2756 if (u[k + v.length - 1] & 0x8000_0000) 2757 { 2758 // Special case. The high quotient is 0x1_00...000 or 0x1_00...001. 2759 // This means we need an extra quotient word for the next recursion. 2760 // We need to restore the invariant for the recursive calls. 2761 // We do this by padding both u and quotient. Extending u is trivial, 2762 // because the higher words will not be used again. But for the 2763 // quotient, we're clobbering the low word of the high quotient, 2764 // so we need save it, and add it back in after the recursive call. 2765 2766 auto clobberedQuotient = quotient[k]; 2767 u[k+v.length] = 0; 2768 2769 recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1], 2770 v[k .. $], scratch, true); 2771 adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k, 2772 scratch[0 .. 2 * k+1], true); 2773 2774 // Now add the quotient word that got clobbered earlier. 2775 multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient); 2776 } 2777 else 2778 { 2779 // The special case has NOT happened. 2780 recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $], 2781 scratch, false); 2782 2783 // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length] 2784 2785 adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k, 2786 scratch[0 .. 2 * k]); 2787 } 2788 } 2789 2790 // rem -= quot * v[0 .. k]. 2791 // If would make rem negative, decrease quot until rem is >= 0. 2792 // Needs (quot.length * k) scratch space to store the result of the multiply. 2793 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v, 2794 ptrdiff_t k, 2795 BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe 2796 { 2797 assert(rem.length == v.length, "rem must be as long as v"); 2798 mulInternal(scratch, quot, v[0 .. k]); 2799 uint carry = 0; 2800 if (mayOverflow) 2801 carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]); 2802 else 2803 carry = subAssignSimple(rem, scratch); 2804 while (carry) 2805 { 2806 multibyteIncrementAssign!('-')(quot, 1); // quot-- 2807 carry -= multibyteAdd(rem, rem, v, 0); 2808 } 2809 } 2810 2811 // Cope with unbalanced division by performing block schoolbook division. 2812 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 2813 pure nothrow @safe 2814 { 2815 import core.memory : GC; 2816 assert(quotient.length == u.length - v.length, 2817 "quotient must be of equal length of u - v"); 2818 assert(v.length > 1, "v must not be empty"); 2819 assert(u.length >= v.length, "u must be longer or of equal length as v"); 2820 assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]"); 2821 assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]"); 2822 BigDigit [] scratch = new BigDigit[v.length + 1]; 2823 2824 // Perform block schoolbook division, with 'v.length' blocks. 2825 auto m = u.length - v.length; 2826 while (m > v.length) 2827 { 2828 immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0; 2829 BigDigit saveq; 2830 if (mayOverflow) 2831 { 2832 u[m + v.length] = 0; 2833 saveq = quotient[m]; 2834 } 2835 recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)], 2836 u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow); 2837 if (mayOverflow) 2838 { 2839 assert(quotient[m] == 0, "quotient must not be 0"); 2840 quotient[m] = saveq; 2841 } 2842 m -= v.length; 2843 } 2844 recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch); 2845 () @trusted { GC.free(scratch.ptr); } (); 2846 } 2847 2848 @system unittest 2849 { 2850 version (none) 2851 { 2852 import core.stdc.stdio; 2853 2854 void printBiguint(const uint [] data) 2855 { 2856 char [] buff = biguintToHex(new char[data.length*9], data, '_'); 2857 printf("%.*s\n", cast(int) buff.length, buff.ptr); 2858 } 2859 2860 void printDecimalBigUint(BigUint data) 2861 { 2862 auto str = data.toDecimalString(0); 2863 printf("%.*s\n", cast(int) str.length, str.ptr); 2864 } 2865 } 2866 2867 uint [] a, b; 2868 a = new uint[43]; 2869 b = new uint[179]; 2870 for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i; 2871 for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546; 2872 2873 a[$-1] |= 0x8000_0000; 2874 uint [] r = new uint[a.length]; 2875 uint [] q = new uint[b.length-a.length+1]; 2876 2877 divModInternal(q, r, b, a); 2878 q = q[0..$-1]; 2879 uint [] r1 = r.dup; 2880 uint [] q1 = q.dup; 2881 blockDivMod(q, b, a); 2882 r = b[0 .. a.length]; 2883 assert(r[] == r1[]); 2884 assert(q[] == q1[]); 2885 } 2886 2887 // biguintToOctal 2888 @safe unittest 2889 { 2890 enum bufSize = 5 * BigDigitBits / 3 + 1; 2891 auto buf = new char[bufSize]; 2892 size_t i; 2893 BigDigit[] data = [ 342391 ]; 2894 2895 // Basic functionality with single word 2896 i = biguintToOctal(buf, data); 2897 assert(i == bufSize - 7 && buf[i .. $] == "1234567"); 2898 2899 // Test carrying bits between words 2900 data = [ 0x77053977, 0x39770539, 0x00000005 ]; 2901 i = biguintToOctal(buf, data); 2902 assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567"); 2903 2904 // Test carried bits in the last word 2905 data = [ 0x80000000 ]; 2906 i = biguintToOctal(buf, data); 2907 assert(buf[i .. $] == "20000000000"); 2908 2909 // Test boundary between 3rd and 4th word where the number of bits is 2910 // divisible by 3 and no bits should be carried. 2911 // 2912 // The 0xC0000000's are for "poisoning" the carry to be non-zero when the 2913 // rollover happens, so that if any bugs happen in wrongly adding the carry 2914 // to the next word, non-zero bits will show up in the output. 2915 data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ]; 2916 i = biguintToOctal(buf, data); 2917 assert(buf[i .. $] == "2060000000001400000000030000000000"); 2918 2919 // Boundary case: 0 2920 data = [ 0 ]; 2921 i = biguintToOctal(buf, data); 2922 assert(buf[i .. $] == "0"); 2923 }