1 /** Fundamental operations for arbitrary-precision arithmetic 2 * 3 * These functions are for internal use only. 4 */ 5 /* Copyright Don Clugston 2008 - 2010. 6 * Distributed under the Boost Software License, Version 1.0. 7 * (See accompanying file LICENSE_1_0.txt or copy at 8 * http://www.boost.org/LICENSE_1_0.txt) 9 */ 10 /* References: 11 "Modern Computer Arithmetic" (MCA) is the primary reference for all 12 algorithms used in this library. 13 - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic", 14 Version 0.5.9, (Oct 2010). 15 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 16 Max-Planck Institute fuer Informatik, (Oct 1998). 17 - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.", 18 INRIA 4664, (Dec 2002). 19 - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?", 20 http://bodrato.it/papers (2006). 21 - A. Fog, "Optimizing subroutines in assembly language", 22 www.agner.org/optimize (2008). 23 - A. Fog, "The microarchitecture of Intel and AMD CPU's", 24 www.agner.org/optimize (2008). 25 - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs 26 and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008). 27 28 Idioms: 29 Many functions in this module use 30 'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)' 31 in order to disable implicit conversion. 32 33 */ 34 module std.internal.math.biguintcore; 35 36 version (D_InlineAsm_X86) 37 { 38 static import std.internal.math.biguintx86; 39 } 40 static import std.internal.math.biguintnoasm; 41 42 import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT, 43 KARATSUBASQUARELIMIT; 44 45 alias multibyteAdd = multibyteAddSub!('+'); 46 alias multibyteSub = multibyteAddSub!('-'); 47 48 private import std.traits; 49 private import std.range.primitives; 50 public import std.ascii : LetterCase; 51 import std.range.primitives; 52 import std.traits; 53 54 private: 55 56 // dipatchers to the right low-level primitives. Added to allow BigInt CTFE for 57 // 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's 58 // used by the other architectures too. 59 // See comments below in case it has to be refactored. 60 version (X86) 61 uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry) 62 { 63 // must be checked before, otherwise D_InlineAsm_X86 is true. 64 if (__ctfe) 65 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry); 66 // Runtime. 67 else version (D_InlineAsm_X86) 68 return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry); 69 // Runtime if no asm available. 70 else 71 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry); 72 } 73 // Any other architecture 74 else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub; 75 76 version (X86) 77 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry) 78 { 79 if (__ctfe) 80 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry); 81 else version (D_InlineAsm_X86) 82 return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry); 83 else 84 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry); 85 } 86 else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign; 87 88 version (X86) 89 uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits) 90 { 91 if (__ctfe) 92 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits); 93 else version (D_InlineAsm_X86) 94 return std.internal.math.biguintx86.multibyteShl(dest, src, numbits); 95 else 96 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits); 97 } 98 else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl; 99 100 version (X86) 101 void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits) 102 { 103 if (__ctfe) 104 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits); 105 else version (D_InlineAsm_X86) 106 std.internal.math.biguintx86.multibyteShr(dest, src, numbits); 107 else 108 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits); 109 } 110 else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr; 111 112 version (X86) 113 uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry) 114 { 115 if (__ctfe) 116 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry); 117 else version (D_InlineAsm_X86) 118 return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry); 119 else 120 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry); 121 } 122 else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul; 123 124 version (X86) 125 uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry) 126 { 127 if (__ctfe) 128 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry); 129 else version (D_InlineAsm_X86) 130 return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry); 131 else 132 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry); 133 } 134 else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd; 135 136 version (X86) 137 void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right) 138 { 139 if (__ctfe) 140 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right); 141 else version (D_InlineAsm_X86) 142 std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right); 143 else 144 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right); 145 } 146 else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate; 147 148 version (X86) 149 uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow) 150 { 151 if (__ctfe) 152 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow); 153 else version (D_InlineAsm_X86) 154 return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow); 155 else 156 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow); 157 } 158 else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign; 159 160 version (X86) 161 void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src) 162 { 163 if (__ctfe) 164 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src); 165 else version (D_InlineAsm_X86) 166 std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src); 167 else 168 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src); 169 } 170 else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares; 171 172 version (X86) 173 void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x) 174 { 175 if (__ctfe) 176 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x); 177 else version (D_InlineAsm_X86) 178 std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x); 179 else 180 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x); 181 } 182 else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate; 183 184 version (X86) 185 void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x) 186 { 187 if (__ctfe) 188 std.internal.math.biguintnoasm.multibyteSquare(result, x); 189 else version (D_InlineAsm_X86) 190 std.internal.math.biguintx86.multibyteSquare(result, x); 191 else 192 std.internal.math.biguintnoasm.multibyteSquare(result, x); 193 } 194 else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare; 195 196 // Limits for when to switch between algorithms. 197 // Half the size of the data cache. 198 @nogc nothrow pure @safe size_t getCacheLimit() 199 { 200 import core.cpuid : dataCaches; 201 return dataCaches[0].size * 1024 / 2; 202 } 203 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division 204 205 206 // These constants are used by shift operations 207 static if (BigDigit.sizeof == int.sizeof) 208 { 209 enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 } 210 alias BIGHALFDIGIT = ushort; 211 } 212 else static if (BigDigit.sizeof == long.sizeof) 213 { 214 alias BIGHALFDIGIT = uint; 215 enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 } 216 } 217 else static assert(0, "Unsupported BigDigit size"); 218 219 import std.exception : assumeUnique; 220 import std.traits : isIntegral; 221 enum BigDigitBits = BigDigit.sizeof*8; 222 template maxBigDigits(T) 223 if (isIntegral!T) 224 { 225 enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof; 226 } 227 228 static immutable BigDigit[] ZERO = [0]; 229 static immutable BigDigit[] ONE = [1]; 230 static immutable BigDigit[] TWO = [2]; 231 static immutable BigDigit[] TEN = [10]; 232 233 234 public: 235 236 /// BigUint performs memory management and wraps the low-level calls. 237 struct BigUint 238 { 239 private: 240 pure invariant() 241 { 242 assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ), 243 "Invariant requires data to not empty or zero"); 244 } 245 246 immutable(BigDigit) [] data = ZERO; 247 248 this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe 249 { 250 data = x; 251 } 252 package(std) // used from: std.bigint 253 this(T)(T x) pure nothrow @safe scope if (isIntegral!T) 254 { 255 opAssign(x); 256 } 257 258 enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc { 259 return assumeUnique(input); 260 }; 261 public: 262 // Length in uints 263 @property size_t uintLength() pure nothrow const @safe @nogc scope 264 { 265 static if (BigDigit.sizeof == uint.sizeof) 266 { 267 return data.length; 268 } 269 else static if (BigDigit.sizeof == ulong.sizeof) 270 { 271 return data.length * 2 - 272 ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0); 273 } 274 } 275 @property size_t ulongLength() pure nothrow const @safe @nogc scope 276 { 277 static if (BigDigit.sizeof == uint.sizeof) 278 { 279 return (data.length + 1) >> 1; 280 } 281 else static if (BigDigit.sizeof == ulong.sizeof) 282 { 283 return data.length; 284 } 285 } 286 287 // The value at (cast(ulong[]) data)[n] 288 ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope 289 { 290 static if (BigDigit.sizeof == int.sizeof) 291 { 292 if (data.length == n*2 + 1) return data[n*2]; 293 return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 ); 294 } 295 else static if (BigDigit.sizeof == long.sizeof) 296 { 297 return data[n]; 298 } 299 } 300 301 uint peekUint(size_t n) pure nothrow const @safe @nogc scope 302 { 303 static if (BigDigit.sizeof == int.sizeof) 304 { 305 return data[n]; 306 } 307 else 308 { 309 immutable x = data[n >> 1]; 310 return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x; 311 } 312 } 313 314 /// 315 void opAssign(Tulong)(Tulong u) pure nothrow @safe scope if (is (Tulong == ulong)) 316 { 317 if (u == 0) data = ZERO; 318 else if (u == 1) data = ONE; 319 else if (u == 2) data = TWO; 320 else if (u == 10) data = TEN; 321 else 322 { 323 static if (BigDigit.sizeof == int.sizeof) 324 { 325 uint ulo = cast(uint)(u & 0xFFFF_FFFF); 326 uint uhi = cast(uint)(u >> 32); 327 if (uhi == 0) 328 { 329 data = [ulo]; 330 } 331 else 332 { 333 data = [ulo, uhi]; 334 } 335 } 336 else static if (BigDigit.sizeof == long.sizeof) 337 { 338 data = [u]; 339 } 340 } 341 } 342 void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope 343 { 344 this.data = y.data; 345 } 346 347 /// 348 int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope 349 { 350 if (data.length != y.data.length) 351 return (data.length > y.data.length) ? 1 : -1; 352 size_t k = highestDifferentDigit(data, y.data); 353 if (data[k] == y.data[k]) 354 return 0; 355 return data[k] > y.data[k] ? 1 : -1; 356 } 357 358 /// 359 int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope if (is (Tulong == ulong)) 360 { 361 if (data.length > maxBigDigits!Tulong) 362 return 1; 363 364 foreach_reverse (i; 0 .. maxBigDigits!Tulong) 365 { 366 BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits)); 367 if (tmp == 0) 368 if (data.length >= i+1) 369 { 370 // Since ZERO is [0], so we cannot simply return 1 here, as 371 // data[i] would be 0 for i == 0 in that case. 372 return (data[i] > 0) ? 1 : 0; 373 } 374 else 375 continue; 376 else 377 if (i+1 > data.length) 378 return -1; 379 else if (tmp != data[i]) 380 return data[i] > tmp ? 1 : -1; 381 } 382 return 0; 383 } 384 385 bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope 386 { 387 return y.data[] == data[]; 388 } 389 390 bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope 391 { 392 if (data.length > 2) 393 return false; 394 uint ylo = cast(uint)(y & 0xFFFF_FFFF); 395 uint yhi = cast(uint)(y >> 32); 396 if (data.length == 2 && data[1]!=yhi) 397 return false; 398 if (data.length == 1 && yhi != 0) 399 return false; 400 return (data[0] == ylo); 401 } 402 403 bool isZero() pure const nothrow @safe @nogc scope 404 { 405 return data.length == 1 && data[0] == 0; 406 } 407 408 size_t numBytes() pure nothrow const @safe @nogc scope 409 { 410 return data.length * BigDigit.sizeof; 411 } 412 413 // the extra bytes are added to the start of the string 414 char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope 415 { 416 immutable predictlength = 20+20*(data.length/2); // just over 19 417 char [] buff = new char[frontExtraBytes + predictlength]; 418 ptrdiff_t sofar = biguintToDecimal(buff, data.dup); 419 return buff[sofar-frontExtraBytes..$]; 420 } 421 422 /** Convert to a hex string, printing a minimum number of digits 'minPadding', 423 * allocating an additional 'frontExtraBytes' at the start of the string. 424 * Padding is done with padChar, which may be '0' or ' '. 425 * 'separator' is a digit separation character. If non-zero, it is inserted 426 * between every 8 digits. 427 * Separator characters do not contribute to the minPadding. 428 */ 429 char [] toHexString(int frontExtraBytes, char separator = 0, 430 int minPadding=0, char padChar = '0', 431 LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope 432 { 433 // Calculate number of extra padding bytes 434 size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof) 435 ? minPadding - data.length * 2 * BigDigit.sizeof : 0; 436 437 // Length not including separator bytes 438 size_t lenBytes = data.length * 2 * BigDigit.sizeof; 439 440 // Calculate number of separator bytes 441 size_t mainSeparatorBytes = separator ? (lenBytes / 8) - 1 : 0; 442 immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0; 443 444 char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes]; 445 biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase); 446 if (extraPad > 0) 447 { 448 if (separator) 449 { 450 size_t start = frontExtraBytes; // first index to pad 451 if (extraPad &7) 452 { 453 // Do 1 to 7 extra zeros. 454 buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar; 455 buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator); 456 start += (extraPad & 7) + 1; 457 } 458 for (int i=0; i< (extraPad >> 3); ++i) 459 { 460 buff[start .. start + 8] = padChar; 461 buff[start + 8] = (padChar == ' ' ? ' ' : separator); 462 start += 9; 463 } 464 } 465 else 466 { 467 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar; 468 } 469 } 470 int z = frontExtraBytes; 471 if (lenBytes > minPadding) 472 { 473 // Strip leading zeros. 474 ptrdiff_t maxStrip = lenBytes - minPadding; 475 while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0) 476 { 477 ++z; 478 --maxStrip; 479 } 480 } 481 if (padChar!='0') 482 { 483 // Convert leading zeros into padChars. 484 for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k) 485 { 486 if (buff[k]=='0') buff[k]=padChar; 487 } 488 } 489 return buff[z-frontExtraBytes..$]; 490 } 491 492 /** 493 * Convert to an octal string. 494 */ 495 char[] toOctalString() pure nothrow @safe const scope 496 { 497 auto predictLength = 1 + data.length*BigDigitBits / 3; 498 char[] buff = new char[predictLength]; 499 size_t firstNonZero = biguintToOctal(buff, data); 500 return buff[firstNonZero .. $]; 501 } 502 503 // return false if invalid character found 504 bool fromHexString(Range)(Range s) scope if ( 505 isBidirectionalRange!Range && isSomeChar!(ElementType!Range)) 506 { 507 import std.range : walkLength; 508 509 //Strip leading zeros 510 while (!s.empty && s.front == '0') 511 s.popFront; 512 513 if (s.empty) 514 { 515 data = ZERO; 516 return true; 517 } 518 519 immutable len = (s.save.walkLength + 15) / 4; 520 auto tmp = new BigDigit[len + 1]; 521 uint part, sofar, partcount; 522 523 foreach_reverse (character; s) 524 { 525 if (character == '_') 526 continue; 527 528 uint x; 529 if (character >= '0' && character <= '9') 530 { 531 x = character - '0'; 532 } 533 else if (character >= 'A' && character <= 'F') 534 { 535 x = character - 'A' + 10; 536 } 537 else if (character >= 'a' && character <= 'f') 538 { 539 x = character - 'a' + 10; 540 } 541 else 542 { 543 return false; 544 } 545 546 part >>= 4; 547 part |= (x << (32 - 4)); 548 ++partcount; 549 550 if (partcount == 8) 551 { 552 tmp[sofar] = part; 553 ++sofar; 554 partcount = 0; 555 part = 0; 556 } 557 } 558 if (part) 559 { 560 for ( ; partcount != 8; ++partcount) part >>= 4; 561 tmp[sofar] = part; 562 ++sofar; 563 } 564 if (sofar == 0) 565 data = ZERO; 566 else 567 data = trustedAssumeUnique(tmp[0 .. sofar]); 568 569 return true; 570 } 571 572 // return true if OK; false if erroneous characters found 573 bool fromDecimalString(Range)(Range s) scope if ( 574 isForwardRange!Range && isSomeChar!(ElementType!Range)) 575 { 576 import std.range : walkLength; 577 578 while (!s.empty && s.front == '0') 579 { 580 s.popFront; 581 } 582 583 if (s.empty) 584 { 585 data = ZERO; 586 return true; 587 } 588 589 auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19; 590 auto tmp = new BigDigit[predict_length]; 591 592 tmp.length = biguintFromDecimal(tmp, s); 593 594 data = trustedAssumeUnique(tmp); 595 return true; 596 } 597 598 void fromMagnitude(Range)(Range magnitude) scope 599 if (isInputRange!Range 600 && (isForwardRange!Range || hasLength!Range) 601 && isUnsigned!(ElementType!Range)) 602 { 603 while (!magnitude.empty && magnitude.front == 0) 604 magnitude.popFront; 605 static if (hasLength!Range) 606 immutable inputLen = magnitude.length; 607 else 608 immutable inputLen = magnitude.save.walkLength; 609 if (!inputLen) 610 { 611 this.data = ZERO; 612 return; 613 } 614 // `magnitude` has its most significant element first but BigUint.data 615 // stores the most significant last. 616 BigDigit[] newDigits; 617 alias E = ElementType!Range; 618 static if (E.sizeof == BigDigit.sizeof) 619 { 620 newDigits = new BigDigit[inputLen]; 621 foreach_reverse (ref digit; newDigits) 622 { 623 digit = magnitude.front; 624 magnitude.popFront(); 625 } 626 } 627 else static if (E.sizeof < BigDigit.sizeof) 628 { 629 enum elementsPerDigit = BigDigit.sizeof / E.sizeof; 630 newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit]; 631 immutable remainder = inputLen % elementsPerDigit; 632 // If there is a remainder specially assemble the most significant digit. 633 if (remainder) 634 { 635 BigDigit tmp = magnitude.front; 636 magnitude.popFront(); 637 foreach (_; 1 .. remainder) 638 { 639 tmp = (tmp << (E.sizeof * 8)) | magnitude.front; 640 magnitude.popFront(); 641 } 642 newDigits[$-1] = tmp; 643 } 644 // Assemble full digits from most to least significant. 645 foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)]) 646 { 647 BigDigit tmp; 648 static foreach (n; 0 .. elementsPerDigit) 649 { 650 tmp |= cast(BigDigit) magnitude.front << 651 ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8); 652 magnitude.popFront(); 653 } 654 digit = tmp; 655 } 656 } 657 else static if (E.sizeof > BigDigit.sizeof) 658 { 659 enum digitsPerElement = E.sizeof / BigDigit.sizeof; 660 newDigits = new BigDigit[inputLen * digitsPerElement]; 661 size_t i = newDigits.length - 1; 662 foreach (element; magnitude) 663 { 664 static foreach (n; 0 .. digitsPerElement) 665 newDigits[i - n] = 666 cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8)); 667 i -= digitsPerElement; 668 } 669 while (newDigits[$-1] == 0) 670 newDigits = newDigits[0 .. $-1]; 671 } 672 else 673 static assert(0); 674 this.data = trustedAssumeUnique(newDigits); 675 return; 676 } 677 678 nothrow pure @safe unittest 679 { 680 immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD]; 681 // Internal representation is most-significant-last but `fromMagnitude` 682 // argument is most-significant-first. 683 immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005]; 684 BigUint b; 685 // Test with reference magnitude. 686 b.fromMagnitude(referenceMagnitude); 687 assert(b.data == referenceData); 688 // Test ubyte array. 689 import std.bitmanip : nativeToBigEndian; 690 ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~ 691 nativeToBigEndian(referenceMagnitude[1]) ~ 692 nativeToBigEndian(referenceMagnitude[2]); 693 b.data = ZERO; 694 b.fromMagnitude(ubyteMagnitude); 695 assert(b.data == referenceData); 696 // Test ulong array. 697 static if (BigDigit.sizeof == uint.sizeof) 698 immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]), 699 ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2], 700 ]; 701 else static if (BigDigit.sizeof == ulong.sizeof) 702 alias ulongMagnitude = referenceMagnitude; 703 b.data = ZERO; 704 b.fromMagnitude(ulongMagnitude); 705 assert(b.data == referenceData); 706 } 707 708 //////////////////////// 709 // 710 // All of these member functions create a new BigUint. 711 712 // return x >> y 713 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope 714 if (op == ">>" && is (Tulong == ulong)) 715 { 716 assert(y > 0, "Can not right shift BigUint by 0"); 717 uint bits = cast(uint) y & BIGDIGITSHIFTMASK; 718 if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO); 719 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 720 if (bits == 0) 721 { 722 return BigUint(data[words..$]); 723 } 724 else 725 { 726 uint [] result = new BigDigit[data.length - words]; 727 multibyteShr(result, data[words..$], bits); 728 729 if (result.length > 1 && result[$-1] == 0) 730 return BigUint(trustedAssumeUnique(result[0 .. $-1])); 731 else 732 return BigUint(trustedAssumeUnique(result)); 733 } 734 } 735 736 // return x << y 737 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope 738 if (op == "<<" && is (Tulong == ulong)) 739 { 740 assert(y > 0, "Can not left shift BigUint by 0"); 741 if (isZero()) return this; 742 uint bits = cast(uint) y & BIGDIGITSHIFTMASK; 743 assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max), 744 "Shift result exceeds temporary store"); 745 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 746 BigDigit [] result = new BigDigit[data.length + words+1]; 747 result[0 .. words] = 0; 748 if (bits == 0) 749 { 750 result[words .. words+data.length] = data[]; 751 return BigUint(trustedAssumeUnique(result[0 .. words+data.length])); 752 } 753 else 754 { 755 immutable c = multibyteShl(result[words .. words+data.length], data, bits); 756 if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length])); 757 result[$-1] = c; 758 return BigUint(trustedAssumeUnique(result)); 759 } 760 } 761 762 // If wantSub is false, return x + y, leaving sign unchanged 763 // If wantSub is true, return abs(x - y), negating sign if x < y 764 static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y, 765 bool wantSub, ref bool sign) pure nothrow @safe if (is(Tulong == ulong)) 766 { 767 BigUint r; 768 if (wantSub) 769 { // perform a subtraction 770 if (x.data.length > 2) 771 { 772 // subInt returns GC allocated array, can be safely cast to immutable 773 r.data = (() @trusted => cast(immutable) subInt(x.data, y))(); 774 } 775 else 776 { // could change sign! 777 ulong xx = x.data[0]; 778 if (x.data.length > 1) 779 xx += (cast(ulong) x.data[1]) << 32; 780 ulong d; 781 if (xx <= y) 782 { 783 d = y - xx; 784 sign = !sign; 785 } 786 else 787 { 788 d = xx - y; 789 } 790 if (d == 0) 791 { 792 r = 0UL; 793 sign = false; 794 return r; 795 } 796 if (d > uint.max) 797 { 798 r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)]; 799 } 800 else 801 { 802 r.data = [cast(uint)(d & 0xFFFF_FFFF)]; 803 } 804 } 805 } 806 else 807 { 808 // addInt returns GC allocated array, can be safely cast to immutable 809 r.data = (() @trusted => cast(immutable) addInt(x.data, y))(); 810 } 811 return r; 812 } 813 814 // If wantSub is false, return x + y, leaving sign unchanged. 815 // If wantSub is true, return abs(x - y), negating sign if x<y 816 static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign) 817 pure nothrow @safe 818 { 819 BigUint r; 820 if (wantSub) 821 { // perform a subtraction 822 bool negative; 823 // sub returns GC allocated array, can be safely cast to immutable 824 r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))(); 825 sign ^= negative; 826 if (r.isZero()) 827 { 828 sign = false; 829 } 830 } 831 else 832 { 833 // add returns GC allocated array, can be safely cast to immutable 834 r.data = (() @trusted => cast(immutable) add(x.data, y.data))(); 835 } 836 return r; 837 } 838 839 840 // return x*y. 841 static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe 842 { 843 if (y == 0 || x == 0) return BigUint(ZERO); 844 static if (T.sizeof * 8 <= 32) 845 uint hi = 0; 846 else 847 uint hi = cast(uint) (y >>> 32); 848 uint lo = cast(uint) (y & 0xFFFF_FFFF); 849 uint [] result = new BigDigit[x.data.length+1+(hi != 0)]; 850 result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0); 851 if (hi != 0) 852 { 853 result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1], 854 x.data, hi, 0); 855 } 856 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 857 } 858 859 /* return x * y. 860 */ 861 static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe 862 { 863 if (y == 0 || x == 0) 864 return BigUint(ZERO); 865 auto len = x.data.length + y.data.length; 866 BigDigit [] result = new BigDigit[len]; 867 if (y.data.length > x.data.length) 868 { 869 mulInternal(result, y.data, x.data); 870 } 871 else 872 { 873 if (x.data[]==y.data[]) squareInternal(result, x.data); 874 else mulInternal(result, x.data, y.data); 875 } 876 // the highest element could be zero, 877 // in which case we need to reduce the length 878 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 879 } 880 881 // return x / y 882 static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe 883 if ( is(immutable T == immutable uint) ) 884 { 885 uint y = y_; 886 if (y == 1) 887 return x; 888 uint [] result = new BigDigit[x.data.length]; 889 if ((y&(-y))==y) 890 { 891 assert(y != 0, "BigUint division by zero"); 892 // perfect power of 2 893 uint b = 0; 894 for (;y != 1; y>>=1) 895 { 896 ++b; 897 } 898 multibyteShr(result, x.data, b); 899 } 900 else 901 { 902 result[] = x.data[]; 903 cast(void) multibyteDivAssign(result, y, 0); 904 } 905 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 906 } 907 908 static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe 909 if ( is(immutable T == immutable ulong) ) 910 { 911 if (y <= uint.max) 912 return divInt!uint(x, cast(uint) y); 913 if (x.data.length < 2) 914 return BigUint(ZERO); 915 uint hi = cast(uint)(y >>> 32); 916 uint lo = cast(uint)(y & 0xFFFF_FFFF); 917 immutable uint[2] z = [lo, hi]; 918 BigDigit[] result = new BigDigit[x.data.length - z.length + 1]; 919 divModInternal(result, null, x.data, z[]); 920 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 921 } 922 923 // return x % y 924 static uint modInt(T)(scope BigUint x, T y_) pure if ( is(immutable T == immutable uint) ) 925 { 926 import core.memory : GC; 927 uint y = y_; 928 assert(y != 0, "% 0 not allowed"); 929 if ((y&(-y)) == y) 930 { // perfect power of 2 931 return x.data[0] & (y-1); 932 } 933 else 934 { 935 // horribly inefficient - malloc, copy, & store are unnecessary. 936 uint [] wasteful = new BigDigit[x.data.length]; 937 wasteful[] = x.data[]; 938 immutable rem = multibyteDivAssign(wasteful, y, 0); 939 () @trusted { GC.free(wasteful.ptr); } (); 940 return rem; 941 } 942 } 943 944 // return x / y 945 static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe 946 { 947 if (y.data.length > x.data.length) 948 return BigUint(ZERO); 949 if (y.data.length == 1) 950 return divInt(x, y.data[0]); 951 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 952 divModInternal(result, null, x.data, y.data); 953 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 954 } 955 956 // return x % y 957 static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe 958 { 959 if (y.data.length > x.data.length) return x; 960 if (y.data.length == 1) 961 { 962 return BigUint([modInt(x, y.data[0])]); 963 } 964 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 965 BigDigit [] rem = new BigDigit[y.data.length]; 966 divModInternal(result, rem, x.data, y.data); 967 return BigUint(removeLeadingZeros(trustedAssumeUnique(rem))); 968 } 969 970 // Return x / y in quotient, x % y in remainder 971 static void divMod(BigUint x, scope BigUint y, 972 out BigUint quotient, out BigUint remainder) pure nothrow @safe 973 { 974 /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */ 975 if (y.data.length > x.data.length) 976 { 977 quotient = 0uL; 978 remainder = x; 979 } 980 else if (y.data.length == 1) 981 { 982 quotient = divInt(x, y.data[0]); 983 remainder = BigUint([modInt(x, y.data[0])]); 984 } 985 else 986 { 987 BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1]; 988 BigDigit[] rem = new BigDigit[y.data.length]; 989 divModInternal(quot, rem, x.data, y.data); 990 quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot))); 991 remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem))); 992 } 993 } 994 995 // return x op y 996 static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign) 997 pure nothrow @safe if (op == "|" || op == "^" || op == "&") 998 { 999 auto d1 = includeSign(x.data, y.uintLength, xSign); 1000 auto d2 = includeSign(y.data, x.uintLength, ySign); 1001 1002 foreach (i; 0 .. d1.length) 1003 { 1004 mixin("d1[i] " ~ op ~ "= d2[i];"); 1005 } 1006 1007 mixin("resultSign = xSign " ~ op ~ " ySign;"); 1008 1009 if (resultSign) 1010 { 1011 twosComplement(d1, d1); 1012 } 1013 1014 return BigUint(removeLeadingZeros(trustedAssumeUnique(d1))); 1015 } 1016 1017 /** 1018 * Return a BigUint which is x raised to the power of y. 1019 * Method: Powers of 2 are removed from x, then left-to-right binary 1020 * exponentiation is used. 1021 * Memory allocation is minimized: at most one temporary BigUint is used. 1022 */ 1023 static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe 1024 { 1025 // Deal with the degenerate cases first. 1026 if (y == 0) return BigUint(ONE); 1027 if (y == 1) return x; 1028 if (x == 0 || x == 1) return x; 1029 1030 BigUint result; 1031 1032 // Simplify, step 1: Remove all powers of 2. 1033 uint firstnonzero = firstNonZeroDigit(x.data); 1034 // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits)) 1035 // where BigDigitBits = BigDigit.sizeof * 8 1036 1037 // See if x[firstnonzero..$] can now fit into a single digit. 1038 bool singledigit = ((x.data.length - firstnonzero) == 1); 1039 // If true, then x0 is that digit 1040 // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits)) 1041 BigDigit x0 = x.data[firstnonzero]; 1042 assert(x0 != 0, "pow(0, y) not allowed"); 1043 // Length of the non-zero portion 1044 size_t nonzerolength = x.data.length - firstnonzero; 1045 ulong y0; 1046 uint evenbits = 0; // number of even bits in the bottom of x 1047 while (!(x0 & 1)) 1048 { 1049 x0 >>= 1; 1050 ++evenbits; 1051 } 1052 1053 if (x.data.length- firstnonzero == 2) 1054 { 1055 // Check for a single digit straddling a digit boundary 1056 const BigDigit x1 = x.data[firstnonzero+1]; 1057 if ((x1 >> evenbits) == 0) 1058 { 1059 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits)); 1060 singledigit = true; 1061 } 1062 } 1063 // Now if (singledigit), x^^y = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1064 1065 uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end 1066 1067 // Simplify, step 2: For singledigits, see if we can trivially reduce y 1068 1069 BigDigit finalMultiplier = 1UL; 1070 1071 if (singledigit) 1072 { 1073 // x fits into a single digit. Raise it to the highest power we can 1074 // that still fits into a single digit, then reduce the exponent accordingly. 1075 // We're quite likely to have a residual multiply at the end. 1076 // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100. 1077 // and 5^^13 still fits into a uint. 1078 evenshiftbits = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK); 1079 if (x0 == 1) 1080 { // Perfect power of 2 1081 result = 1UL; 1082 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y; 1083 } 1084 immutable p = highestPowerBelowUintMax(x0); 1085 if (y <= p) 1086 { // Just do it with pow 1087 result = cast(ulong) intpow(x0, y); 1088 if (evenbits + firstnonzero == 0) 1089 return result; 1090 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y; 1091 } 1092 y0 = y / p; 1093 finalMultiplier = intpow(x0, y - y0*p); 1094 x0 = intpow(x0, p); 1095 // Result is x0 1096 nonzerolength = 1; 1097 } 1098 // Now if (singledigit), x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1099 1100 // Perform a crude check for overflow and allocate result buffer. 1101 // The length required is y * lg2(x) bits. 1102 // which will always fit into y*x.length digits. But this is 1103 // a gross overestimate if x is small (length 1 or 2) and the highest 1104 // digit is nearly empty. 1105 // A better estimate is: 1106 // y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits, 1107 // and the first term is always between 1108 // y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and 1109 // y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS 1110 // For single digit payloads, we already have 1111 // x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1112 // and x0 is almost a full digit, so it's a tight estimate. 1113 // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y 1114 // Note that the divisions must be rounded up. 1115 1116 // Estimated length in BigDigits 1117 immutable estimatelength = singledigit 1118 ? 1 + y0 + ((evenbits*y + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y 1119 : x.data.length * y; 1120 // Imprecise check for overflow. Makes the extreme cases easier to debug 1121 // (less extreme overflow will result in an out of memory error). 1122 if (estimatelength > uint.max/(4*BigDigit.sizeof)) 1123 assert(0, "Overflow in BigInt.pow"); 1124 1125 // The result buffer includes space for all the trailing zeros 1126 BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength]; 1127 1128 // Do all the powers of 2! 1129 size_t result_start = cast(size_t)( firstnonzero * y 1130 + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0)); 1131 1132 resultBuffer[0 .. result_start] = 0; 1133 BigDigit [] t1 = resultBuffer[result_start..$]; 1134 BigDigit [] r1; 1135 1136 if (singledigit) 1137 { 1138 r1 = t1[0 .. 1]; 1139 r1[0] = x0; 1140 y = y0; 1141 } 1142 else 1143 { 1144 // It's not worth right shifting by evenbits unless we also shrink the length after each 1145 // multiply or squaring operation. That might still be worthwhile for large y. 1146 r1 = t1[0 .. x.data.length - firstnonzero]; 1147 r1[0..$] = x.data[firstnonzero..$]; 1148 } 1149 1150 if (y>1) 1151 { // Set r1 = r1 ^^ y. 1152 // The secondary buffer only needs space for the multiplication results 1153 BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start]; 1154 BigDigit [] r2; 1155 1156 int shifts = 63; // num bits in a long 1157 while (!(y & 0x8000_0000_0000_0000L)) 1158 { 1159 y <<= 1; 1160 --shifts; 1161 } 1162 y <<=1; 1163 1164 while (y != 0) 1165 { 1166 // For each bit of y: Set r1 = r1 * r1 1167 // If the bit is 1, set r1 = r1 * x 1168 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5. 1169 // Optimization opportunity: if more than 2 bits in y are set, 1170 // it's usually possible to reduce the number of multiplies 1171 // by caching odd powers of x. eg for y = 54, 1172 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2 1173 r2 = t2[0 .. r1.length*2]; 1174 squareInternal(r2, r1); 1175 if (y & 0x8000_0000_0000_0000L) 1176 { 1177 r1 = t1[0 .. r2.length + nonzerolength]; 1178 if (singledigit) 1179 { 1180 r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0); 1181 } 1182 else 1183 { 1184 mulInternal(r1, r2, x.data[firstnonzero..$]); 1185 } 1186 } 1187 else 1188 { 1189 r1 = t1[0 .. r2.length]; 1190 r1[] = r2[]; 1191 } 1192 y <<=1; 1193 shifts--; 1194 } 1195 while (shifts>0) 1196 { 1197 r2 = t2[0 .. r1.length * 2]; 1198 squareInternal(r2, r1); 1199 r1 = t1[0 .. r2.length]; 1200 r1[] = r2[]; 1201 --shifts; 1202 } 1203 } 1204 1205 if (finalMultiplier != 1) 1206 { 1207 const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0); 1208 if (carry) 1209 { 1210 r1 = t1[0 .. r1.length + 1]; 1211 r1[$-1] = carry; 1212 } 1213 } 1214 if (evenshiftbits) 1215 { 1216 const BigDigit carry = multibyteShl(r1, r1, evenshiftbits); 1217 if (carry != 0) 1218 { 1219 r1 = t1[0 .. r1.length + 1]; 1220 r1[$ - 1] = carry; 1221 } 1222 } 1223 while (r1[$ - 1]==0) 1224 { 1225 r1=r1[0 .. $ - 1]; 1226 } 1227 return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length])); 1228 } 1229 1230 // Implement toHash so that BigUint works properly as an AA key. 1231 size_t toHash() const @nogc nothrow pure @safe scope 1232 { 1233 return .hashOf(data); 1234 } 1235 1236 } // end BigUint 1237 1238 @safe pure nothrow unittest 1239 { 1240 // ulong comparison test 1241 BigUint a = [1]; 1242 assert(a == 1); 1243 // https://issues.dlang.org/show_bug.cgi?id=9548 1244 assert(a < 0x8000_0000_0000_0000UL); 1245 1246 // https://issues.dlang.org/show_bug.cgi?id=12234 1247 BigUint z = [0]; 1248 assert(z == 0UL); 1249 assert(!(z > 0UL)); 1250 assert(!(z < 0UL)); 1251 } 1252 1253 // https://issues.dlang.org/show_bug.cgi?id=16223 1254 @system pure nothrow unittest 1255 { 1256 BigUint a = [3]; 1257 int b = 5; 1258 assert(BigUint.mulInt(a,b) == 15); 1259 } 1260 1261 // Remove leading zeros from x, to restore the BigUint invariant 1262 inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe 1263 { 1264 size_t k = x.length; 1265 while (k>1 && x[k - 1]==0) --k; 1266 return x[0 .. k]; 1267 } 1268 1269 pure @safe unittest 1270 { 1271 BigUint r = BigUint([5]); 1272 BigUint t = BigUint([7]); 1273 BigUint s = BigUint.mod(r, t); 1274 assert(s == 5); 1275 } 1276 1277 1278 @safe pure unittest 1279 { 1280 BigUint r; 1281 r = 5UL; 1282 assert(r.peekUlong(0) == 5UL); 1283 assert(r.peekUint(0) == 5U); 1284 r = 0x1234_5678_9ABC_DEF0UL; 1285 assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL); 1286 assert(r.peekUint(0) == 0x9ABC_DEF0U); 1287 } 1288 1289 1290 // Pow tests 1291 pure @safe unittest 1292 { 1293 BigUint r, s; 1294 r.fromHexString("80000000_00000001"); 1295 s = BigUint.pow(r, 5); 1296 r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000" 1297 ~ "_00000002_80000000_00000001"); 1298 assert(s == r); 1299 s = 10UL; 1300 s = BigUint.pow(s, 39); 1301 r.fromDecimalString("1000000000000000000000000000000000000000"); 1302 assert(s == r); 1303 r.fromHexString("1_E1178E81_00000000"); 1304 s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds 1305 1306 r.fromDecimalString("000_000_00"); 1307 assert(r == 0); 1308 r.fromDecimalString("0007"); 1309 assert(r == 7); 1310 r.fromDecimalString("0"); 1311 assert(r == 0); 1312 } 1313 1314 // Radix conversion tests 1315 @safe pure unittest 1316 { 1317 BigUint r; 1318 r.fromHexString("1_E1178E81_00000000"); 1319 assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000"); 1320 assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000"); 1321 assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000"); 1322 assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000"); 1323 assert(r.toHexString(0, '_', 16+8+8) == "00000000_00000001_E1178E81_00000000"); 1324 assert(r.toHexString(0, '_', 16+8+8+1) == "0_00000000_00000001_E1178E81_00000000"); 1325 assert(r.toHexString(0, '_', 16+8+8+1, ' ') == " 1_E1178E81_00000000"); 1326 assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000"); 1327 r = 0UL; 1328 assert(r.toHexString(0, '_', 0) == "0"); 1329 assert(r.toHexString(0, '_', 7) == "0000000"); 1330 assert(r.toHexString(0, '_', 7, ' ') == " 0"); 1331 assert(r.toHexString(0, '#', 9) == "0#00000000"); 1332 assert(r.toHexString(0, 0, 9) == "000000000"); 1333 } 1334 1335 // 1336 @safe pure unittest 1337 { 1338 BigUint r; 1339 r.fromHexString("1_E1178E81_00000000"); 1340 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000"); 1341 assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000"); 1342 assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000"); 1343 assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000"); 1344 assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) == "00000000_00000001_e1178e81_00000000"); 1345 assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000"); 1346 assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == " 1_e1178e81_00000000"); 1347 assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000"); 1348 r = 0UL; 1349 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0"); 1350 assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000"); 1351 assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == " 0"); 1352 assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000"); 1353 assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000"); 1354 assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000"); 1355 } 1356 1357 1358 private: 1359 void twosComplement(const(BigDigit) [] x, BigDigit[] result) 1360 pure nothrow @safe 1361 { 1362 foreach (i; 0 .. x.length) 1363 { 1364 result[i] = ~x[i]; 1365 } 1366 result[x.length..$] = BigDigit.max; 1367 1368 foreach (i; 0 .. result.length) 1369 { 1370 if (result[i] == BigDigit.max) 1371 { 1372 result[i] = 0; 1373 } 1374 else 1375 { 1376 result[i] += 1; 1377 break; 1378 } 1379 } 1380 } 1381 1382 // Encode BigInt as BigDigit array (sign and 2's complement) 1383 BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign) 1384 pure nothrow @safe 1385 { 1386 size_t length = (x.length > minSize) ? x.length : minSize; 1387 BigDigit [] result = new BigDigit[length]; 1388 if (sign) 1389 { 1390 twosComplement(x, result); 1391 } 1392 else 1393 { 1394 result[0 .. x.length] = x; 1395 } 1396 return result; 1397 } 1398 1399 // works for any type 1400 T intpow(T)(T x, ulong n) pure nothrow @safe 1401 { 1402 T p; 1403 1404 switch (n) 1405 { 1406 case 0: 1407 p = 1; 1408 break; 1409 1410 case 1: 1411 p = x; 1412 break; 1413 1414 case 2: 1415 p = x * x; 1416 break; 1417 1418 default: 1419 p = 1; 1420 while (1) 1421 { 1422 if (n & 1) 1423 p *= x; 1424 n >>= 1; 1425 if (!n) 1426 break; 1427 x *= x; 1428 } 1429 break; 1430 } 1431 return p; 1432 } 1433 1434 1435 // returns the maximum power of x that will fit in a uint. 1436 int highestPowerBelowUintMax(uint x) pure nothrow @safe 1437 { 1438 assert(x > 1, "x must be greater than 1"); 1439 static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 1440 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7]; 1441 if (x<24) return maxpwr[x-2]; 1442 if (x<41) return 6; 1443 if (x<85) return 5; 1444 if (x<256) return 4; 1445 if (x<1626) return 3; 1446 if (x<65_536) return 2; 1447 return 1; 1448 } 1449 1450 // returns the maximum power of x that will fit in a ulong. 1451 int highestPowerBelowUlongMax(uint x) pure nothrow @safe 1452 { 1453 assert(x > 1, "x must be greater than 1"); 1454 static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 1455 17, 17, 16, 16, 15, 15, 15, 15, 14, 14, 1456 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 1457 12, 12, 12, 12, 12, 12, 12, 12, 12]; 1458 if (x<41) return maxpwr[x-2]; 1459 if (x<57) return 11; 1460 if (x<85) return 10; 1461 if (x<139) return 9; 1462 if (x<256) return 8; 1463 if (x<566) return 7; 1464 if (x<1626) return 6; 1465 if (x<7132) return 5; 1466 if (x<65_536) return 4; 1467 if (x<2_642_246) return 3; 1468 return 2; 1469 } 1470 1471 version (StdUnittest) 1472 { 1473 1474 private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe 1475 { 1476 int pwr = 1; 1477 for (ulong q = x;x*q < cast(ulong) uint.max; ) 1478 { 1479 q*=x; ++pwr; 1480 } 1481 return pwr; 1482 } 1483 1484 @safe pure unittest 1485 { 1486 assert(highestPowerBelowUintMax(10)==9); 1487 for (int k=82; k<88; ++k) 1488 { 1489 assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k)); 1490 } 1491 } 1492 1493 } 1494 1495 1496 /* General unsigned subtraction routine for bigints. 1497 * Sets result = x - y. If the result is negative, negative will be true. 1498 * Returns: 1499 * unique memory 1500 */ 1501 BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative) 1502 pure nothrow @safe 1503 { 1504 if (x.length == y.length) 1505 { 1506 // There's a possibility of cancellation, if x and y are almost equal. 1507 ptrdiff_t last = highestDifferentDigit(x, y); 1508 BigDigit [] result = new BigDigit[last+1]; 1509 if (x[last] < y[last]) 1510 { // we know result is negative 1511 multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0); 1512 *negative = true; 1513 } 1514 else 1515 { // positive or zero result 1516 multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0); 1517 *negative = false; 1518 } 1519 while (result.length > 1 && result[$-1] == 0) 1520 { 1521 result = result[0..$-1]; 1522 } 1523 // if (result.length >1 && result[$-1]==0) return result[0..$-1]; 1524 return result; 1525 } 1526 // Lengths are different 1527 const(BigDigit) [] large, small; 1528 if (x.length < y.length) 1529 { 1530 *negative = true; 1531 large = y; small = x; 1532 } 1533 else 1534 { 1535 *negative = false; 1536 large = x; small = y; 1537 } 1538 // result.length will be equal to larger length, or could decrease by 1. 1539 1540 1541 BigDigit [] result = new BigDigit[large.length]; 1542 BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0); 1543 result[small.length..$] = large[small.length..$]; 1544 if (carry) 1545 { 1546 multibyteIncrementAssign!('-')(result[small.length..$], carry); 1547 } 1548 while (result.length > 1 && result[$-1] == 0) 1549 { 1550 result = result[0..$-1]; 1551 } 1552 return result; 1553 } 1554 1555 1556 /* 1557 * return a + b 1558 * Returns: 1559 * unique memory 1560 */ 1561 BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe 1562 { 1563 const(BigDigit) [] x, y; 1564 if (a.length < b.length) 1565 { 1566 x = b; y = a; 1567 } 1568 else 1569 { 1570 x = a; y = b; 1571 } 1572 // now we know x.length > y.length 1573 // create result. add 1 in case it overflows 1574 BigDigit [] result = new BigDigit[x.length + 1]; 1575 1576 BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0); 1577 if (x.length != y.length) 1578 { 1579 result[y.length..$-1]= x[y.length..$]; 1580 carry = multibyteIncrementAssign!('+')(result[y.length..$-1], carry); 1581 } 1582 if (carry) 1583 { 1584 result[$-1] = carry; 1585 return result; 1586 } 1587 else 1588 return result[0..$-1]; 1589 } 1590 1591 /** return x + y 1592 */ 1593 BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow 1594 { 1595 uint hi = cast(uint)(y >>> 32); 1596 uint lo = cast(uint)(y& 0xFFFF_FFFF); 1597 auto len = x.length; 1598 if (x.length < 2 && hi != 0) ++len; 1599 BigDigit [] result = new BigDigit[len+1]; 1600 result[0 .. x.length] = x[]; 1601 if (x.length < 2 && hi != 0) 1602 { 1603 result[1]=hi; 1604 hi=0; 1605 } 1606 uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo); 1607 if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi); 1608 if (carry) 1609 { 1610 result[$-1] = carry; 1611 return result; 1612 } 1613 else 1614 return result[0..$-1]; 1615 } 1616 1617 /** Return x - y. 1618 * x must be greater than y. 1619 */ 1620 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe 1621 { 1622 uint hi = cast(uint)(y >>> 32); 1623 uint lo = cast(uint)(y & 0xFFFF_FFFF); 1624 BigDigit [] result = new BigDigit[x.length]; 1625 result[] = x[]; 1626 multibyteIncrementAssign!('-')(result[], lo); 1627 if (hi) 1628 multibyteIncrementAssign!('-')(result[1..$], hi); 1629 if (result[$-1] == 0) 1630 return result[0..$-1]; 1631 else 1632 return result; 1633 } 1634 1635 /** General unsigned multiply routine for bigints. 1636 * Sets result = x * y. 1637 * 1638 * The length of y must not be larger than the length of x. 1639 * Different algorithms are used, depending on the lengths of x and y. 1640 * TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the 1641 * unbalanced case. (But I doubt it would be faster in practice). 1642 * 1643 */ 1644 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y) 1645 pure nothrow @safe 1646 { 1647 import core.memory : GC; 1648 assert( result.length == x.length + y.length, 1649 "result array must have enough space to store computed result"); 1650 assert( y.length > 0, "y must not be empty"); 1651 assert( x.length >= y.length, "x must be greater or equal than y"); 1652 if (y.length <= KARATSUBALIMIT) 1653 { 1654 // Small multiplier, we'll just use the asm classic multiply. 1655 if (y.length == 1) 1656 { // Trivial case, no cache effects to worry about 1657 result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0); 1658 return; 1659 } 1660 1661 immutable CACHELIMIT = getCacheLimit; 1662 if (x.length + y.length < CACHELIMIT) 1663 return mulSimple(result, x, y); 1664 1665 // If x is so big that it won't fit into the cache, we divide it into chunks 1666 // Every chunk must be greater than y.length. 1667 // We make the first chunk shorter, if necessary, to ensure this. 1668 1669 auto chunksize = CACHELIMIT / y.length; 1670 immutable residual = x.length % chunksize; 1671 if (residual < y.length) 1672 { 1673 chunksize -= y.length; 1674 } 1675 1676 // Use schoolbook multiply. 1677 mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y); 1678 auto done = chunksize; 1679 1680 while (done < x.length) 1681 { 1682 // result[done .. done+ylength] already has a value. 1683 chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) : x.length - done; 1684 BigDigit [KARATSUBALIMIT] partial; 1685 partial[0 .. y.length] = result[done .. done+y.length]; 1686 mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y); 1687 addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]); 1688 done += chunksize; 1689 } 1690 return; 1691 } 1692 1693 immutable half = (x.length >> 1) + (x.length & 1); 1694 if (2*y.length*y.length <= x.length*x.length) 1695 { 1696 // UNBALANCED MULTIPLY 1697 // Use school multiply to cut into quasi-squares of Karatsuba-size 1698 // or larger. The ratio of the two sides of the 'square' must be 1699 // between 1.414:1 and 1:1. Use Karatsuba on each chunk. 1700 // 1701 // For maximum performance, we want the ratio to be as close to 1702 // 1:1 as possible. To achieve this, we can either pad x or y. 1703 // The best choice depends on the modulus x%y. 1704 auto numchunks = x.length / y.length; 1705 auto chunksize = y.length; 1706 auto extra = x.length % y.length; 1707 auto maxchunk = chunksize + extra; 1708 bool paddingY; // true = we're padding Y, false = we're padding X. 1709 bool isExtraSmall = extra * extra * 2 < y.length * y.length; 1710 if (numchunks == 1 && isExtraSmall) 1711 { 1712 // We divide (x_first_half * y) and (x_last_half * y) 1713 // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)). 1714 // (1.414 ~ 1.707)/2:1 is balanced. 1715 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length]; 1716 BigDigit [] partial = scratchbuff[$ - y.length .. $]; 1717 scratchbuff = scratchbuff[0 .. $ - y.length]; 1718 mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff); 1719 partial[] = result[half .. half + y.length]; 1720 mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff); 1721 BigDigit c = addAssignSimple(result[half .. half + y.length], partial); 1722 if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c); 1723 () @trusted { GC.free(scratchbuff.ptr); } (); 1724 } 1725 else 1726 { 1727 if (isExtraSmall) 1728 { 1729 // The leftover bit is small enough that it should be incorporated 1730 // in the existing chunks. 1731 // Make all the chunks a tiny bit bigger 1732 // (We're padding y with zeros) 1733 chunksize += extra / numchunks; 1734 extra = x.length - chunksize*numchunks; 1735 // there will probably be a few left over. 1736 // Every chunk will either have size chunksize, or chunksize+1. 1737 maxchunk = chunksize + 1; 1738 paddingY = true; 1739 assert(chunksize + extra + chunksize *(numchunks-1) == x.length, 1740 "Unexpected size"); 1741 } 1742 else 1743 { 1744 // the extra bit is large enough that it's worth making a new chunk. 1745 // (This means we're padding x with zeros, when doing the first one). 1746 maxchunk = chunksize; 1747 ++numchunks; 1748 paddingY = false; 1749 assert(extra + chunksize *(numchunks-1) == x.length, 1750 "Unexpected size"); 1751 } 1752 // We make the buffer a bit bigger so we have space for the partial sums. 1753 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length]; 1754 BigDigit [] partial = scratchbuff[$ - y.length .. $]; 1755 scratchbuff = scratchbuff[0 .. $ - y.length]; 1756 size_t done; // how much of X have we done so far? 1757 if (paddingY) 1758 { 1759 // If the first chunk is bigger, do it first. We're padding y. 1760 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )], 1761 x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff); 1762 done = chunksize + (extra > 0 ? 1 : 0); 1763 if (extra) --extra; 1764 } 1765 else 1766 { // We're padding X. Begin with the extra bit. 1767 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff); 1768 done = extra; 1769 extra = 0; 1770 } 1771 immutable basechunksize = chunksize; 1772 while (done < x.length) 1773 { 1774 chunksize = basechunksize + (extra > 0 ? 1 : 0); 1775 if (extra) --extra; 1776 partial[] = result[done .. done+y.length]; 1777 mulKaratsuba(result[done .. done + y.length + chunksize], 1778 x[done .. done+chunksize], y, scratchbuff); 1779 addAssignSimple(result[done .. done + y.length + chunksize], partial); 1780 done += chunksize; 1781 } 1782 () @trusted { GC.free(scratchbuff.ptr); } (); 1783 } 1784 } 1785 else 1786 { 1787 // Balanced. Use Karatsuba directly. 1788 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1789 mulKaratsuba(result, x, y, scratchbuff); 1790 () @trusted { GC.free(scratchbuff.ptr); } (); 1791 } 1792 } 1793 1794 // https://issues.dlang.org/show_bug.cgi?id=20493 1795 @safe unittest 1796 { 1797 // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300) 1798 // the number itself isn't important, only the amount of digits, so we do a simpler 1799 // multiplication of the same size, analogous to: 1800 // 11111111 * 11111111 = 0123456787654321 1801 // but instead of base 10, it's in base `BigDigit` 1802 1803 BigDigit[398] x = 1; 1804 BigDigit[236] y = 1; 1805 BigDigit[x.length + y.length] result; 1806 mulInternal(result[], x[], y[]); 1807 1808 // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0] 1809 BigDigit[x.length + y.length] expected = y.length; 1810 foreach (BigDigit i; 0 .. y.length) 1811 { 1812 expected[i] = i+1; 1813 expected[$-1-i] = i; 1814 } 1815 1816 assert(result == expected); 1817 } 1818 1819 /** General unsigned squaring routine for BigInts. 1820 * Sets result = x*x. 1821 * NOTE: If the highest half-digit of x is zero, the highest digit of result will 1822 * also be zero. 1823 */ 1824 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe 1825 { 1826 import core.memory : GC; 1827 // Squaring is potentially half a multiply, plus add the squares of 1828 // the diagonal elements. 1829 assert(result.length == 2*x.length, 1830 "result needs to have twice the capacity of x"); 1831 if (x.length <= KARATSUBASQUARELIMIT) 1832 { 1833 if (x.length == 1) 1834 { 1835 result[1] = multibyteMul(result[0 .. 1], x, x[0], 0); 1836 return; 1837 } 1838 return squareSimple(result, x); 1839 } 1840 // The nice thing about squaring is that it always stays balanced 1841 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1842 squareKaratsuba(result, x, scratchbuff); 1843 () @trusted { GC.free(scratchbuff.ptr); } (); 1844 } 1845 1846 1847 import core.bitop : bsr; 1848 1849 /// if remainder is null, only calculate quotient. 1850 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u, 1851 const BigDigit [] v) pure nothrow @safe 1852 { 1853 import core.memory : GC; 1854 assert(quotient.length == u.length - v.length + 1, 1855 "Invalid quotient length"); 1856 assert(remainder == null || remainder.length == v.length, 1857 "Invalid remainder"); 1858 assert(v.length > 1, "v must have more than 1 element"); 1859 assert(u.length >= v.length, "u must be as longer or longer than v"); 1860 1861 // Normalize by shifting v left just enough so that 1862 // its high-order bit is on, and shift u left the 1863 // same amount. The highest bit of u will never be set. 1864 1865 BigDigit [] vn = new BigDigit[v.length]; 1866 BigDigit [] un = new BigDigit[u.length + 1]; 1867 // How much to left shift v, so that its MSB is set. 1868 uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]); 1869 if (s != 0) 1870 { 1871 multibyteShl(vn, v, s); 1872 un[$-1] = multibyteShl(un[0..$-1], u, s); 1873 } 1874 else 1875 { 1876 vn[] = v[]; 1877 un[0..$-1] = u[]; 1878 un[$-1] = 0; 1879 } 1880 if (quotient.length<FASTDIVLIMIT) 1881 { 1882 schoolbookDivMod(quotient, un, vn); 1883 } 1884 else 1885 { 1886 blockDivMod(quotient, un, vn); 1887 } 1888 1889 // Unnormalize remainder, if required. 1890 if (remainder != null) 1891 { 1892 if (s == 0) remainder[] = un[0 .. vn.length]; 1893 else multibyteShr(remainder, un[0 .. vn.length+1], s); 1894 } 1895 () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } (); 1896 } 1897 1898 pure @safe unittest 1899 { 1900 immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000]; 1901 immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000]; 1902 uint [] q = new uint[u.length - v.length + 1]; 1903 uint [] r = new uint[2]; 1904 divModInternal(q, r, u, v); 1905 assert(q[]==[0xFFFF_FFFFu, 0]); 1906 assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]); 1907 u = [0, 0xFFFF_FFFE, 0x8000_0001]; 1908 v = [0xFFFF_FFFF, 0x8000_0000]; 1909 divModInternal(q, r, u, v); 1910 } 1911 1912 1913 // Converts a big uint to a hexadecimal string. 1914 // 1915 // Optionally, a separator character (eg, an underscore) may be added between 1916 // every 8 digits. 1917 // buff.length must be data.length*8 if separator is zero, 1918 // or data.length*9 if separator is non-zero. It will be completely filled. 1919 char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0, 1920 LetterCase letterCase = LetterCase.upper) pure nothrow @safe 1921 { 1922 int x=0; 1923 for (ptrdiff_t i=data.length - 1; i >= 0; --i) 1924 { 1925 toHexZeroPadded(buff[x .. x+8], data[i], letterCase); 1926 x+=8; 1927 if (separator) 1928 { 1929 if (i>0) buff[x] = separator; 1930 ++x; 1931 } 1932 } 1933 return buff; 1934 } 1935 1936 /** 1937 * Convert a big uint into an octal string. 1938 * 1939 * Params: 1940 * buff = The destination buffer for the octal string. Must be large enough to 1941 * store the result, including leading zeroes, which is 1942 * ceil(data.length * BigDigitBits / 3) characters. 1943 * The buffer is filled from back to front, starting from `buff[$-1]`. 1944 * data = The biguint to be converted. 1945 * 1946 * Returns: The index of the leading non-zero digit in `buff`. Will be 1947 * `buff.length - 1` if the entire big uint is zero. 1948 */ 1949 size_t biguintToOctal(char[] buff, const(BigDigit)[] data) 1950 pure nothrow @safe @nogc 1951 { 1952 ubyte carry = 0; 1953 int shift = 0; 1954 size_t penPos = buff.length - 1; 1955 size_t lastNonZero = buff.length - 1; 1956 1957 pragma(inline) void output(uint digit) @nogc nothrow 1958 { 1959 if (digit != 0) 1960 lastNonZero = penPos; 1961 buff[penPos--] = cast(char)('0' + digit); 1962 } 1963 1964 foreach (bigdigit; data) 1965 { 1966 if (shift < 0) 1967 { 1968 // Some bits were carried over from previous word. 1969 assert(shift > -3, "shift must be greater than -3"); 1970 output(((bigdigit << -shift) | carry) & 0b111); 1971 shift += 3; 1972 assert(shift > 0, "shift must be 1 or greater"); 1973 } 1974 1975 while (shift <= BigDigitBits - 3) 1976 { 1977 output((bigdigit >>> shift) & 0b111); 1978 shift += 3; 1979 } 1980 1981 if (shift < BigDigitBits) 1982 { 1983 // Some bits need to be carried forward. 1984 carry = (bigdigit >>> shift) & 0b11; 1985 } 1986 shift -= BigDigitBits; 1987 assert(shift >= -2 && shift <= 0, "shift must in [-2,0]"); 1988 } 1989 1990 if (shift < 0) 1991 { 1992 // Last word had bits that haven't been output yet. 1993 assert(shift > -3, "Shift must be greater than -3"); 1994 output(carry); 1995 } 1996 1997 return lastNonZero; 1998 } 1999 2000 /** Convert a big uint into a decimal string. 2001 * 2002 * Params: 2003 * data The biguint to be converted. Will be destroyed. 2004 * buff The destination buffer for the decimal string. Must be 2005 * large enough to store the result, including leading zeros. 2006 * Will be filled backwards, starting from buff[$-1]. 2007 * 2008 * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length. 2009 * Returns: 2010 * the lowest index of buff which was used. 2011 */ 2012 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe 2013 { 2014 ptrdiff_t sofar = buff.length; 2015 // Might be better to divide by (10^38/2^32) since that gives 38 digits for 2016 // the price of 3 divisions and a shr; this version only gives 27 digits 2017 // for 3 divisions. 2018 while (data.length>1) 2019 { 2020 uint rem = multibyteDivAssign(data, 10_0000_0000, 0); 2021 itoaZeroPadded(buff[sofar-9 .. sofar], rem); 2022 sofar -= 9; 2023 if (data[$-1] == 0 && data.length > 1) 2024 { 2025 data.length = data.length - 1; 2026 } 2027 } 2028 itoaZeroPadded(buff[sofar-10 .. sofar], data[0]); 2029 sofar -= 10; 2030 // and strip off the leading zeros 2031 while (sofar != buff.length-1 && buff[sofar] == '0') 2032 sofar++; 2033 return sofar; 2034 } 2035 2036 /** Convert a decimal string into a big uint. 2037 * 2038 * Params: 2039 * data The biguint to be receive the result. Must be large enough to 2040 * store the result. 2041 * s The decimal string. May contain _ or 0 .. 9 2042 * 2043 * The required length for the destination buffer is slightly less than 2044 * 1 + s.length/log2(10) = 1 + s.length/3.3219. 2045 * 2046 * Returns: 2047 * the highest index of data which was used. 2048 */ 2049 int biguintFromDecimal(Range)(BigDigit[] data, Range s) 2050 if ( 2051 isInputRange!Range && 2052 isSomeChar!(ElementType!Range) && 2053 !isInfinite!Range) 2054 in 2055 { 2056 static if (hasLength!Range) 2057 assert((data.length >= 2) || (data.length == 1 && s.length == 1), 2058 "data has a invalid length"); 2059 } 2060 do 2061 { 2062 import std.conv : ConvException; 2063 2064 // Convert to base 1e19 = 10_000_000_000_000_000_000. 2065 // (this is the largest power of 10 that will fit into a long). 2066 // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219. 2067 // 485 bits will only just fit into 146 decimal digits. 2068 // As we convert the string, we record the number of digits we've seen in base 19: 2069 // hi is the number of digits/19, lo is the extra digits (0 to 18). 2070 // TODO: This is inefficient for very large strings (it is O(n^^2)). 2071 // We should take advantage of fast multiplication once the numbers exceed 2072 // Karatsuba size. 2073 uint lo = 0; // number of powers of digits, 0 .. 18 2074 uint x = 0; 2075 ulong y = 0; 2076 uint hi = 0; // number of base 1e19 digits 2077 data[0] = 0; // initially number is 0. 2078 if (data.length > 1) 2079 data[1] = 0; 2080 2081 foreach (character; s) 2082 { 2083 if (character == '_') 2084 continue; 2085 2086 if (character < '0' || character > '9') 2087 throw new ConvException("invalid digit"); 2088 x *= 10; 2089 x += character - '0'; 2090 ++lo; 2091 if (lo == 9) 2092 { 2093 y = x; 2094 x = 0; 2095 } 2096 if (lo == 18) 2097 { 2098 y *= 10_0000_0000; 2099 y += x; 2100 x = 0; 2101 } 2102 if (lo == 19) 2103 { 2104 y *= 10; 2105 y += x; 2106 x = 0; 2107 // Multiply existing number by 10^19, then add y1. 2108 if (hi>0) 2109 { 2110 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A 2111 ++hi; 2112 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000 2113 ++hi; 2114 } 2115 else 2116 hi = 2; 2117 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF)); 2118 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32)); 2119 if (c != 0) 2120 { 2121 data[hi]=c; 2122 ++hi; 2123 } 2124 y = 0; 2125 lo = 0; 2126 } 2127 } 2128 // Now set y = all remaining digits. 2129 if (lo >= 18) 2130 { 2131 } 2132 else if (lo >= 9) 2133 { 2134 for (int k=9; k<lo; ++k) y*=10; 2135 y+=x; 2136 } 2137 else 2138 { 2139 for (int k=0; k<lo; ++k) y*=10; 2140 y+=x; 2141 } 2142 if (lo != 0) 2143 { 2144 if (hi == 0) 2145 { 2146 data[0] = cast(uint) y; 2147 if (data.length == 1) 2148 { 2149 hi = 1; 2150 } 2151 else 2152 { 2153 data[1] = cast(uint)(y >>> 32); 2154 hi=2; 2155 } 2156 } 2157 else 2158 { 2159 while (lo>0) 2160 { 2161 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0); 2162 if (c != 0) 2163 { 2164 data[hi]=c; 2165 ++hi; 2166 } 2167 --lo; 2168 } 2169 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF)); 2170 if (y > 0xFFFF_FFFFL) 2171 { 2172 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32)); 2173 } 2174 if (c != 0) 2175 { 2176 data[hi]=c; 2177 ++hi; 2178 } 2179 } 2180 } 2181 while (hi>1 && data[hi-1]==0) 2182 --hi; 2183 return hi; 2184 } 2185 2186 2187 // ------------------------ 2188 // These in-place functions are only for internal use; they are incompatible 2189 // with COW. 2190 2191 // Classic 'schoolbook' multiplication. 2192 void mulSimple(BigDigit[] result, const(BigDigit) [] left, 2193 const(BigDigit)[] right) pure nothrow @safe 2194 in 2195 { 2196 assert(result.length == left.length + right.length, 2197 "Result must be able to store left + right"); 2198 assert(right.length>1, "right must not be empty"); 2199 } 2200 do 2201 { 2202 result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0); 2203 multibyteMultiplyAccumulate(result[1..$], left, right[1..$]); 2204 } 2205 2206 // Classic 'schoolbook' squaring 2207 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe 2208 in 2209 { 2210 assert(result.length == 2*x.length, "result must be twice as long as x"); 2211 assert(x.length>1, "x must not be empty"); 2212 } 2213 do 2214 { 2215 multibyteSquare(result, x); 2216 } 2217 2218 2219 // add two uints of possibly different lengths. Result must be as long 2220 // as the larger length. 2221 // Returns carry (0 or 1). 2222 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right) 2223 pure nothrow @safe 2224 in 2225 { 2226 assert(result.length == left.length, 2227 "result and left must be of the same length"); 2228 assert(left.length >= right.length, 2229 "left must be longer or of equal length to right"); 2230 assert(right.length > 0, "right must not be empty"); 2231 } 2232 do 2233 { 2234 uint carry = multibyteAdd(result[0 .. right.length], 2235 left[0 .. right.length], right, 0); 2236 if (right.length < left.length) 2237 { 2238 result[right.length .. left.length] = left[right.length .. $]; 2239 carry = multibyteIncrementAssign!('+')(result[right.length..$], carry); 2240 } 2241 return carry; 2242 } 2243 2244 /* result = result - right 2245 * Returns carry = 1 if result was less than right. 2246 */ 2247 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right) 2248 pure nothrow @safe 2249 { 2250 assert(result.length >= right.length, 2251 "result must be longer or of equal length to right"); 2252 uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0); 2253 if (c && result.length > right.length) 2254 c = multibyteIncrementAssign!('-')(result[right.length .. $], c); 2255 return c; 2256 } 2257 2258 /* result = result + right 2259 */ 2260 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right) 2261 pure nothrow @safe 2262 { 2263 assert(result.length >= right.length, 2264 "result must be longer or of equal length to right"); 2265 uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0); 2266 if (c && result.length > right.length) 2267 c = multibyteIncrementAssign!('+')(result[right.length .. $], c); 2268 return c; 2269 } 2270 2271 /* performs result += wantSub? - right : right; 2272 */ 2273 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right, 2274 bool wantSub) pure nothrow @safe 2275 { 2276 if (wantSub) 2277 return subAssignSimple(result, right); 2278 else 2279 return addAssignSimple(result, right); 2280 } 2281 2282 2283 // return true if x<y, considering leading zeros 2284 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe 2285 { 2286 assert(x.length >= y.length, 2287 "x must be longer or of equal length to y"); 2288 auto k = x.length-1; 2289 while (x[k]==0 && k >= y.length) 2290 --k; 2291 if (k >= y.length) 2292 return false; 2293 while (k>0 && x[k]==y[k]) 2294 --k; 2295 return x[k] < y[k]; 2296 } 2297 2298 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y. 2299 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y) 2300 pure nothrow @safe 2301 { 2302 assert(result.length == ((x.length >= y.length) ? x.length : y.length), 2303 "result must capable to store the maximum of x and y"); 2304 2305 size_t minlen; 2306 bool negative; 2307 if (x.length >= y.length) 2308 { 2309 minlen = y.length; 2310 negative = less(x, y); 2311 } 2312 else 2313 { 2314 minlen = x.length; 2315 negative = !less(y, x); 2316 } 2317 const (BigDigit)[] large, small; 2318 if (negative) 2319 { 2320 large = y; small = x; 2321 } 2322 else 2323 { 2324 large = x; small = y; 2325 } 2326 2327 BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0); 2328 if (x.length != y.length) 2329 { 2330 result[minlen .. large.length]= large[minlen..$]; 2331 result[large.length..$] = 0; 2332 if (carry) 2333 multibyteIncrementAssign!('-')(result[minlen..$], carry); 2334 } 2335 return negative; 2336 } 2337 2338 /* Determine how much space is required for the temporaries 2339 * when performing a Karatsuba multiplication. 2340 * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see: 2341 * https://issues.dlang.org/show_bug.cgi?id=20493 2342 */ 2343 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe 2344 { 2345 return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4; 2346 } 2347 2348 /* Sets result = x*y, using Karatsuba multiplication. 2349 * x must be longer or equal to y. 2350 * Valid only for balanced multiplies, where x is not shorter than y. 2351 * It is superior to schoolbook multiplication if and only if 2352 * sqrt(2)*y.length > x.length > y.length. 2353 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2) 2354 * The maximum allowable length of x and y is uint.max; but better algorithms 2355 * should be used far before that length is reached. 2356 * Params: 2357 * scratchbuff An array long enough to store all the temporaries. Will be destroyed. 2358 */ 2359 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x, 2360 const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe 2361 { 2362 assert(x.length >= y.length, "x must be greater or equal to y"); 2363 assert(result.length < uint.max, "Operands too large"); 2364 assert(result.length == x.length + y.length, 2365 "result must be as large as x + y"); 2366 if (x.length <= KARATSUBALIMIT) 2367 { 2368 return mulSimple(result, x, y); 2369 } 2370 // Must be almost square (otherwise, a schoolbook iteration is better) 2371 assert(2L * y.length * y.length > (x.length-1) * (x.length-1), 2372 "Bigint Internal Error: Asymmetric Karatsuba"); 2373 2374 // The subtractive version of Karatsuba multiply uses the following result: 2375 // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid) 2376 // where mid = (x0-x1)*(y0-y1) 2377 // requiring 3 multiplies of length N, instead of 4. 2378 // The advantage of the subtractive over the additive version is that 2379 // the mid multiply cannot exceed length N. But there are subtleties: 2380 // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we 2381 // retain all of the leading zeros in the subtractions 2382 2383 // half length, round up. 2384 auto half = (x.length >> 1) + (x.length & 1); 2385 2386 const(BigDigit) [] x0 = x[0 .. half]; 2387 const(BigDigit) [] x1 = x[half .. $]; 2388 const(BigDigit) [] y0 = y[0 .. half]; 2389 const(BigDigit) [] y1 = y[half .. $]; 2390 BigDigit [] mid = scratchbuff[0 .. half*2]; 2391 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 2392 BigDigit [] resultLow = result[0 .. 2*half]; 2393 BigDigit [] resultHigh = result[2*half .. $]; 2394 // initially use result to store temporaries 2395 BigDigit [] xdiff= result[0 .. half]; 2396 BigDigit [] ydiff = result[half .. half*2]; 2397 2398 // First, we calculate mid, and sign of mid 2399 immutable bool midNegative = inplaceSub(xdiff, x0, x1) 2400 ^ inplaceSub(ydiff, y0, y1); 2401 mulKaratsuba(mid, xdiff, ydiff, newscratchbuff); 2402 2403 // Low half of result gets x0 * y0. High half gets x1 * y1 2404 2405 mulKaratsuba(resultLow, x0, y0, newscratchbuff); 2406 2407 if (2L * y1.length * y1.length < x1.length * x1.length) 2408 { 2409 // an asymmetric situation has been created. 2410 // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1. 2411 // Applying one schoolbook multiply gives us two pieces each 1.2:1 2412 if (y1.length <= KARATSUBALIMIT) 2413 mulSimple(resultHigh, x1, y1); 2414 else 2415 { 2416 // divide x1 in two, then use schoolbook multiply on the two pieces. 2417 auto quarter = (x1.length >> 1) + (x1.length & 1); 2418 immutable ysmaller = (quarter >= y1.length); 2419 mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1, 2420 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff); 2421 // Save the part which will be overwritten. 2422 immutable ysmaller2 = ((x1.length - quarter) >= y1.length); 2423 newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length]; 2424 mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1, 2425 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]); 2426 2427 resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]); 2428 } 2429 } 2430 else 2431 mulKaratsuba(resultHigh, x1, y1, newscratchbuff); 2432 2433 /* We now have result = x0y0 + (N*N)*x1y1 2434 Before adding or subtracting mid, we must calculate 2435 result += N * (x0y0 + x1y1) 2436 We can do this with three half-length additions. With a = x0y0, b = x1y1: 2437 aHI aLO 2438 + aHI aLO 2439 + bHI bLO 2440 + bHI bLO 2441 = R3 R2 R1 R0 2442 R1 = aHI + bLO + aLO 2443 R2 = aHI + bLO + aHI + carry_from_R1 2444 R3 = bHi + carry_from_R2 2445 2446 It might actually be quicker to do it in two full-length additions: 2447 newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]); 2448 addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]); 2449 */ 2450 BigDigit[] R1 = result[half .. half*2]; 2451 BigDigit[] R2 = result[half*2 .. half*3]; 2452 BigDigit[] R3 = result[half*3..$]; 2453 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 2454 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0 2455 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 2456 if (c1+c2) 2457 multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 2458 if (c1+c3) 2459 multibyteIncrementAssign!('+')(R3, c1+c3); 2460 2461 // And finally we subtract mid 2462 addOrSubAssignSimple(result[half..$], mid, !midNegative); 2463 } 2464 2465 void squareKaratsuba(BigDigit [] result, const BigDigit [] x, 2466 BigDigit [] scratchbuff) pure nothrow @safe 2467 { 2468 // See mulKaratsuba for implementation comments. 2469 // Squaring is simpler, since it never gets asymmetric. 2470 assert(result.length < uint.max, "Operands too large"); 2471 assert(result.length == 2*x.length, 2472 "result must be twice the length of x"); 2473 if (x.length <= KARATSUBASQUARELIMIT) 2474 { 2475 return squareSimple(result, x); 2476 } 2477 // half length, round up. 2478 auto half = (x.length >> 1) + (x.length & 1); 2479 2480 const(BigDigit)[] x0 = x[0 .. half]; 2481 const(BigDigit)[] x1 = x[half .. $]; 2482 BigDigit [] mid = scratchbuff[0 .. half*2]; 2483 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 2484 // initially use result to store temporaries 2485 BigDigit [] xdiff= result[0 .. half]; 2486 const BigDigit [] ydiff = result[half .. half*2]; 2487 2488 // First, we calculate mid. We don't need its sign 2489 inplaceSub(xdiff, x0, x1); 2490 squareKaratsuba(mid, xdiff, newscratchbuff); 2491 2492 // Set result = x0x0 + (N*N)*x1x1 2493 squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff); 2494 squareKaratsuba(result[2*half .. $], x1, newscratchbuff); 2495 2496 /* result += N * (x0x0 + x1x1) 2497 Do this with three half-length additions. With a = x0x0, b = x1x1: 2498 R1 = aHI + bLO + aLO 2499 R2 = aHI + bLO + aHI + carry_from_R1 2500 R3 = bHi + carry_from_R2 2501 */ 2502 BigDigit[] R1 = result[half .. half*2]; 2503 BigDigit[] R2 = result[half*2 .. half*3]; 2504 BigDigit[] R3 = result[half*3..$]; 2505 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 2506 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0 2507 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 2508 if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 2509 if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3); 2510 2511 // And finally we subtract mid, which is always positive 2512 subAssignSimple(result[half..$], mid); 2513 } 2514 2515 /* Knuth's Algorithm D, as presented in 2516 * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002). 2517 * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18. 2518 * Given u and v, calculates quotient = u / v, u = u % v. 2519 * v must be normalized (ie, the MSB of v must be 1). 2520 * The most significant words of quotient and u may be zero. 2521 * u[0 .. v.length] holds the remainder. 2522 */ 2523 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 2524 pure nothrow @safe 2525 { 2526 assert(quotient.length == u.length - v.length, 2527 "quotient has wrong length"); 2528 assert(v.length > 1, "v must not be empty"); 2529 assert(u.length >= v.length, "u must be larger or equal to v"); 2530 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]"); 2531 assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]"); 2532 // BUG: This code only works if BigDigit is uint. 2533 uint vhi = v[$-1]; 2534 uint vlo = v[$-2]; 2535 2536 for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--) 2537 { 2538 // Compute estimate of quotient[j], 2539 // qhat = (three most significant words of u)/(two most sig words of v). 2540 uint qhat; 2541 if (u[j + v.length] == vhi) 2542 { 2543 // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001) 2544 qhat = uint.max; 2545 } 2546 else 2547 { 2548 uint ulo = u[j + v.length - 2]; 2549 version (D_InlineAsm_X86) 2550 { 2551 // Note: On DMD, this is only ~10% faster than the non-asm code. 2552 uint *p = &u[j + v.length - 1]; 2553 asm pure nothrow @trusted 2554 { 2555 mov EAX, p; 2556 mov EDX, [EAX+4]; 2557 mov EAX, [EAX]; 2558 div dword ptr [vhi]; 2559 mov qhat, EAX; 2560 mov ECX, EDX; 2561 div3by2correction: 2562 mul dword ptr [vlo]; // EDX:EAX = qhat * vlo 2563 sub EAX, ulo; 2564 sbb EDX, ECX; 2565 jbe div3by2done; 2566 mov EAX, qhat; 2567 dec EAX; 2568 mov qhat, EAX; 2569 add ECX, dword ptr [vhi]; 2570 jnc div3by2correction; 2571 div3by2done: ; 2572 } 2573 } 2574 else 2575 { // version (InlineAsm) 2576 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1]; 2577 immutable bigqhat = uu / vhi; 2578 ulong rhat = uu - bigqhat * vhi; 2579 qhat = cast(uint) bigqhat; 2580 again: 2581 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo)) 2582 { 2583 --qhat; 2584 rhat += vhi; 2585 if (!(rhat & 0xFFFF_FFFF_0000_0000L)) 2586 goto again; 2587 } 2588 } // version (InlineAsm) 2589 } 2590 // Multiply and subtract. 2591 uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0); 2592 2593 if (u[j+v.length] < carry) 2594 { 2595 // If we subtracted too much, add back 2596 --qhat; 2597 carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0); 2598 } 2599 quotient[j] = qhat; 2600 u[j + v.length] = u[j + v.length] - carry; 2601 } 2602 } 2603 2604 // TODO: Replace with a library call 2605 void itoaZeroPadded(char[] output, uint value) 2606 pure nothrow @safe @nogc 2607 { 2608 for (auto i = output.length; i--;) 2609 { 2610 if (value < 10) 2611 { 2612 output[i] = cast(char)(value + '0'); 2613 value = 0; 2614 } 2615 else 2616 { 2617 output[i] = cast(char)(value % 10 + '0'); 2618 value /= 10; 2619 } 2620 } 2621 } 2622 2623 void toHexZeroPadded(char[] output, uint value, 2624 LetterCase letterCase = LetterCase.upper) pure nothrow @safe 2625 { 2626 ptrdiff_t x = output.length - 1; 2627 static immutable string upperHexDigits = "0123456789ABCDEF"; 2628 static immutable string lowerHexDigits = "0123456789abcdef"; 2629 for ( ; x >= 0; --x) 2630 { 2631 if (letterCase == LetterCase.upper) 2632 { 2633 output[x] = upperHexDigits[value & 0xF]; 2634 } 2635 else 2636 { 2637 output[x] = lowerHexDigits[value & 0xF]; 2638 } 2639 value >>= 4; 2640 } 2641 } 2642 2643 // Returns the highest value of i for which left[i]!=right[i], 2644 // or 0 if left[] == right[] 2645 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right) 2646 pure nothrow @nogc @safe 2647 { 2648 assert(left.length == right.length, 2649 "left have a length equal to that of right"); 2650 for (ptrdiff_t i = left.length - 1; i>0; --i) 2651 { 2652 if (left[i] != right[i]) 2653 return i; 2654 } 2655 return 0; 2656 } 2657 2658 // Returns the lowest value of i for which x[i]!=0. 2659 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe 2660 { 2661 int k = 0; 2662 while (x[k]==0) 2663 { 2664 ++k; 2665 assert(k < x.length, "k must be less than x.length"); 2666 } 2667 return k; 2668 } 2669 2670 /* 2671 Calculate quotient and remainder of u / v using fast recursive division. 2672 v must be normalised, and must be at least half as long as u. 2673 Given u and v, v normalised, calculates quotient = u/v, u = u%v. 2674 scratch is temporary storage space, length must be >= quotient + 1. 2675 2676 Returns: 2677 u[0 .. v.length] is the remainder. u[v.length..$] is corrupted. 2678 2679 Implements algorithm 1.8 from MCA. 2680 This algorithm has an annoying special case. After the first recursion, the 2681 highest bit of the quotient may be set. This means that in the second 2682 recursive call, the 'in' contract would be violated. (This happens only 2683 when the top quarter of u is equal to the top half of v. A base 10 2684 equivalent example of this situation is 5517/56; the first step gives 2685 55/5 = 11). To maintain the in contract, we pad a zero to the top of both 2686 u and the quotient. 'mayOverflow' indicates that that the special case 2687 has occurred. 2688 (In MCA, a different strategy is used: the in contract is weakened, and 2689 schoolbookDivMod is more general: it allows the high bit of u to be set). 2690 See also: 2691 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 2692 Max-Planck Institute fuer Informatik, (Oct 1998). 2693 */ 2694 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v, 2695 BigDigit[] scratch, bool mayOverflow = false) 2696 pure nothrow @safe 2697 in 2698 { 2699 // v must be normalized 2700 assert(v.length > 1, "v must not be empty"); 2701 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]"); 2702 assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]"); 2703 assert(quotient.length == u.length - v.length, 2704 "quotient must be of equal length of u - v"); 2705 if (mayOverflow) 2706 { 2707 assert(u[$-1] == 0, "Invalid value at u[$ - 1]"); 2708 assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]"); 2709 } 2710 2711 // Must be symmetric. Use block schoolbook division if not. 2712 assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length, 2713 "Invalid length of u"); 2714 assert((mayOverflow ? u.length-1 : u.length) >= v.length, 2715 "Invalid length of u"); 2716 assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1), 2717 "Invalid quotient length"); 2718 } 2719 do 2720 { 2721 if (quotient.length < FASTDIVLIMIT) 2722 { 2723 return schoolbookDivMod(quotient, u, v); 2724 } 2725 2726 // Split quotient into two halves, but keep padding in the top half 2727 auto k = (mayOverflow ? quotient.length - 1 : quotient.length) >> 1; 2728 2729 // RECURSION 1: Calculate the high half of the quotient 2730 2731 // Note that if u and quotient were padded, they remain padded during 2732 // this call, so in contract is satisfied. 2733 recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $], 2734 scratch, mayOverflow); 2735 2736 // quotient[k..$] is our guess at the high quotient. 2737 // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the 2738 // first remainder. u[0 .. 2*k] is the low part. 2739 2740 // Calculate the full first remainder to be 2741 // remainder - highQuotient * lowDivisor 2742 // reducing highQuotient until the remainder is positive. 2743 // The low part of the remainder, u[0 .. k], cannot be altered by this. 2744 2745 adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k, 2746 scratch[0 .. quotient.length], mayOverflow); 2747 2748 // RECURSION 2: Calculate the low half of the quotient 2749 // The full first remainder is now in u[0 .. k + v.length]. 2750 2751 if (u[k + v.length - 1] & 0x8000_0000) 2752 { 2753 // Special case. The high quotient is 0x1_00...000 or 0x1_00...001. 2754 // This means we need an extra quotient word for the next recursion. 2755 // We need to restore the invariant for the recursive calls. 2756 // We do this by padding both u and quotient. Extending u is trivial, 2757 // because the higher words will not be used again. But for the 2758 // quotient, we're clobbering the low word of the high quotient, 2759 // so we need save it, and add it back in after the recursive call. 2760 2761 auto clobberedQuotient = quotient[k]; 2762 u[k+v.length] = 0; 2763 2764 recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1], 2765 v[k .. $], scratch, true); 2766 adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k, 2767 scratch[0 .. 2 * k+1], true); 2768 2769 // Now add the quotient word that got clobbered earlier. 2770 multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient); 2771 } 2772 else 2773 { 2774 // The special case has NOT happened. 2775 recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $], 2776 scratch, false); 2777 2778 // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length] 2779 2780 adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k, 2781 scratch[0 .. 2 * k]); 2782 } 2783 } 2784 2785 // rem -= quot * v[0 .. k]. 2786 // If would make rem negative, decrease quot until rem is >= 0. 2787 // Needs (quot.length * k) scratch space to store the result of the multiply. 2788 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v, 2789 ptrdiff_t k, 2790 BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe 2791 { 2792 assert(rem.length == v.length, "rem must be as long as v"); 2793 mulInternal(scratch, quot, v[0 .. k]); 2794 uint carry = 0; 2795 if (mayOverflow) 2796 carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]); 2797 else 2798 carry = subAssignSimple(rem, scratch); 2799 while (carry) 2800 { 2801 multibyteIncrementAssign!('-')(quot, 1); // quot-- 2802 carry -= multibyteAdd(rem, rem, v, 0); 2803 } 2804 } 2805 2806 // Cope with unbalanced division by performing block schoolbook division. 2807 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 2808 pure nothrow @safe 2809 { 2810 import core.memory : GC; 2811 assert(quotient.length == u.length - v.length, 2812 "quotient must be of equal length of u - v"); 2813 assert(v.length > 1, "v must not be empty"); 2814 assert(u.length >= v.length, "u must be longer or of equal length as v"); 2815 assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]"); 2816 assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]"); 2817 BigDigit [] scratch = new BigDigit[v.length + 1]; 2818 2819 // Perform block schoolbook division, with 'v.length' blocks. 2820 auto m = u.length - v.length; 2821 while (m > v.length) 2822 { 2823 immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0; 2824 BigDigit saveq; 2825 if (mayOverflow) 2826 { 2827 u[m + v.length] = 0; 2828 saveq = quotient[m]; 2829 } 2830 recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)], 2831 u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow); 2832 if (mayOverflow) 2833 { 2834 assert(quotient[m] == 0, "quotient must not be 0"); 2835 quotient[m] = saveq; 2836 } 2837 m -= v.length; 2838 } 2839 recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch); 2840 () @trusted { GC.free(scratch.ptr); } (); 2841 } 2842 2843 @system unittest 2844 { 2845 version (none) 2846 { 2847 import core.stdc.stdio; 2848 2849 void printBiguint(const uint [] data) 2850 { 2851 char [] buff = biguintToHex(new char[data.length*9], data, '_'); 2852 printf("%.*s\n", cast(int) buff.length, buff.ptr); 2853 } 2854 2855 void printDecimalBigUint(BigUint data) 2856 { 2857 auto str = data.toDecimalString(0); 2858 printf("%.*s\n", cast(int) str.length, str.ptr); 2859 } 2860 } 2861 2862 uint [] a, b; 2863 a = new uint[43]; 2864 b = new uint[179]; 2865 for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i; 2866 for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546; 2867 2868 a[$-1] |= 0x8000_0000; 2869 uint [] r = new uint[a.length]; 2870 uint [] q = new uint[b.length-a.length+1]; 2871 2872 divModInternal(q, r, b, a); 2873 q = q[0..$-1]; 2874 uint [] r1 = r.dup; 2875 uint [] q1 = q.dup; 2876 blockDivMod(q, b, a); 2877 r = b[0 .. a.length]; 2878 assert(r[] == r1[]); 2879 assert(q[] == q1[]); 2880 } 2881 2882 // biguintToOctal 2883 @safe unittest 2884 { 2885 enum bufSize = 5 * BigDigitBits / 3 + 1; 2886 auto buf = new char[bufSize]; 2887 size_t i; 2888 BigDigit[] data = [ 342391 ]; 2889 2890 // Basic functionality with single word 2891 i = biguintToOctal(buf, data); 2892 assert(i == bufSize - 7 && buf[i .. $] == "1234567"); 2893 2894 // Test carrying bits between words 2895 data = [ 0x77053977, 0x39770539, 0x00000005 ]; 2896 i = biguintToOctal(buf, data); 2897 assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567"); 2898 2899 // Test carried bits in the last word 2900 data = [ 0x80000000 ]; 2901 i = biguintToOctal(buf, data); 2902 assert(buf[i .. $] == "20000000000"); 2903 2904 // Test boundary between 3rd and 4th word where the number of bits is 2905 // divisible by 3 and no bits should be carried. 2906 // 2907 // The 0xC0000000's are for "poisoning" the carry to be non-zero when the 2908 // rollover happens, so that if any bugs happen in wrongly adding the carry 2909 // to the next word, non-zero bits will show up in the output. 2910 data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ]; 2911 i = biguintToOctal(buf, data); 2912 assert(buf[i .. $] == "2060000000001400000000030000000000"); 2913 2914 // Boundary case: 0 2915 data = [ 0 ]; 2916 i = biguintToOctal(buf, data); 2917 assert(buf[i .. $] == "0"); 2918 }