1 /** Fundamental operations for arbitrary-precision arithmetic 2 * 3 * These functions are for internal use only. 4 */ 5 /* Copyright Don Clugston 2008 - 2010. 6 * Distributed under the Boost Software License, Version 1.0. 7 * (See accompanying file LICENSE_1_0.txt or copy at 8 * http://www.boost.org/LICENSE_1_0.txt) 9 */ 10 /* References: 11 "Modern Computer Arithmetic" (MCA) is the primary reference for all 12 algorithms used in this library. 13 - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic", 14 Version 0.5.9, (Oct 2010). 15 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 16 Max-Planck Institute fuer Informatik, (Oct 1998). 17 - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.", 18 INRIA 4664, (Dec 2002). 19 - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?", 20 http://bodrato.it/papers (2006). 21 - A. Fog, "Optimizing subroutines in assembly language", 22 www.agner.org/optimize (2008). 23 - A. Fog, "The microarchitecture of Intel and AMD CPU's", 24 www.agner.org/optimize (2008). 25 - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs 26 and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008). 27 28 Idioms: 29 Many functions in this module use 30 'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)' 31 in order to disable implicit conversion. 32 33 */ 34 module std.internal.math.biguintcore; 35 36 version (D_InlineAsm_X86) 37 { 38 static import std.internal.math.biguintx86; 39 } 40 static import std.internal.math.biguintnoasm; 41 42 import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT, 43 KARATSUBASQUARELIMIT; 44 45 alias multibyteAdd = multibyteAddSub!('+'); 46 alias multibyteSub = multibyteAddSub!('-'); 47 48 private import std.traits; 49 private import std.range.primitives; 50 public import std.ascii : LetterCase; 51 import std.range.primitives; 52 import std.traits; 53 54 private: 55 56 // dipatchers to the right low-level primitives. Added to allow BigInt CTFE for 57 // 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's 58 // used by the other architectures too. 59 // See comments below in case it has to be refactored. 60 version (X86) 61 uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry) 62 { 63 // must be checked before, otherwise D_InlineAsm_X86 is true. 64 if (__ctfe) 65 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry); 66 // Runtime. 67 else version (D_InlineAsm_X86) 68 return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry); 69 // Runtime if no asm available. 70 else 71 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry); 72 } 73 // Any other architecture 74 else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub; 75 76 version (X86) 77 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry) 78 { 79 if (__ctfe) 80 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry); 81 else version (D_InlineAsm_X86) 82 return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry); 83 else 84 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry); 85 } 86 else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign; 87 88 version (X86) 89 uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits) 90 { 91 if (__ctfe) 92 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits); 93 else version (D_InlineAsm_X86) 94 return std.internal.math.biguintx86.multibyteShl(dest, src, numbits); 95 else 96 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits); 97 } 98 else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl; 99 100 version (X86) 101 void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits) 102 { 103 if (__ctfe) 104 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits); 105 else version (D_InlineAsm_X86) 106 std.internal.math.biguintx86.multibyteShr(dest, src, numbits); 107 else 108 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits); 109 } 110 else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr; 111 112 version (X86) 113 uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry) 114 { 115 if (__ctfe) 116 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry); 117 else version (D_InlineAsm_X86) 118 return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry); 119 else 120 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry); 121 } 122 else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul; 123 124 version (X86) 125 uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry) 126 { 127 if (__ctfe) 128 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry); 129 else version (D_InlineAsm_X86) 130 return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry); 131 else 132 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry); 133 } 134 else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd; 135 136 version (X86) 137 void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right) 138 { 139 if (__ctfe) 140 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right); 141 else version (D_InlineAsm_X86) 142 std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right); 143 else 144 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right); 145 } 146 else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate; 147 148 version (X86) 149 uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow) 150 { 151 if (__ctfe) 152 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow); 153 else version (D_InlineAsm_X86) 154 return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow); 155 else 156 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow); 157 } 158 else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign; 159 160 version (X86) 161 void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src) 162 { 163 if (__ctfe) 164 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src); 165 else version (D_InlineAsm_X86) 166 std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src); 167 else 168 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src); 169 } 170 else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares; 171 172 version (X86) 173 void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x) 174 { 175 if (__ctfe) 176 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x); 177 else version (D_InlineAsm_X86) 178 std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x); 179 else 180 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x); 181 } 182 else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate; 183 184 version (X86) 185 void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x) 186 { 187 if (__ctfe) 188 std.internal.math.biguintnoasm.multibyteSquare(result, x); 189 else version (D_InlineAsm_X86) 190 std.internal.math.biguintx86.multibyteSquare(result, x); 191 else 192 std.internal.math.biguintnoasm.multibyteSquare(result, x); 193 } 194 else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare; 195 196 // Limits for when to switch between algorithms. 197 // Half the size of the data cache. 198 @nogc nothrow pure @safe size_t getCacheLimit() 199 { 200 import core.cpuid : dataCaches; 201 return dataCaches[0].size * 1024 / 2; 202 } 203 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division 204 205 206 // These constants are used by shift operations 207 static if (BigDigit.sizeof == int.sizeof) 208 { 209 enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 } 210 alias BIGHALFDIGIT = ushort; 211 } 212 else static if (BigDigit.sizeof == long.sizeof) 213 { 214 alias BIGHALFDIGIT = uint; 215 enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 } 216 } 217 else static assert(0, "Unsupported BigDigit size"); 218 219 import std.exception : assumeUnique; 220 import std.traits : isIntegral; 221 enum BigDigitBits = BigDigit.sizeof*8; 222 template maxBigDigits(T) 223 if (isIntegral!T) 224 { 225 enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof; 226 } 227 228 static immutable BigDigit[] ZERO = [0]; 229 static immutable BigDigit[] ONE = [1]; 230 static immutable BigDigit[] TWO = [2]; 231 static immutable BigDigit[] TEN = [10]; 232 233 234 public: 235 236 /// BigUint performs memory management and wraps the low-level calls. 237 struct BigUint 238 { 239 private: 240 pure invariant() 241 { 242 assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ), 243 "Invariant requires data to not empty or zero"); 244 } 245 246 immutable(BigDigit) [] data = ZERO; 247 248 this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe 249 { 250 data = x; 251 } 252 package(std) // used from: std.bigint 253 this(T)(T x) pure nothrow @safe scope if (isIntegral!T) 254 { 255 opAssign(x); 256 } 257 258 enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc { 259 return assumeUnique(input); 260 }; 261 public: 262 // Length in uints 263 @property size_t uintLength() pure nothrow const @safe @nogc scope 264 { 265 static if (BigDigit.sizeof == uint.sizeof) 266 { 267 return data.length; 268 } 269 else static if (BigDigit.sizeof == ulong.sizeof) 270 { 271 return data.length * 2 - 272 ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0); 273 } 274 } 275 @property size_t ulongLength() pure nothrow const @safe @nogc scope 276 { 277 static if (BigDigit.sizeof == uint.sizeof) 278 { 279 return (data.length + 1) >> 1; 280 } 281 else static if (BigDigit.sizeof == ulong.sizeof) 282 { 283 return data.length; 284 } 285 } 286 287 // The value at (cast(ulong[]) data)[n] 288 ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope 289 { 290 static if (BigDigit.sizeof == int.sizeof) 291 { 292 if (data.length == n*2 + 1) return data[n*2]; 293 return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 ); 294 } 295 else static if (BigDigit.sizeof == long.sizeof) 296 { 297 return data[n]; 298 } 299 } 300 301 uint peekUint(size_t n) pure nothrow const @safe @nogc scope 302 { 303 static if (BigDigit.sizeof == int.sizeof) 304 { 305 return data[n]; 306 } 307 else 308 { 309 immutable x = data[n >> 1]; 310 return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x; 311 } 312 } 313 314 /// 315 void opAssign(Tulong)(Tulong u) pure nothrow @safe scope if (is (Tulong == ulong)) 316 { 317 if (u == 0) data = ZERO; 318 else if (u == 1) data = ONE; 319 else if (u == 2) data = TWO; 320 else if (u == 10) data = TEN; 321 else 322 { 323 static if (BigDigit.sizeof == int.sizeof) 324 { 325 uint ulo = cast(uint)(u & 0xFFFF_FFFF); 326 uint uhi = cast(uint)(u >> 32); 327 if (uhi == 0) 328 { 329 data = [ulo]; 330 } 331 else 332 { 333 data = [ulo, uhi]; 334 } 335 } 336 else static if (BigDigit.sizeof == long.sizeof) 337 { 338 data = [u]; 339 } 340 } 341 } 342 void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope 343 { 344 this.data = y.data; 345 } 346 347 /// 348 int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope 349 { 350 if (data.length != y.data.length) 351 return (data.length > y.data.length) ? 1 : -1; 352 size_t k = highestDifferentDigit(data, y.data); 353 if (data[k] == y.data[k]) 354 return 0; 355 return data[k] > y.data[k] ? 1 : -1; 356 } 357 358 /// 359 int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope if (is (Tulong == ulong)) 360 { 361 if (data.length > maxBigDigits!Tulong) 362 return 1; 363 364 foreach_reverse (i; 0 .. maxBigDigits!Tulong) 365 { 366 BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits)); 367 if (tmp == 0) 368 if (data.length >= i+1) 369 { 370 // Since ZERO is [0], so we cannot simply return 1 here, as 371 // data[i] would be 0 for i == 0 in that case. 372 return (data[i] > 0) ? 1 : 0; 373 } 374 else 375 continue; 376 else 377 if (i+1 > data.length) 378 return -1; 379 else if (tmp != data[i]) 380 return data[i] > tmp ? 1 : -1; 381 } 382 return 0; 383 } 384 385 bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope 386 { 387 return y.data[] == data[]; 388 } 389 390 bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope 391 { 392 if (data.length > 2) 393 return false; 394 uint ylo = cast(uint)(y & 0xFFFF_FFFF); 395 uint yhi = cast(uint)(y >> 32); 396 if (data.length == 2 && data[1]!=yhi) 397 return false; 398 if (data.length == 1 && yhi != 0) 399 return false; 400 return (data[0] == ylo); 401 } 402 403 bool isZero() pure const nothrow @safe @nogc scope 404 { 405 return data.length == 1 && data[0] == 0; 406 } 407 408 size_t numBytes() pure nothrow const @safe @nogc scope 409 { 410 return data.length * BigDigit.sizeof; 411 } 412 413 // the extra bytes are added to the start of the string 414 char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope 415 { 416 immutable predictlength = 20+20*(data.length/2); // just over 19 417 char [] buff = new char[frontExtraBytes + predictlength]; 418 ptrdiff_t sofar = biguintToDecimal(buff, data.dup); 419 return buff[sofar-frontExtraBytes..$]; 420 } 421 422 /** Convert to a hex string, printing a minimum number of digits 'minPadding', 423 * allocating an additional 'frontExtraBytes' at the start of the string. 424 * Padding is done with padChar, which may be '0' or ' '. 425 * 'separator' is a digit separation character. If non-zero, it is inserted 426 * between every 8 digits. 427 * Separator characters do not contribute to the minPadding. 428 */ 429 char [] toHexString(int frontExtraBytes, char separator = 0, 430 int minPadding=0, char padChar = '0', 431 LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope 432 { 433 // Calculate number of extra padding bytes 434 size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof) 435 ? minPadding - data.length * 2 * BigDigit.sizeof : 0; 436 437 // Length not including separator bytes 438 size_t lenBytes = data.length * 2 * BigDigit.sizeof; 439 440 // Calculate number of separator bytes 441 size_t mainSeparatorBytes = separator ? (lenBytes / 8) - 1 : 0; 442 immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0; 443 444 char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes]; 445 biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase); 446 if (extraPad > 0) 447 { 448 if (separator) 449 { 450 size_t start = frontExtraBytes; // first index to pad 451 if (extraPad &7) 452 { 453 // Do 1 to 7 extra zeros. 454 buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar; 455 buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator); 456 start += (extraPad & 7) + 1; 457 } 458 for (int i=0; i< (extraPad >> 3); ++i) 459 { 460 buff[start .. start + 8] = padChar; 461 buff[start + 8] = (padChar == ' ' ? ' ' : separator); 462 start += 9; 463 } 464 } 465 else 466 { 467 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar; 468 } 469 } 470 int z = frontExtraBytes; 471 if (lenBytes > minPadding) 472 { 473 // Strip leading zeros. 474 ptrdiff_t maxStrip = lenBytes - minPadding; 475 while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0) 476 { 477 ++z; 478 --maxStrip; 479 } 480 } 481 if (padChar!='0') 482 { 483 // Convert leading zeros into padChars. 484 for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k) 485 { 486 if (buff[k]=='0') buff[k]=padChar; 487 } 488 } 489 return buff[z-frontExtraBytes..$]; 490 } 491 492 /** 493 * Convert to an octal string. 494 */ 495 char[] toOctalString() pure nothrow @safe const scope 496 { 497 auto predictLength = 1 + data.length*BigDigitBits / 3; 498 char[] buff = new char[predictLength]; 499 size_t firstNonZero = biguintToOctal(buff, data); 500 return buff[firstNonZero .. $]; 501 } 502 503 // return false if invalid character found 504 bool fromHexString(Range)(Range s) scope if ( 505 isBidirectionalRange!Range && isSomeChar!(ElementType!Range)) 506 { 507 import std.range : walkLength; 508 509 //Strip leading zeros 510 while (!s.empty && s.front == '0') 511 s.popFront; 512 513 if (s.empty) 514 { 515 data = ZERO; 516 return true; 517 } 518 519 immutable len = (s.save.walkLength + 15) / 4; 520 auto tmp = new BigDigit[len + 1]; 521 uint part, sofar, partcount; 522 523 foreach_reverse (character; s) 524 { 525 if (character == '_') 526 continue; 527 528 uint x; 529 if (character >= '0' && character <= '9') 530 { 531 x = character - '0'; 532 } 533 else if (character >= 'A' && character <= 'F') 534 { 535 x = character - 'A' + 10; 536 } 537 else if (character >= 'a' && character <= 'f') 538 { 539 x = character - 'a' + 10; 540 } 541 else 542 { 543 return false; 544 } 545 546 part >>= 4; 547 part |= (x << (32 - 4)); 548 ++partcount; 549 550 if (partcount == 8) 551 { 552 tmp[sofar] = part; 553 ++sofar; 554 partcount = 0; 555 part = 0; 556 } 557 } 558 if (part) 559 { 560 for ( ; partcount != 8; ++partcount) part >>= 4; 561 tmp[sofar] = part; 562 ++sofar; 563 } 564 if (sofar == 0) 565 data = ZERO; 566 else 567 data = trustedAssumeUnique(tmp[0 .. sofar]); 568 569 return true; 570 } 571 572 // return true if OK; false if erroneous characters found 573 bool fromDecimalString(Range)(Range s) scope if ( 574 isForwardRange!Range && isSomeChar!(ElementType!Range)) 575 { 576 import std.range : walkLength; 577 578 while (!s.empty && s.front == '0') 579 { 580 s.popFront; 581 } 582 583 if (s.empty) 584 { 585 data = ZERO; 586 return true; 587 } 588 589 auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19; 590 auto tmp = new BigDigit[predict_length]; 591 592 tmp.length = biguintFromDecimal(tmp, s); 593 594 data = trustedAssumeUnique(tmp); 595 return true; 596 } 597 598 void fromMagnitude(Range)(Range magnitude) scope 599 if (isInputRange!Range 600 && (isForwardRange!Range || hasLength!Range) 601 && isUnsigned!(ElementType!Range)) 602 { 603 while (!magnitude.empty && magnitude.front == 0) 604 magnitude.popFront; 605 static if (hasLength!Range) 606 immutable inputLen = magnitude.length; 607 else 608 immutable inputLen = magnitude.save.walkLength; 609 if (!inputLen) 610 { 611 this.data = ZERO; 612 return; 613 } 614 // `magnitude` has its most significant element first but BigUint.data 615 // stores the most significant last. 616 BigDigit[] newDigits; 617 alias E = ElementType!Range; 618 static if (E.sizeof == BigDigit.sizeof) 619 { 620 newDigits = new BigDigit[inputLen]; 621 foreach_reverse (ref digit; newDigits) 622 { 623 digit = magnitude.front; 624 magnitude.popFront(); 625 } 626 } 627 else static if (E.sizeof < BigDigit.sizeof) 628 { 629 enum elementsPerDigit = BigDigit.sizeof / E.sizeof; 630 newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit]; 631 immutable remainder = inputLen % elementsPerDigit; 632 // If there is a remainder specially assemble the most significant digit. 633 if (remainder) 634 { 635 BigDigit tmp = magnitude.front; 636 magnitude.popFront(); 637 foreach (_; 1 .. remainder) 638 { 639 tmp = (tmp << (E.sizeof * 8)) | magnitude.front; 640 magnitude.popFront(); 641 } 642 newDigits[$-1] = tmp; 643 } 644 // Assemble full digits from most to least significant. 645 foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)]) 646 { 647 BigDigit tmp; 648 static foreach (n; 0 .. elementsPerDigit) 649 { 650 tmp |= cast(BigDigit) magnitude.front << 651 ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8); 652 magnitude.popFront(); 653 } 654 digit = tmp; 655 } 656 } 657 else static if (E.sizeof > BigDigit.sizeof) 658 { 659 enum digitsPerElement = E.sizeof / BigDigit.sizeof; 660 newDigits = new BigDigit[inputLen * digitsPerElement]; 661 size_t i = newDigits.length - 1; 662 foreach (element; magnitude) 663 { 664 static foreach (n; 0 .. digitsPerElement) 665 newDigits[i - n] = 666 cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8)); 667 i -= digitsPerElement; 668 } 669 while (newDigits[$-1] == 0) 670 newDigits = newDigits[0 .. $-1]; 671 } 672 else 673 static assert(0); 674 this.data = trustedAssumeUnique(newDigits); 675 return; 676 } 677 678 nothrow pure @safe unittest 679 { 680 immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD]; 681 // Internal representation is most-significant-last but `fromMagnitude` 682 // argument is most-significant-first. 683 immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005]; 684 BigUint b; 685 // Test with reference magnitude. 686 b.fromMagnitude(referenceMagnitude); 687 assert(b.data == referenceData); 688 // Test ubyte array. 689 import std.bitmanip : nativeToBigEndian; 690 ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~ 691 nativeToBigEndian(referenceMagnitude[1]) ~ 692 nativeToBigEndian(referenceMagnitude[2]); 693 b.data = ZERO; 694 b.fromMagnitude(ubyteMagnitude); 695 assert(b.data == referenceData); 696 // Test ulong array. 697 static if (BigDigit.sizeof == uint.sizeof) 698 immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]), 699 ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2], 700 ]; 701 else static if (BigDigit.sizeof == ulong.sizeof) 702 alias ulongMagnitude = referenceMagnitude; 703 b.data = ZERO; 704 b.fromMagnitude(ulongMagnitude); 705 assert(b.data == referenceData); 706 } 707 708 //////////////////////// 709 // 710 // All of these member functions create a new BigUint. 711 712 // return x >> y 713 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope 714 if (op == ">>" && is (Tulong == ulong)) 715 { 716 assert(y > 0, "Can not right shift BigUint by 0"); 717 uint bits = cast(uint) y & BIGDIGITSHIFTMASK; 718 if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO); 719 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 720 if (bits == 0) 721 { 722 return BigUint(data[words..$]); 723 } 724 else 725 { 726 uint [] result = new BigDigit[data.length - words]; 727 multibyteShr(result, data[words..$], bits); 728 729 if (result.length > 1 && result[$-1] == 0) 730 return BigUint(trustedAssumeUnique(result[0 .. $-1])); 731 else 732 return BigUint(trustedAssumeUnique(result)); 733 } 734 } 735 736 // return x << y 737 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope 738 if (op == "<<" && is (Tulong == ulong)) 739 { 740 assert(y > 0, "Can not left shift BigUint by 0"); 741 if (isZero()) return this; 742 uint bits = cast(uint) y & BIGDIGITSHIFTMASK; 743 assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max), 744 "Shift result exceeds temporary store"); 745 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 746 BigDigit [] result = new BigDigit[data.length + words+1]; 747 result[0 .. words] = 0; 748 if (bits == 0) 749 { 750 result[words .. words+data.length] = data[]; 751 return BigUint(trustedAssumeUnique(result[0 .. words+data.length])); 752 } 753 else 754 { 755 immutable c = multibyteShl(result[words .. words+data.length], data, bits); 756 if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length])); 757 result[$-1] = c; 758 return BigUint(trustedAssumeUnique(result)); 759 } 760 } 761 762 // If wantSub is false, return x + y, leaving sign unchanged 763 // If wantSub is true, return abs(x - y), negating sign if x < y 764 static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y, 765 bool wantSub, ref bool sign) pure nothrow @safe if (is(Tulong == ulong)) 766 { 767 BigUint r; 768 if (wantSub) 769 { // perform a subtraction 770 if (x.data.length > 2) 771 { 772 // subInt returns GC allocated array, can be safely cast to immutable 773 r.data = (() @trusted => cast(immutable) subInt(x.data, y))(); 774 } 775 else 776 { // could change sign! 777 ulong xx = x.data[0]; 778 if (x.data.length > 1) 779 xx += (cast(ulong) x.data[1]) << 32; 780 ulong d; 781 if (xx <= y) 782 { 783 d = y - xx; 784 sign = !sign; 785 } 786 else 787 { 788 d = xx - y; 789 } 790 if (d == 0) 791 { 792 r = 0UL; 793 sign = false; 794 return r; 795 } 796 if (d > uint.max) 797 { 798 r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)]; 799 } 800 else 801 { 802 r.data = [cast(uint)(d & 0xFFFF_FFFF)]; 803 } 804 } 805 } 806 else 807 { 808 // addInt returns GC allocated array, can be safely cast to immutable 809 r.data = (() @trusted => cast(immutable) addInt(x.data, y))(); 810 } 811 return r; 812 } 813 814 // If wantSub is false, return x + y, leaving sign unchanged. 815 // If wantSub is true, return abs(x - y), negating sign if x<y 816 static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign) 817 pure nothrow @safe 818 { 819 BigUint r; 820 if (wantSub) 821 { // perform a subtraction 822 bool negative; 823 // sub returns GC allocated array, can be safely cast to immutable 824 r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))(); 825 sign ^= negative; 826 if (r.isZero()) 827 { 828 sign = false; 829 } 830 } 831 else 832 { 833 // add returns GC allocated array, can be safely cast to immutable 834 r.data = (() @trusted => cast(immutable) add(x.data, y.data))(); 835 } 836 return r; 837 } 838 839 840 // return x*y. 841 static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe 842 { 843 if (y == 0 || x == 0) return BigUint(ZERO); 844 static if (T.sizeof * 8 <= 32) 845 uint hi = 0; 846 else 847 uint hi = cast(uint) (y >>> 32); 848 uint lo = cast(uint) (y & 0xFFFF_FFFF); 849 uint [] result = new BigDigit[x.data.length+1+(hi != 0)]; 850 result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0); 851 if (hi != 0) 852 { 853 result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1], 854 x.data, hi, 0); 855 } 856 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 857 } 858 859 /* return x * y. 860 */ 861 static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe 862 { 863 if (y == 0 || x == 0) 864 return BigUint(ZERO); 865 auto len = x.data.length + y.data.length; 866 BigDigit [] result = new BigDigit[len]; 867 if (y.data.length > x.data.length) 868 { 869 mulInternal(result, y.data, x.data); 870 } 871 else 872 { 873 if (x.data[]==y.data[]) squareInternal(result, x.data); 874 else mulInternal(result, x.data, y.data); 875 } 876 // the highest element could be zero, 877 // in which case we need to reduce the length 878 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 879 } 880 881 // return x / y 882 static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe 883 if ( is(immutable T == immutable uint) ) 884 { 885 uint y = y_; 886 if (y == 1) 887 return x; 888 uint [] result = new BigDigit[x.data.length]; 889 if ((y&(-y))==y) 890 { 891 assert(y != 0, "BigUint division by zero"); 892 // perfect power of 2 893 uint b = 0; 894 for (;y != 1; y>>=1) 895 { 896 ++b; 897 } 898 multibyteShr(result, x.data, b); 899 } 900 else 901 { 902 result[] = x.data[]; 903 cast(void) multibyteDivAssign(result, y, 0); 904 } 905 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 906 } 907 908 static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe 909 if ( is(immutable T == immutable ulong) ) 910 { 911 if (y <= uint.max) 912 return divInt!uint(x, cast(uint) y); 913 if (x.data.length < 2) 914 return BigUint(ZERO); 915 uint hi = cast(uint)(y >>> 32); 916 uint lo = cast(uint)(y & 0xFFFF_FFFF); 917 immutable uint[2] z = [lo, hi]; 918 BigDigit[] result = new BigDigit[x.data.length - z.length + 1]; 919 divModInternal(result, null, x.data, z[]); 920 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 921 } 922 923 // return x % y 924 static uint modInt(T)(scope BigUint x, T y_) pure if ( is(immutable T == immutable uint) ) 925 { 926 import core.memory : GC; 927 uint y = y_; 928 assert(y != 0, "% 0 not allowed"); 929 if ((y&(-y)) == y) 930 { // perfect power of 2 931 return x.data[0] & (y-1); 932 } 933 else 934 { 935 // horribly inefficient - malloc, copy, & store are unnecessary. 936 uint [] wasteful = new BigDigit[x.data.length]; 937 wasteful[] = x.data[]; 938 immutable rem = multibyteDivAssign(wasteful, y, 0); 939 () @trusted { GC.free(wasteful.ptr); } (); 940 return rem; 941 } 942 } 943 944 // return x / y 945 static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe 946 { 947 if (y.data.length > x.data.length) 948 return BigUint(ZERO); 949 if (y.data.length == 1) 950 return divInt(x, y.data[0]); 951 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 952 divModInternal(result, null, x.data, y.data); 953 return BigUint(removeLeadingZeros(trustedAssumeUnique(result))); 954 } 955 956 // return x % y 957 static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe 958 { 959 if (y.data.length > x.data.length) return x; 960 if (y.data.length == 1) 961 { 962 return BigUint([modInt(x, y.data[0])]); 963 } 964 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 965 BigDigit [] rem = new BigDigit[y.data.length]; 966 divModInternal(result, rem, x.data, y.data); 967 return BigUint(removeLeadingZeros(trustedAssumeUnique(rem))); 968 } 969 970 // Return x / y in quotient, x % y in remainder 971 static void divMod(BigUint x, scope BigUint y, 972 out BigUint quotient, out BigUint remainder) pure nothrow @safe 973 { 974 /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */ 975 if (y.data.length > x.data.length) 976 { 977 quotient = 0uL; 978 remainder = x; 979 } 980 else if (y.data.length == 1) 981 { 982 quotient = divInt(x, y.data[0]); 983 remainder = BigUint([modInt(x, y.data[0])]); 984 } 985 else 986 { 987 BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1]; 988 BigDigit[] rem = new BigDigit[y.data.length]; 989 divModInternal(quot, rem, x.data, y.data); 990 quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot))); 991 remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem))); 992 } 993 } 994 995 // return x op y 996 static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign) 997 pure nothrow @safe if (op == "|" || op == "^" || op == "&") 998 { 999 auto d1 = includeSign(x.data, y.uintLength, xSign); 1000 auto d2 = includeSign(y.data, x.uintLength, ySign); 1001 1002 foreach (i; 0 .. d1.length) 1003 { 1004 mixin("d1[i] " ~ op ~ "= d2[i];"); 1005 } 1006 1007 mixin("resultSign = xSign " ~ op ~ " ySign;"); 1008 1009 if (resultSign) 1010 { 1011 twosComplement(d1, d1); 1012 } 1013 1014 return BigUint(removeLeadingZeros(trustedAssumeUnique(d1))); 1015 } 1016 1017 /** 1018 * Return a BigUint which is x raised to the power of y. 1019 * Method: Powers of 2 are removed from x, then left-to-right binary 1020 * exponentiation is used. 1021 * Memory allocation is minimized: at most one temporary BigUint is used. 1022 */ 1023 static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe 1024 { 1025 // Deal with the degenerate cases first. 1026 if (y == 0) return BigUint(ONE); 1027 if (y == 1) return x; 1028 if (x == 0 || x == 1) return x; 1029 1030 BigUint result; 1031 1032 // Simplify, step 1: Remove all powers of 2. 1033 uint firstnonzero = firstNonZeroDigit(x.data); 1034 // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits)) 1035 // where BigDigitBits = BigDigit.sizeof * 8 1036 1037 // See if x[firstnonzero..$] can now fit into a single digit. 1038 bool singledigit = ((x.data.length - firstnonzero) == 1); 1039 // If true, then x0 is that digit 1040 // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits)) 1041 BigDigit x0 = x.data[firstnonzero]; 1042 assert(x0 != 0, "pow(0, y) not allowed"); 1043 // Length of the non-zero portion 1044 size_t nonzerolength = x.data.length - firstnonzero; 1045 ulong y0; 1046 uint evenbits = 0; // number of even bits in the bottom of x 1047 while (!(x0 & 1)) 1048 { 1049 x0 >>= 1; 1050 ++evenbits; 1051 } 1052 1053 if (x.data.length- firstnonzero == 2) 1054 { 1055 // Check for a single digit straddling a digit boundary 1056 const BigDigit x1 = x.data[firstnonzero+1]; 1057 if ((x1 >> evenbits) == 0) 1058 { 1059 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits)); 1060 singledigit = true; 1061 } 1062 } 1063 // Now if (singledigit), x^^y = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1064 1065 uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end 1066 1067 // Simplify, step 2: For singledigits, see if we can trivially reduce y 1068 1069 BigDigit finalMultiplier = 1UL; 1070 1071 if (singledigit) 1072 { 1073 // x fits into a single digit. Raise it to the highest power we can 1074 // that still fits into a single digit, then reduce the exponent accordingly. 1075 // We're quite likely to have a residual multiply at the end. 1076 // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100. 1077 // and 5^^13 still fits into a uint. 1078 evenshiftbits = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK); 1079 if (x0 == 1) 1080 { // Perfect power of 2 1081 result = 1UL; 1082 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y; 1083 } 1084 immutable p = highestPowerBelowUintMax(x0); 1085 if (y <= p) 1086 { // Just do it with pow 1087 result = cast(ulong) intpow(x0, y); 1088 if (evenbits + firstnonzero == 0) 1089 return result; 1090 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y; 1091 } 1092 y0 = y / p; 1093 finalMultiplier = intpow(x0, y - y0*p); 1094 x0 = intpow(x0, p); 1095 // Result is x0 1096 nonzerolength = 1; 1097 } 1098 // Now if (singledigit), x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1099 1100 // Perform a crude check for overflow and allocate result buffer. 1101 // The length required is y * lg2(x) bits. 1102 // which will always fit into y*x.length digits. But this is 1103 // a gross overestimate if x is small (length 1 or 2) and the highest 1104 // digit is nearly empty. 1105 // A better estimate is: 1106 // y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits, 1107 // and the first term is always between 1108 // y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and 1109 // y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS 1110 // For single digit payloads, we already have 1111 // x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits)) 1112 // and x0 is almost a full digit, so it's a tight estimate. 1113 // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y 1114 // Note that the divisions must be rounded up. 1115 1116 // Estimated length in BigDigits 1117 immutable estimatelength = singledigit 1118 ? 1 + y0 + ((evenbits*y + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y 1119 : x.data.length * y; 1120 // Imprecise check for overflow. Makes the extreme cases easier to debug 1121 // (less extreme overflow will result in an out of memory error). 1122 if (estimatelength > uint.max/(4*BigDigit.sizeof)) 1123 assert(0, "Overflow in BigInt.pow"); 1124 1125 // The result buffer includes space for all the trailing zeros 1126 BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength]; 1127 1128 // Do all the powers of 2! 1129 size_t result_start = cast(size_t)( firstnonzero * y 1130 + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0)); 1131 1132 resultBuffer[0 .. result_start] = 0; 1133 BigDigit [] t1 = resultBuffer[result_start..$]; 1134 BigDigit [] r1; 1135 1136 if (singledigit) 1137 { 1138 r1 = t1[0 .. 1]; 1139 r1[0] = x0; 1140 y = y0; 1141 } 1142 else 1143 { 1144 // It's not worth right shifting by evenbits unless we also shrink the length after each 1145 // multiply or squaring operation. That might still be worthwhile for large y. 1146 r1 = t1[0 .. x.data.length - firstnonzero]; 1147 r1[0..$] = x.data[firstnonzero..$]; 1148 } 1149 1150 if (y>1) 1151 { // Set r1 = r1 ^^ y. 1152 // The secondary buffer only needs space for the multiplication results 1153 BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start]; 1154 BigDigit [] r2; 1155 1156 int shifts = 63; // num bits in a long 1157 while (!(y & 0x8000_0000_0000_0000L)) 1158 { 1159 y <<= 1; 1160 --shifts; 1161 } 1162 y <<=1; 1163 1164 while (y != 0) 1165 { 1166 // For each bit of y: Set r1 = r1 * r1 1167 // If the bit is 1, set r1 = r1 * x 1168 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5. 1169 // Optimization opportunity: if more than 2 bits in y are set, 1170 // it's usually possible to reduce the number of multiplies 1171 // by caching odd powers of x. eg for y = 54, 1172 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2 1173 r2 = t2[0 .. r1.length*2]; 1174 squareInternal(r2, r1); 1175 if (y & 0x8000_0000_0000_0000L) 1176 { 1177 r1 = t1[0 .. r2.length + nonzerolength]; 1178 if (singledigit) 1179 { 1180 r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0); 1181 } 1182 else 1183 { 1184 mulInternal(r1, r2, x.data[firstnonzero..$]); 1185 } 1186 } 1187 else 1188 { 1189 r1 = t1[0 .. r2.length]; 1190 r1[] = r2[]; 1191 } 1192 y <<=1; 1193 shifts--; 1194 } 1195 while (shifts>0) 1196 { 1197 r2 = t2[0 .. r1.length * 2]; 1198 squareInternal(r2, r1); 1199 r1 = t1[0 .. r2.length]; 1200 r1[] = r2[]; 1201 --shifts; 1202 } 1203 } 1204 1205 if (finalMultiplier != 1) 1206 { 1207 const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0); 1208 if (carry) 1209 { 1210 r1 = t1[0 .. r1.length + 1]; 1211 r1[$-1] = carry; 1212 } 1213 } 1214 if (evenshiftbits) 1215 { 1216 const BigDigit carry = multibyteShl(r1, r1, evenshiftbits); 1217 if (carry != 0) 1218 { 1219 r1 = t1[0 .. r1.length + 1]; 1220 r1[$ - 1] = carry; 1221 } 1222 } 1223 while (r1[$ - 1]==0) 1224 { 1225 r1=r1[0 .. $ - 1]; 1226 } 1227 return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length])); 1228 } 1229 1230 // Implement toHash so that BigUint works properly as an AA key. 1231 size_t toHash() const @nogc nothrow pure @safe scope 1232 { 1233 return .hashOf(data); 1234 } 1235 1236 } // end BigUint 1237 1238 @safe pure nothrow unittest 1239 { 1240 // ulong comparison test 1241 BigUint a = [1]; 1242 assert(a == 1); 1243 // https://issues.dlang.org/show_bug.cgi?id=9548 1244 assert(a < 0x8000_0000_0000_0000UL); 1245 1246 // https://issues.dlang.org/show_bug.cgi?id=12234 1247 BigUint z = [0]; 1248 assert(z == 0UL); 1249 assert(!(z > 0UL)); 1250 assert(!(z < 0UL)); 1251 } 1252 1253 // https://issues.dlang.org/show_bug.cgi?id=16223 1254 @system pure nothrow unittest 1255 { 1256 BigUint a = [3]; 1257 int b = 5; 1258 assert(BigUint.mulInt(a,b) == 15); 1259 } 1260 1261 // Remove leading zeros from x, to restore the BigUint invariant 1262 inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe 1263 { 1264 size_t k = x.length; 1265 while (k>1 && x[k - 1]==0) --k; 1266 return x[0 .. k]; 1267 } 1268 1269 pure @safe unittest 1270 { 1271 BigUint r = BigUint([5]); 1272 BigUint t = BigUint([7]); 1273 BigUint s = BigUint.mod(r, t); 1274 assert(s == 5); 1275 } 1276 1277 1278 @safe pure unittest 1279 { 1280 BigUint r; 1281 r = 5UL; 1282 assert(r.peekUlong(0) == 5UL); 1283 assert(r.peekUint(0) == 5U); 1284 r = 0x1234_5678_9ABC_DEF0UL; 1285 assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL); 1286 assert(r.peekUint(0) == 0x9ABC_DEF0U); 1287 } 1288 1289 1290 // Pow tests 1291 pure @safe unittest 1292 { 1293 BigUint r, s; 1294 r.fromHexString("80000000_00000001"); 1295 s = BigUint.pow(r, 5); 1296 r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000" 1297 ~ "_00000002_80000000_00000001"); 1298 assert(s == r); 1299 s = 10UL; 1300 s = BigUint.pow(s, 39); 1301 r.fromDecimalString("1000000000000000000000000000000000000000"); 1302 assert(s == r); 1303 r.fromHexString("1_E1178E81_00000000"); 1304 s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds 1305 1306 r.fromDecimalString("000_000_00"); 1307 assert(r == 0); 1308 r.fromDecimalString("0007"); 1309 assert(r == 7); 1310 r.fromDecimalString("0"); 1311 assert(r == 0); 1312 } 1313 1314 // Radix conversion tests 1315 @safe pure unittest 1316 { 1317 BigUint r; 1318 r.fromHexString("1_E1178E81_00000000"); 1319 assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000"); 1320 assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000"); 1321 assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000"); 1322 assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000"); 1323 assert(r.toHexString(0, '_', 16+8+8) == "00000000_00000001_E1178E81_00000000"); 1324 assert(r.toHexString(0, '_', 16+8+8+1) == "0_00000000_00000001_E1178E81_00000000"); 1325 assert(r.toHexString(0, '_', 16+8+8+1, ' ') == " 1_E1178E81_00000000"); 1326 assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000"); 1327 r = 0UL; 1328 assert(r.toHexString(0, '_', 0) == "0"); 1329 assert(r.toHexString(0, '_', 7) == "0000000"); 1330 assert(r.toHexString(0, '_', 7, ' ') == " 0"); 1331 assert(r.toHexString(0, '#', 9) == "0#00000000"); 1332 assert(r.toHexString(0, 0, 9) == "000000000"); 1333 } 1334 1335 // 1336 @safe pure unittest 1337 { 1338 BigUint r; 1339 r.fromHexString("1_E1178E81_00000000"); 1340 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000"); 1341 assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000"); 1342 assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000"); 1343 assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000"); 1344 assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) == "00000000_00000001_e1178e81_00000000"); 1345 assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000"); 1346 assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == " 1_e1178e81_00000000"); 1347 assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000"); 1348 r = 0UL; 1349 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0"); 1350 assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000"); 1351 assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == " 0"); 1352 assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000"); 1353 assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000"); 1354 assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000"); 1355 } 1356 1357 1358 private: 1359 void twosComplement(const(BigDigit) [] x, BigDigit[] result) 1360 pure nothrow @safe 1361 { 1362 foreach (i; 0 .. x.length) 1363 { 1364 result[i] = ~x[i]; 1365 } 1366 result[x.length..$] = BigDigit.max; 1367 1368 foreach (i; 0 .. result.length) 1369 { 1370 if (result[i] == BigDigit.max) 1371 { 1372 result[i] = 0; 1373 } 1374 else 1375 { 1376 result[i] += 1; 1377 break; 1378 } 1379 } 1380 } 1381 1382 // Encode BigInt as BigDigit array (sign and 2's complement) 1383 BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign) 1384 pure nothrow @safe 1385 { 1386 size_t length = (x.length > minSize) ? x.length : minSize; 1387 BigDigit [] result = new BigDigit[length]; 1388 if (sign) 1389 { 1390 twosComplement(x, result); 1391 } 1392 else 1393 { 1394 result[0 .. x.length] = x; 1395 } 1396 return result; 1397 } 1398 1399 // works for any type 1400 T intpow(T)(T x, ulong n) pure nothrow @safe 1401 { 1402 T p; 1403 1404 switch (n) 1405 { 1406 case 0: 1407 p = 1; 1408 break; 1409 1410 case 1: 1411 p = x; 1412 break; 1413 1414 case 2: 1415 p = x * x; 1416 break; 1417 1418 default: 1419 p = 1; 1420 while (1) 1421 { 1422 if (n & 1) 1423 p *= x; 1424 n >>= 1; 1425 if (!n) 1426 break; 1427 x *= x; 1428 } 1429 break; 1430 } 1431 return p; 1432 } 1433 1434 1435 // returns the maximum power of x that will fit in a uint. 1436 int highestPowerBelowUintMax(uint x) pure nothrow @safe 1437 { 1438 assert(x > 1, "x must be greater than 1"); 1439 static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 1440 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7]; 1441 if (x<24) return maxpwr[x-2]; 1442 if (x<41) return 6; 1443 if (x<85) return 5; 1444 if (x<256) return 4; 1445 if (x<1626) return 3; 1446 if (x<65_536) return 2; 1447 return 1; 1448 } 1449 1450 // returns the maximum power of x that will fit in a ulong. 1451 int highestPowerBelowUlongMax(uint x) pure nothrow @safe 1452 { 1453 assert(x > 1, "x must be greater than 1"); 1454 static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 1455 17, 17, 16, 16, 15, 15, 15, 15, 14, 14, 1456 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 1457 12, 12, 12, 12, 12, 12, 12, 12, 12]; 1458 if (x<41) return maxpwr[x-2]; 1459 if (x<57) return 11; 1460 if (x<85) return 10; 1461 if (x<139) return 9; 1462 if (x<256) return 8; 1463 if (x<566) return 7; 1464 if (x<1626) return 6; 1465 if (x<7132) return 5; 1466 if (x<65_536) return 4; 1467 if (x<2_642_246) return 3; 1468 return 2; 1469 } 1470 1471 version (StdUnittest) 1472 { 1473 1474 private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe 1475 { 1476 int pwr = 1; 1477 for (ulong q = x;x*q < cast(ulong) uint.max; ) 1478 { 1479 q*=x; ++pwr; 1480 } 1481 return pwr; 1482 } 1483 1484 @safe pure unittest 1485 { 1486 assert(highestPowerBelowUintMax(10)==9); 1487 for (int k=82; k<88; ++k) 1488 { 1489 assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k)); 1490 } 1491 } 1492 1493 } 1494 1495 1496 /* General unsigned subtraction routine for bigints. 1497 * Sets result = x - y. If the result is negative, negative will be true. 1498 * Returns: 1499 * unique memory 1500 */ 1501 BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative) 1502 pure nothrow @safe 1503 { 1504 if (x.length == y.length) 1505 { 1506 // There's a possibility of cancellation, if x and y are almost equal. 1507 ptrdiff_t last = highestDifferentDigit(x, y); 1508 BigDigit [] result = new BigDigit[last+1]; 1509 if (x[last] < y[last]) 1510 { // we know result is negative 1511 multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0); 1512 *negative = true; 1513 } 1514 else 1515 { // positive or zero result 1516 multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0); 1517 *negative = false; 1518 } 1519 while (result.length > 1 && result[$-1] == 0) 1520 { 1521 result = result[0..$-1]; 1522 } 1523 // if (result.length >1 && result[$-1]==0) return result[0..$-1]; 1524 return result; 1525 } 1526 // Lengths are different 1527 const(BigDigit) [] large, small; 1528 if (x.length < y.length) 1529 { 1530 *negative = true; 1531 large = y; small = x; 1532 } 1533 else 1534 { 1535 *negative = false; 1536 large = x; small = y; 1537 } 1538 // result.length will be equal to larger length, or could decrease by 1. 1539 1540 1541 BigDigit [] result = new BigDigit[large.length]; 1542 BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0); 1543 result[small.length..$] = large[small.length..$]; 1544 if (carry) 1545 { 1546 multibyteIncrementAssign!('-')(result[small.length..$], carry); 1547 } 1548 while (result.length > 1 && result[$-1] == 0) 1549 { 1550 result = result[0..$-1]; 1551 } 1552 return result; 1553 } 1554 1555 1556 /* 1557 * return a + b 1558 * Returns: 1559 * unique memory 1560 */ 1561 BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe 1562 { 1563 const(BigDigit) [] x, y; 1564 if (a.length < b.length) 1565 { 1566 x = b; y = a; 1567 } 1568 else 1569 { 1570 x = a; y = b; 1571 } 1572 // now we know x.length > y.length 1573 // create result. add 1 in case it overflows 1574 BigDigit [] result = new BigDigit[x.length + 1]; 1575 1576 BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0); 1577 if (x.length != y.length) 1578 { 1579 result[y.length..$-1]= x[y.length..$]; 1580 carry = multibyteIncrementAssign!('+')(result[y.length..$-1], carry); 1581 } 1582 if (carry) 1583 { 1584 result[$-1] = carry; 1585 return result; 1586 } 1587 else 1588 return result[0..$-1]; 1589 } 1590 1591 /** return x + y 1592 */ 1593 BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow 1594 { 1595 uint hi = cast(uint)(y >>> 32); 1596 uint lo = cast(uint)(y& 0xFFFF_FFFF); 1597 auto len = x.length; 1598 if (x.length < 2 && hi != 0) ++len; 1599 BigDigit [] result = new BigDigit[len+1]; 1600 result[0 .. x.length] = x[]; 1601 if (x.length < 2 && hi != 0) 1602 { 1603 result[1]=hi; 1604 hi=0; 1605 } 1606 uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo); 1607 if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi); 1608 if (carry) 1609 { 1610 result[$-1] = carry; 1611 return result; 1612 } 1613 else 1614 return result[0..$-1]; 1615 } 1616 1617 /** Return x - y. 1618 * x must be greater than y. 1619 */ 1620 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe 1621 { 1622 uint hi = cast(uint)(y >>> 32); 1623 uint lo = cast(uint)(y & 0xFFFF_FFFF); 1624 BigDigit [] result = new BigDigit[x.length]; 1625 result[] = x[]; 1626 multibyteIncrementAssign!('-')(result[], lo); 1627 if (hi) 1628 multibyteIncrementAssign!('-')(result[1..$], hi); 1629 if (result[$-1] == 0) 1630 return result[0..$-1]; 1631 else 1632 return result; 1633 } 1634 1635 /** General unsigned multiply routine for bigints. 1636 * Sets result = x * y. 1637 * 1638 * The length of y must not be larger than the length of x. 1639 * Different algorithms are used, depending on the lengths of x and y. 1640 * TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the 1641 * unbalanced case. (But I doubt it would be faster in practice). 1642 * 1643 */ 1644 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y) 1645 pure nothrow @safe 1646 { 1647 import core.memory : GC; 1648 assert( result.length == x.length + y.length, 1649 "result array must have enough space to store computed result"); 1650 assert( y.length > 0, "y must not be empty"); 1651 assert( x.length >= y.length, "x must be greater or equal than y"); 1652 if (y.length <= KARATSUBALIMIT) 1653 { 1654 // Small multiplier, we'll just use the asm classic multiply. 1655 if (y.length == 1) 1656 { // Trivial case, no cache effects to worry about 1657 result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0); 1658 return; 1659 } 1660 1661 immutable CACHELIMIT = getCacheLimit; 1662 if (x.length + y.length < CACHELIMIT) 1663 return mulSimple(result, x, y); 1664 1665 // If x is so big that it won't fit into the cache, we divide it into chunks 1666 // Every chunk must be greater than y.length. 1667 // We make the first chunk shorter, if necessary, to ensure this. 1668 1669 auto chunksize = CACHELIMIT / y.length; 1670 immutable residual = x.length % chunksize; 1671 if (residual < y.length) 1672 { 1673 chunksize -= y.length; 1674 } 1675 1676 // Use schoolbook multiply. 1677 mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y); 1678 auto done = chunksize; 1679 1680 while (done < x.length) 1681 { 1682 // result[done .. done+ylength] already has a value. 1683 chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) : x.length - done; 1684 BigDigit [KARATSUBALIMIT] partial; 1685 partial[0 .. y.length] = result[done .. done+y.length]; 1686 mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y); 1687 addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]); 1688 done += chunksize; 1689 } 1690 return; 1691 } 1692 1693 immutable half = (x.length >> 1) + (x.length & 1); 1694 if (2*y.length*y.length <= x.length*x.length) 1695 { 1696 // UNBALANCED MULTIPLY 1697 // Use school multiply to cut into quasi-squares of Karatsuba-size 1698 // or larger. The ratio of the two sides of the 'square' must be 1699 // between 1.414:1 and 1:1. Use Karatsuba on each chunk. 1700 // 1701 // For maximum performance, we want the ratio to be as close to 1702 // 1:1 as possible. To achieve this, we can either pad x or y. 1703 // The best choice depends on the modulus x%y. 1704 auto numchunks = x.length / y.length; 1705 auto chunksize = y.length; 1706 auto extra = x.length % y.length; 1707 auto maxchunk = chunksize + extra; 1708 bool paddingY; // true = we're padding Y, false = we're padding X. 1709 bool isExtraSmall = extra * extra * 2 < y.length * y.length; 1710 if (numchunks == 1 && isExtraSmall) 1711 { 1712 // We divide (x_first_half * y) and (x_last_half * y) 1713 // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)). 1714 // (1.414 ~ 1.707)/2:1 is balanced. 1715 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length]; 1716 BigDigit [] partial = scratchbuff[$ - y.length .. $]; 1717 scratchbuff = scratchbuff[0 .. $ - y.length]; 1718 mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff); 1719 partial[] = result[half .. half + y.length]; 1720 mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff); 1721 BigDigit c = addAssignSimple(result[half .. half + y.length], partial); 1722 if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c); 1723 () @trusted { GC.free(scratchbuff.ptr); } (); 1724 } 1725 else 1726 { 1727 if (isExtraSmall) 1728 { 1729 // The leftover bit is small enough that it should be incorporated 1730 // in the existing chunks. 1731 // Make all the chunks a tiny bit bigger 1732 // (We're padding y with zeros) 1733 chunksize += extra / numchunks; 1734 extra = x.length - chunksize*numchunks; 1735 // there will probably be a few left over. 1736 // Every chunk will either have size chunksize, or chunksize+1. 1737 maxchunk = chunksize + 1; 1738 paddingY = true; 1739 assert(chunksize + extra + chunksize *(numchunks-1) == x.length, 1740 "Unexpected size"); 1741 } 1742 else 1743 { 1744 // the extra bit is large enough that it's worth making a new chunk. 1745 // (This means we're padding x with zeros, when doing the first one). 1746 maxchunk = chunksize; 1747 ++numchunks; 1748 paddingY = false; 1749 assert(extra + chunksize *(numchunks-1) == x.length, 1750 "Unexpected size"); 1751 } 1752 // We make the buffer a bit bigger so we have space for the partial sums. 1753 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length]; 1754 BigDigit [] partial = scratchbuff[$ - y.length .. $]; 1755 scratchbuff = scratchbuff[0 .. $ - y.length]; 1756 size_t done; // how much of X have we done so far? 1757 if (paddingY) 1758 { 1759 // If the first chunk is bigger, do it first. We're padding y. 1760 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )], 1761 x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff); 1762 done = chunksize + (extra > 0 ? 1 : 0); 1763 if (extra) --extra; 1764 } 1765 else 1766 { // We're padding X. Begin with the extra bit. 1767 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff); 1768 done = extra; 1769 extra = 0; 1770 } 1771 immutable basechunksize = chunksize; 1772 while (done < x.length) 1773 { 1774 chunksize = basechunksize + (extra > 0 ? 1 : 0); 1775 if (extra) --extra; 1776 partial[] = result[done .. done+y.length]; 1777 mulKaratsuba(result[done .. done + y.length + chunksize], 1778 x[done .. done+chunksize], y, scratchbuff); 1779 addAssignSimple(result[done .. done + y.length + chunksize], partial); 1780 done += chunksize; 1781 } 1782 () @trusted { GC.free(scratchbuff.ptr); } (); 1783 } 1784 } 1785 else 1786 { 1787 // Balanced. Use Karatsuba directly. 1788 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1789 mulKaratsuba(result, x, y, scratchbuff); 1790 () @trusted { GC.free(scratchbuff.ptr); } (); 1791 } 1792 } 1793 1794 // https://issues.dlang.org/show_bug.cgi?id=20493 1795 @safe unittest 1796 { 1797 // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300) 1798 // the number itself isn't important, only the amount of digits, so we do a simpler 1799 // multiplication of the same size, analogous to: 1800 // 11111111 * 11111111 = 0123456787654321 1801 // but instead of base 10, it's in base `BigDigit` 1802 1803 BigDigit[398] x = 1; 1804 BigDigit[236] y = 1; 1805 BigDigit[x.length + y.length] result; 1806 mulInternal(result[], x[], y[]); 1807 1808 // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0] 1809 BigDigit[x.length + y.length] expected = y.length; 1810 foreach (BigDigit i; 0 .. y.length) 1811 { 1812 expected[i] = i+1; 1813 expected[$-1-i] = i; 1814 } 1815 1816 assert(result == expected); 1817 } 1818 1819 /** General unsigned squaring routine for BigInts. 1820 * Sets result = x*x. 1821 * NOTE: If the highest half-digit of x is zero, the highest digit of result will 1822 * also be zero. 1823 */ 1824 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe 1825 { 1826 import core.memory : GC; 1827 // Squaring is potentially half a multiply, plus add the squares of 1828 // the diagonal elements. 1829 assert(result.length == 2*x.length, 1830 "result needs to have twice the capacity of x"); 1831 if (x.length <= KARATSUBASQUARELIMIT) 1832 { 1833 if (x.length == 1) 1834 { 1835 result[1] = multibyteMul(result[0 .. 1], x, x[0], 0); 1836 return; 1837 } 1838 return squareSimple(result, x); 1839 } 1840 // The nice thing about squaring is that it always stays balanced 1841 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1842 squareKaratsuba(result, x, scratchbuff); 1843 () @trusted { GC.free(scratchbuff.ptr); } (); 1844 } 1845 1846 1847 import core.bitop : bsr; 1848 1849 /// if remainder is null, only calculate quotient. 1850 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u, 1851 const BigDigit [] v) pure nothrow @safe 1852 { 1853 import core.memory : GC; 1854 assert(quotient.length == u.length - v.length + 1, 1855 "Invalid quotient length"); 1856 assert(remainder == null || remainder.length == v.length, 1857 "Invalid remainder"); 1858 assert(v.length > 1, "v must have more than 1 element"); 1859 assert(u.length >= v.length, "u must be as longer or longer than v"); 1860 1861 // Normalize by shifting v left just enough so that 1862 // its high-order bit is on, and shift u left the 1863 // same amount. The highest bit of u will never be set. 1864 1865 BigDigit [] vn = new BigDigit[v.length]; 1866 BigDigit [] un = new BigDigit[u.length + 1]; 1867 // How much to left shift v, so that its MSB is set. 1868 uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]); 1869 if (s != 0) 1870 { 1871 multibyteShl(vn, v, s); 1872 un[$-1] = multibyteShl(un[0..$-1], u, s); 1873 } 1874 else 1875 { 1876 vn[] = v[]; 1877 un[0..$-1] = u[]; 1878 un[$-1] = 0; 1879 } 1880 if (quotient.length<FASTDIVLIMIT) 1881 { 1882 schoolbookDivMod(quotient, un, vn); 1883 } 1884 else 1885 { 1886 blockDivMod(quotient, un, vn); 1887 } 1888 1889 // Unnormalize remainder, if required. 1890 if (remainder != null) 1891 { 1892 if (s == 0) remainder[] = un[0 .. vn.length]; 1893 else multibyteShr(remainder, un[0 .. vn.length+1], s); 1894 } 1895 () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } (); 1896 } 1897 1898 pure @safe unittest 1899 { 1900 immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000]; 1901 immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000]; 1902 uint [] q = new uint[u.length - v.length + 1]; 1903 uint [] r = new uint[2]; 1904 divModInternal(q, r, u, v); 1905 assert(q[]==[0xFFFF_FFFFu, 0]); 1906 assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]); 1907 u = [0, 0xFFFF_FFFE, 0x8000_0001]; 1908 v = [0xFFFF_FFFF, 0x8000_0000]; 1909 divModInternal(q, r, u, v); 1910 } 1911 1912 1913 // Converts a big uint to a hexadecimal string. 1914 // 1915 // Optionally, a separator character (eg, an underscore) may be added between 1916 // every 8 digits. 1917 // buff.length must be data.length*8 if separator is zero, 1918 // or data.length*9 if separator is non-zero. It will be completely filled. 1919 char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0, 1920 LetterCase letterCase = LetterCase.upper) pure nothrow @safe 1921 { 1922 int x=0; 1923 for (ptrdiff_t i=data.length - 1; i >= 0; --i) 1924 { 1925 toHexZeroPadded(buff[x .. x+8], data[i], letterCase); 1926 x+=8; 1927 if (separator) 1928 { 1929 if (i>0) buff[x] = separator; 1930 ++x; 1931 } 1932 } 1933 return buff; 1934 } 1935 1936 /** 1937 * Convert a big uint into an octal string. 1938 * 1939 * Params: 1940 * buff = The destination buffer for the octal string. Must be large enough to 1941 * store the result, including leading zeroes, which is 1942 * ceil(data.length * BigDigitBits / 3) characters. 1943 * The buffer is filled from back to front, starting from `buff[$-1]`. 1944 * data = The biguint to be converted. 1945 * 1946 * Returns: The index of the leading non-zero digit in `buff`. Will be 1947 * `buff.length - 1` if the entire big uint is zero. 1948 */ 1949 size_t biguintToOctal(char[] buff, const(BigDigit)[] data) 1950 pure nothrow @safe @nogc 1951 { 1952 ubyte carry = 0; 1953 int shift = 0; 1954 size_t penPos = buff.length - 1; 1955 size_t lastNonZero = buff.length - 1; 1956 1957 pragma(inline) void output(uint digit) @nogc nothrow 1958 { 1959 if (digit != 0) 1960 lastNonZero = penPos; 1961 buff[penPos--] = cast(char)('0' + digit); 1962 } 1963 1964 foreach (bigdigit; data) 1965 { 1966 if (shift < 0) 1967 { 1968 // Some bits were carried over from previous word. 1969 assert(shift > -3, "shift must be greater than -3"); 1970 output(((bigdigit << -shift) | carry) & 0b111); 1971 shift += 3; 1972 assert(shift > 0, "shift must be 1 or greater"); 1973 } 1974 1975 while (shift <= BigDigitBits - 3) 1976 { 1977 output((bigdigit >>> shift) & 0b111); 1978 shift += 3; 1979 } 1980 1981 if (shift < BigDigitBits) 1982 { 1983 // Some bits need to be carried forward. 1984 carry = (bigdigit >>> shift) & 0b11; 1985 } 1986 shift -= BigDigitBits; 1987 assert(shift >= -2 && shift <= 0, "shift must in [-2,0]"); 1988 } 1989 1990 if (shift < 0) 1991 { 1992 // Last word had bits that haven't been output yet. 1993 assert(shift > -3, "Shift must be greater than -3"); 1994 output(carry); 1995 } 1996 1997 return lastNonZero; 1998 } 1999 2000 /** Convert a big uint into a decimal string. 2001 * 2002 * Params: 2003 * data The biguint to be converted. Will be destroyed. 2004 * buff The destination buffer for the decimal string. Must be 2005 * large enough to store the result, including leading zeros. 2006 * Will be filled backwards, starting from buff[$-1]. 2007 * 2008 * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length. 2009 * Returns: 2010 * the lowest index of buff which was used. 2011 */ 2012 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe 2013 { 2014 ptrdiff_t sofar = buff.length; 2015 // Might be better to divide by (10^38/2^32) since that gives 38 digits for 2016 // the price of 3 divisions and a shr; this version only gives 27 digits 2017 // for 3 divisions. 2018 while (data.length>1) 2019 { 2020 uint rem = multibyteDivAssign(data, 10_0000_0000, 0); 2021 itoaZeroPadded(buff[sofar-9 .. sofar], rem); 2022 sofar -= 9; 2023 if (data[$-1] == 0 && data.length > 1) 2024 { 2025 data.length = data.length - 1; 2026 } 2027 } 2028 itoaZeroPadded(buff[sofar-10 .. sofar], data[0]); 2029 sofar -= 10; 2030 // and strip off the leading zeros 2031 while (sofar != buff.length-1 && buff[sofar] == '0') 2032 sofar++; 2033 return sofar; 2034 } 2035 2036 /** Convert a decimal string into a big uint. 2037 * 2038 * Params: 2039 * data The biguint to be receive the result. Must be large enough to 2040 * store the result. 2041 * s The decimal string. May contain _ or 0 .. 9 2042 * 2043 * The required length for the destination buffer is slightly less than 2044 * 1 + s.length/log2(10) = 1 + s.length/3.3219. 2045 * 2046 * Returns: 2047 * the highest index of data which was used. 2048 */ 2049 int biguintFromDecimal(Range)(BigDigit[] data, Range s) 2050 if ( 2051 isInputRange!Range && 2052 isSomeChar!(ElementType!Range) && 2053 !isInfinite!Range) 2054 in 2055 { 2056 static if (hasLength!Range) 2057 assert((data.length >= 2) || (data.length == 1 && s.length == 1), 2058 "data has a invalid length"); 2059 } 2060 do 2061 { 2062 import std.conv : ConvException; 2063 2064 // Convert to base 1e19 = 10_000_000_000_000_000_000. 2065 // (this is the largest power of 10 that will fit into a long). 2066 // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219. 2067 // 485 bits will only just fit into 146 decimal digits. 2068 // As we convert the string, we record the number of digits we've seen in base 19: 2069 // hi is the number of digits/19, lo is the extra digits (0 to 18). 2070 // TODO: This is inefficient for very large strings (it is O(n^^2)). 2071 // We should take advantage of fast multiplication once the numbers exceed 2072 // Karatsuba size. 2073 uint lo = 0; // number of powers of digits, 0 .. 18 2074 uint x = 0; 2075 ulong y = 0; 2076 uint hi = 0; // number of base 1e19 digits 2077 data[0] = 0; // initially number is 0. 2078 if (data.length > 1) 2079 data[1] = 0; 2080 2081 foreach (character; s) 2082 { 2083 if (character == '_') 2084 continue; 2085 2086 if (character < '0' || character > '9') 2087 throw new ConvException("invalid digit"); 2088 x *= 10; 2089 x += character - '0'; 2090 ++lo; 2091 if (lo == 9) 2092 { 2093 y = x; 2094 x = 0; 2095 } 2096 if (lo == 18) 2097 { 2098 y *= 10_0000_0000; 2099 y += x; 2100 x = 0; 2101 } 2102 if (lo == 19) 2103 { 2104 y *= 10; 2105 y += x; 2106 x = 0; 2107 // Multiply existing number by 10^19, then add y1. 2108 if (hi>0) 2109 { 2110 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A 2111 ++hi; 2112 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000 2113 ++hi; 2114 } 2115 else 2116 hi = 2; 2117 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF)); 2118 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32)); 2119 if (c != 0) 2120 { 2121 data[hi]=c; 2122 ++hi; 2123 } 2124 y = 0; 2125 lo = 0; 2126 } 2127 } 2128 // Now set y = all remaining digits. 2129 if (lo >= 18) 2130 { 2131 } 2132 else if (lo >= 9) 2133 { 2134 for (int k=9; k<lo; ++k) y*=10; 2135 y+=x; 2136 } 2137 else 2138 { 2139 for (int k=0; k<lo; ++k) y*=10; 2140 y+=x; 2141 } 2142 if (lo != 0) 2143 { 2144 if (hi == 0) 2145 { 2146 data[0] = cast(uint) y; 2147 if (data.length == 1) 2148 { 2149 hi = 1; 2150 } 2151 else 2152 { 2153 data[1] = cast(uint)(y >>> 32); 2154 hi=2; 2155 } 2156 } 2157 else 2158 { 2159 while (lo>0) 2160 { 2161 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0); 2162 if (c != 0) 2163 { 2164 data[hi]=c; 2165 ++hi; 2166 } 2167 --lo; 2168 } 2169 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF)); 2170 if (y > 0xFFFF_FFFFL) 2171 { 2172 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32)); 2173 } 2174 if (c != 0) 2175 { 2176 data[hi]=c; 2177 ++hi; 2178 } 2179 } 2180 } 2181 while (hi>1 && data[hi-1]==0) 2182 --hi; 2183 return hi; 2184 } 2185 2186 2187 // ------------------------ 2188 // These in-place functions are only for internal use; they are incompatible 2189 // with COW. 2190 2191 // Classic 'schoolbook' multiplication. 2192 void mulSimple(BigDigit[] result, const(BigDigit) [] left, 2193 const(BigDigit)[] right) pure nothrow @safe 2194 in 2195 { 2196 assert(result.length == left.length + right.length, 2197 "Result must be able to store left + right"); 2198 assert(right.length>1, "right must not be empty"); 2199 } 2200 do 2201 { 2202 result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0); 2203 multibyteMultiplyAccumulate(result[1..$], left, right[1..$]); 2204 } 2205 2206 // Classic 'schoolbook' squaring 2207 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe 2208 in 2209 { 2210 assert(result.length == 2*x.length, "result must be twice as long as x"); 2211 assert(x.length>1, "x must not be empty"); 2212 } 2213 do 2214 { 2215 multibyteSquare(result, x); 2216 } 2217 2218 2219 // add two uints of possibly different lengths. Result must be as long 2220 // as the larger length. 2221 // Returns carry (0 or 1). 2222 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right) 2223 pure nothrow @safe 2224 in 2225 { 2226 assert(result.length == left.length, 2227 "result and left must be of the same length"); 2228 assert(left.length >= right.length, 2229 "left must be longer or of equal length to right"); 2230 assert(right.length > 0, "right must not be empty"); 2231 } 2232 do 2233 { 2234 uint carry = multibyteAdd(result[0 .. right.length], 2235 left[0 .. right.length], right, 0); 2236 if (right.length < left.length) 2237 { 2238 result[right.length .. left.length] = left[right.length .. $]; 2239 carry = multibyteIncrementAssign!('+')(result[right.length..$], carry); 2240 } 2241 return carry; 2242 } 2243 2244 // result = left - right 2245 // returns carry (0 or 1) 2246 BigDigit subSimple(BigDigit [] result,const(BigDigit) [] left, 2247 const(BigDigit) [] right) pure nothrow 2248 in 2249 { 2250 assert(result.length == left.length, 2251 "result and left must be of the same length"); 2252 assert(left.length >= right.length, 2253 "left must be longer or of equal length to right"); 2254 assert(right.length > 0, "right must not be empty"); 2255 } 2256 do 2257 { 2258 BigDigit carry = multibyteSub(result[0 .. right.length], 2259 left[0 .. right.length], right, 0); 2260 if (right.length < left.length) 2261 { 2262 result[right.length .. left.length] = left[right.length .. $]; 2263 carry = multibyteIncrementAssign!('-')(result[right.length..$], carry); 2264 } //else if (result.length == left.length+1) { result[$-1] = carry; carry=0; } 2265 return carry; 2266 } 2267 2268 2269 /* result = result - right 2270 * Returns carry = 1 if result was less than right. 2271 */ 2272 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right) 2273 pure nothrow @safe 2274 { 2275 assert(result.length >= right.length, 2276 "result must be longer or of equal length to right"); 2277 uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0); 2278 if (c && result.length > right.length) 2279 c = multibyteIncrementAssign!('-')(result[right.length .. $], c); 2280 return c; 2281 } 2282 2283 /* result = result + right 2284 */ 2285 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right) 2286 pure nothrow @safe 2287 { 2288 assert(result.length >= right.length, 2289 "result must be longer or of equal length to right"); 2290 uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0); 2291 if (c && result.length > right.length) 2292 c = multibyteIncrementAssign!('+')(result[right.length .. $], c); 2293 return c; 2294 } 2295 2296 /* performs result += wantSub? - right : right; 2297 */ 2298 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right, 2299 bool wantSub) pure nothrow @safe 2300 { 2301 if (wantSub) 2302 return subAssignSimple(result, right); 2303 else 2304 return addAssignSimple(result, right); 2305 } 2306 2307 2308 // return true if x<y, considering leading zeros 2309 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe 2310 { 2311 assert(x.length >= y.length, 2312 "x must be longer or of equal length to y"); 2313 auto k = x.length-1; 2314 while (x[k]==0 && k >= y.length) 2315 --k; 2316 if (k >= y.length) 2317 return false; 2318 while (k>0 && x[k]==y[k]) 2319 --k; 2320 return x[k] < y[k]; 2321 } 2322 2323 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y. 2324 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y) 2325 pure nothrow @safe 2326 { 2327 assert(result.length == ((x.length >= y.length) ? x.length : y.length), 2328 "result must capable to store the maximum of x and y"); 2329 2330 size_t minlen; 2331 bool negative; 2332 if (x.length >= y.length) 2333 { 2334 minlen = y.length; 2335 negative = less(x, y); 2336 } 2337 else 2338 { 2339 minlen = x.length; 2340 negative = !less(y, x); 2341 } 2342 const (BigDigit)[] large, small; 2343 if (negative) 2344 { 2345 large = y; small = x; 2346 } 2347 else 2348 { 2349 large = x; small = y; 2350 } 2351 2352 BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0); 2353 if (x.length != y.length) 2354 { 2355 result[minlen .. large.length]= large[minlen..$]; 2356 result[large.length..$] = 0; 2357 if (carry) 2358 multibyteIncrementAssign!('-')(result[minlen..$], carry); 2359 } 2360 return negative; 2361 } 2362 2363 /* Determine how much space is required for the temporaries 2364 * when performing a Karatsuba multiplication. 2365 * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see: 2366 * https://issues.dlang.org/show_bug.cgi?id=20493 2367 */ 2368 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe 2369 { 2370 return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4; 2371 } 2372 2373 /* Sets result = x*y, using Karatsuba multiplication. 2374 * x must be longer or equal to y. 2375 * Valid only for balanced multiplies, where x is not shorter than y. 2376 * It is superior to schoolbook multiplication if and only if 2377 * sqrt(2)*y.length > x.length > y.length. 2378 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2) 2379 * The maximum allowable length of x and y is uint.max; but better algorithms 2380 * should be used far before that length is reached. 2381 * Params: 2382 * scratchbuff An array long enough to store all the temporaries. Will be destroyed. 2383 */ 2384 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x, 2385 const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe 2386 { 2387 assert(x.length >= y.length, "x must be greater or equal to y"); 2388 assert(result.length < uint.max, "Operands too large"); 2389 assert(result.length == x.length + y.length, 2390 "result must be as large as x + y"); 2391 if (x.length <= KARATSUBALIMIT) 2392 { 2393 return mulSimple(result, x, y); 2394 } 2395 // Must be almost square (otherwise, a schoolbook iteration is better) 2396 assert(2L * y.length * y.length > (x.length-1) * (x.length-1), 2397 "Bigint Internal Error: Asymmetric Karatsuba"); 2398 2399 // The subtractive version of Karatsuba multiply uses the following result: 2400 // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid) 2401 // where mid = (x0-x1)*(y0-y1) 2402 // requiring 3 multiplies of length N, instead of 4. 2403 // The advantage of the subtractive over the additive version is that 2404 // the mid multiply cannot exceed length N. But there are subtleties: 2405 // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we 2406 // retain all of the leading zeros in the subtractions 2407 2408 // half length, round up. 2409 auto half = (x.length >> 1) + (x.length & 1); 2410 2411 const(BigDigit) [] x0 = x[0 .. half]; 2412 const(BigDigit) [] x1 = x[half .. $]; 2413 const(BigDigit) [] y0 = y[0 .. half]; 2414 const(BigDigit) [] y1 = y[half .. $]; 2415 BigDigit [] mid = scratchbuff[0 .. half*2]; 2416 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 2417 BigDigit [] resultLow = result[0 .. 2*half]; 2418 BigDigit [] resultHigh = result[2*half .. $]; 2419 // initially use result to store temporaries 2420 BigDigit [] xdiff= result[0 .. half]; 2421 BigDigit [] ydiff = result[half .. half*2]; 2422 2423 // First, we calculate mid, and sign of mid 2424 immutable bool midNegative = inplaceSub(xdiff, x0, x1) 2425 ^ inplaceSub(ydiff, y0, y1); 2426 mulKaratsuba(mid, xdiff, ydiff, newscratchbuff); 2427 2428 // Low half of result gets x0 * y0. High half gets x1 * y1 2429 2430 mulKaratsuba(resultLow, x0, y0, newscratchbuff); 2431 2432 if (2L * y1.length * y1.length < x1.length * x1.length) 2433 { 2434 // an asymmetric situation has been created. 2435 // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1. 2436 // Applying one schoolbook multiply gives us two pieces each 1.2:1 2437 if (y1.length <= KARATSUBALIMIT) 2438 mulSimple(resultHigh, x1, y1); 2439 else 2440 { 2441 // divide x1 in two, then use schoolbook multiply on the two pieces. 2442 auto quarter = (x1.length >> 1) + (x1.length & 1); 2443 immutable ysmaller = (quarter >= y1.length); 2444 mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1, 2445 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff); 2446 // Save the part which will be overwritten. 2447 immutable ysmaller2 = ((x1.length - quarter) >= y1.length); 2448 newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length]; 2449 mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1, 2450 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]); 2451 2452 resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]); 2453 } 2454 } 2455 else 2456 mulKaratsuba(resultHigh, x1, y1, newscratchbuff); 2457 2458 /* We now have result = x0y0 + (N*N)*x1y1 2459 Before adding or subtracting mid, we must calculate 2460 result += N * (x0y0 + x1y1) 2461 We can do this with three half-length additions. With a = x0y0, b = x1y1: 2462 aHI aLO 2463 + aHI aLO 2464 + bHI bLO 2465 + bHI bLO 2466 = R3 R2 R1 R0 2467 R1 = aHI + bLO + aLO 2468 R2 = aHI + bLO + aHI + carry_from_R1 2469 R3 = bHi + carry_from_R2 2470 2471 It might actually be quicker to do it in two full-length additions: 2472 newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]); 2473 addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]); 2474 */ 2475 BigDigit[] R1 = result[half .. half*2]; 2476 BigDigit[] R2 = result[half*2 .. half*3]; 2477 BigDigit[] R3 = result[half*3..$]; 2478 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 2479 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0 2480 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 2481 if (c1+c2) 2482 multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 2483 if (c1+c3) 2484 multibyteIncrementAssign!('+')(R3, c1+c3); 2485 2486 // And finally we subtract mid 2487 addOrSubAssignSimple(result[half..$], mid, !midNegative); 2488 } 2489 2490 void squareKaratsuba(BigDigit [] result, const BigDigit [] x, 2491 BigDigit [] scratchbuff) pure nothrow @safe 2492 { 2493 // See mulKaratsuba for implementation comments. 2494 // Squaring is simpler, since it never gets asymmetric. 2495 assert(result.length < uint.max, "Operands too large"); 2496 assert(result.length == 2*x.length, 2497 "result must be twice the length of x"); 2498 if (x.length <= KARATSUBASQUARELIMIT) 2499 { 2500 return squareSimple(result, x); 2501 } 2502 // half length, round up. 2503 auto half = (x.length >> 1) + (x.length & 1); 2504 2505 const(BigDigit)[] x0 = x[0 .. half]; 2506 const(BigDigit)[] x1 = x[half .. $]; 2507 BigDigit [] mid = scratchbuff[0 .. half*2]; 2508 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 2509 // initially use result to store temporaries 2510 BigDigit [] xdiff= result[0 .. half]; 2511 const BigDigit [] ydiff = result[half .. half*2]; 2512 2513 // First, we calculate mid. We don't need its sign 2514 inplaceSub(xdiff, x0, x1); 2515 squareKaratsuba(mid, xdiff, newscratchbuff); 2516 2517 // Set result = x0x0 + (N*N)*x1x1 2518 squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff); 2519 squareKaratsuba(result[2*half .. $], x1, newscratchbuff); 2520 2521 /* result += N * (x0x0 + x1x1) 2522 Do this with three half-length additions. With a = x0x0, b = x1x1: 2523 R1 = aHI + bLO + aLO 2524 R2 = aHI + bLO + aHI + carry_from_R1 2525 R3 = bHi + carry_from_R2 2526 */ 2527 BigDigit[] R1 = result[half .. half*2]; 2528 BigDigit[] R2 = result[half*2 .. half*3]; 2529 BigDigit[] R3 = result[half*3..$]; 2530 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 2531 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0 2532 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 2533 if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 2534 if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3); 2535 2536 // And finally we subtract mid, which is always positive 2537 subAssignSimple(result[half..$], mid); 2538 } 2539 2540 /* Knuth's Algorithm D, as presented in 2541 * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002). 2542 * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18. 2543 * Given u and v, calculates quotient = u / v, u = u % v. 2544 * v must be normalized (ie, the MSB of v must be 1). 2545 * The most significant words of quotient and u may be zero. 2546 * u[0 .. v.length] holds the remainder. 2547 */ 2548 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 2549 pure nothrow @safe 2550 { 2551 assert(quotient.length == u.length - v.length, 2552 "quotient has wrong length"); 2553 assert(v.length > 1, "v must not be empty"); 2554 assert(u.length >= v.length, "u must be larger or equal to v"); 2555 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]"); 2556 assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]"); 2557 // BUG: This code only works if BigDigit is uint. 2558 uint vhi = v[$-1]; 2559 uint vlo = v[$-2]; 2560 2561 for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--) 2562 { 2563 // Compute estimate of quotient[j], 2564 // qhat = (three most significant words of u)/(two most sig words of v). 2565 uint qhat; 2566 if (u[j + v.length] == vhi) 2567 { 2568 // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001) 2569 qhat = uint.max; 2570 } 2571 else 2572 { 2573 uint ulo = u[j + v.length - 2]; 2574 version (D_InlineAsm_X86) 2575 { 2576 // Note: On DMD, this is only ~10% faster than the non-asm code. 2577 uint *p = &u[j + v.length - 1]; 2578 asm pure nothrow @trusted 2579 { 2580 mov EAX, p; 2581 mov EDX, [EAX+4]; 2582 mov EAX, [EAX]; 2583 div dword ptr [vhi]; 2584 mov qhat, EAX; 2585 mov ECX, EDX; 2586 div3by2correction: 2587 mul dword ptr [vlo]; // EDX:EAX = qhat * vlo 2588 sub EAX, ulo; 2589 sbb EDX, ECX; 2590 jbe div3by2done; 2591 mov EAX, qhat; 2592 dec EAX; 2593 mov qhat, EAX; 2594 add ECX, dword ptr [vhi]; 2595 jnc div3by2correction; 2596 div3by2done: ; 2597 } 2598 } 2599 else 2600 { // version (InlineAsm) 2601 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1]; 2602 immutable bigqhat = uu / vhi; 2603 ulong rhat = uu - bigqhat * vhi; 2604 qhat = cast(uint) bigqhat; 2605 again: 2606 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo)) 2607 { 2608 --qhat; 2609 rhat += vhi; 2610 if (!(rhat & 0xFFFF_FFFF_0000_0000L)) 2611 goto again; 2612 } 2613 } // version (InlineAsm) 2614 } 2615 // Multiply and subtract. 2616 uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0); 2617 2618 if (u[j+v.length] < carry) 2619 { 2620 // If we subtracted too much, add back 2621 --qhat; 2622 carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0); 2623 } 2624 quotient[j] = qhat; 2625 u[j + v.length] = u[j + v.length] - carry; 2626 } 2627 } 2628 2629 // TODO: Replace with a library call 2630 void itoaZeroPadded(char[] output, uint value) 2631 pure nothrow @safe @nogc 2632 { 2633 for (auto i = output.length; i--;) 2634 { 2635 if (value < 10) 2636 { 2637 output[i] = cast(char)(value + '0'); 2638 value = 0; 2639 } 2640 else 2641 { 2642 output[i] = cast(char)(value % 10 + '0'); 2643 value /= 10; 2644 } 2645 } 2646 } 2647 2648 void toHexZeroPadded(char[] output, uint value, 2649 LetterCase letterCase = LetterCase.upper) pure nothrow @safe 2650 { 2651 ptrdiff_t x = output.length - 1; 2652 static immutable string upperHexDigits = "0123456789ABCDEF"; 2653 static immutable string lowerHexDigits = "0123456789abcdef"; 2654 for ( ; x >= 0; --x) 2655 { 2656 if (letterCase == LetterCase.upper) 2657 { 2658 output[x] = upperHexDigits[value & 0xF]; 2659 } 2660 else 2661 { 2662 output[x] = lowerHexDigits[value & 0xF]; 2663 } 2664 value >>= 4; 2665 } 2666 } 2667 2668 // Returns the highest value of i for which left[i]!=right[i], 2669 // or 0 if left[] == right[] 2670 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right) 2671 pure nothrow @nogc @safe 2672 { 2673 assert(left.length == right.length, 2674 "left have a length equal to that of right"); 2675 for (ptrdiff_t i = left.length - 1; i>0; --i) 2676 { 2677 if (left[i] != right[i]) 2678 return i; 2679 } 2680 return 0; 2681 } 2682 2683 // Returns the lowest value of i for which x[i]!=0. 2684 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe 2685 { 2686 int k = 0; 2687 while (x[k]==0) 2688 { 2689 ++k; 2690 assert(k < x.length, "k must be less than x.length"); 2691 } 2692 return k; 2693 } 2694 2695 /* 2696 Calculate quotient and remainder of u / v using fast recursive division. 2697 v must be normalised, and must be at least half as long as u. 2698 Given u and v, v normalised, calculates quotient = u/v, u = u%v. 2699 scratch is temporary storage space, length must be >= quotient + 1. 2700 2701 Returns: 2702 u[0 .. v.length] is the remainder. u[v.length..$] is corrupted. 2703 2704 Implements algorithm 1.8 from MCA. 2705 This algorithm has an annoying special case. After the first recursion, the 2706 highest bit of the quotient may be set. This means that in the second 2707 recursive call, the 'in' contract would be violated. (This happens only 2708 when the top quarter of u is equal to the top half of v. A base 10 2709 equivalent example of this situation is 5517/56; the first step gives 2710 55/5 = 11). To maintain the in contract, we pad a zero to the top of both 2711 u and the quotient. 'mayOverflow' indicates that that the special case 2712 has occurred. 2713 (In MCA, a different strategy is used: the in contract is weakened, and 2714 schoolbookDivMod is more general: it allows the high bit of u to be set). 2715 See also: 2716 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 2717 Max-Planck Institute fuer Informatik, (Oct 1998). 2718 */ 2719 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v, 2720 BigDigit[] scratch, bool mayOverflow = false) 2721 pure nothrow @safe 2722 in 2723 { 2724 // v must be normalized 2725 assert(v.length > 1, "v must not be empty"); 2726 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]"); 2727 assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]"); 2728 assert(quotient.length == u.length - v.length, 2729 "quotient must be of equal length of u - v"); 2730 if (mayOverflow) 2731 { 2732 assert(u[$-1] == 0, "Invalid value at u[$ - 1]"); 2733 assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]"); 2734 } 2735 2736 // Must be symmetric. Use block schoolbook division if not. 2737 assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length, 2738 "Invalid length of u"); 2739 assert((mayOverflow ? u.length-1 : u.length) >= v.length, 2740 "Invalid length of u"); 2741 assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1), 2742 "Invalid quotient length"); 2743 } 2744 do 2745 { 2746 if (quotient.length < FASTDIVLIMIT) 2747 { 2748 return schoolbookDivMod(quotient, u, v); 2749 } 2750 2751 // Split quotient into two halves, but keep padding in the top half 2752 auto k = (mayOverflow ? quotient.length - 1 : quotient.length) >> 1; 2753 2754 // RECURSION 1: Calculate the high half of the quotient 2755 2756 // Note that if u and quotient were padded, they remain padded during 2757 // this call, so in contract is satisfied. 2758 recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $], 2759 scratch, mayOverflow); 2760 2761 // quotient[k..$] is our guess at the high quotient. 2762 // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the 2763 // first remainder. u[0 .. 2*k] is the low part. 2764 2765 // Calculate the full first remainder to be 2766 // remainder - highQuotient * lowDivisor 2767 // reducing highQuotient until the remainder is positive. 2768 // The low part of the remainder, u[0 .. k], cannot be altered by this. 2769 2770 adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k, 2771 scratch[0 .. quotient.length], mayOverflow); 2772 2773 // RECURSION 2: Calculate the low half of the quotient 2774 // The full first remainder is now in u[0 .. k + v.length]. 2775 2776 if (u[k + v.length - 1] & 0x8000_0000) 2777 { 2778 // Special case. The high quotient is 0x1_00...000 or 0x1_00...001. 2779 // This means we need an extra quotient word for the next recursion. 2780 // We need to restore the invariant for the recursive calls. 2781 // We do this by padding both u and quotient. Extending u is trivial, 2782 // because the higher words will not be used again. But for the 2783 // quotient, we're clobbering the low word of the high quotient, 2784 // so we need save it, and add it back in after the recursive call. 2785 2786 auto clobberedQuotient = quotient[k]; 2787 u[k+v.length] = 0; 2788 2789 recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1], 2790 v[k .. $], scratch, true); 2791 adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k, 2792 scratch[0 .. 2 * k+1], true); 2793 2794 // Now add the quotient word that got clobbered earlier. 2795 multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient); 2796 } 2797 else 2798 { 2799 // The special case has NOT happened. 2800 recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $], 2801 scratch, false); 2802 2803 // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length] 2804 2805 adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k, 2806 scratch[0 .. 2 * k]); 2807 } 2808 } 2809 2810 // rem -= quot * v[0 .. k]. 2811 // If would make rem negative, decrease quot until rem is >= 0. 2812 // Needs (quot.length * k) scratch space to store the result of the multiply. 2813 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v, 2814 ptrdiff_t k, 2815 BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe 2816 { 2817 assert(rem.length == v.length, "rem must be as long as v"); 2818 mulInternal(scratch, quot, v[0 .. k]); 2819 uint carry = 0; 2820 if (mayOverflow) 2821 carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]); 2822 else 2823 carry = subAssignSimple(rem, scratch); 2824 while (carry) 2825 { 2826 multibyteIncrementAssign!('-')(quot, 1); // quot-- 2827 carry -= multibyteAdd(rem, rem, v, 0); 2828 } 2829 } 2830 2831 // Cope with unbalanced division by performing block schoolbook division. 2832 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 2833 pure nothrow @safe 2834 { 2835 import core.memory : GC; 2836 assert(quotient.length == u.length - v.length, 2837 "quotient must be of equal length of u - v"); 2838 assert(v.length > 1, "v must not be empty"); 2839 assert(u.length >= v.length, "u must be longer or of equal length as v"); 2840 assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]"); 2841 assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]"); 2842 BigDigit [] scratch = new BigDigit[v.length + 1]; 2843 2844 // Perform block schoolbook division, with 'v.length' blocks. 2845 auto m = u.length - v.length; 2846 while (m > v.length) 2847 { 2848 immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0; 2849 BigDigit saveq; 2850 if (mayOverflow) 2851 { 2852 u[m + v.length] = 0; 2853 saveq = quotient[m]; 2854 } 2855 recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)], 2856 u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow); 2857 if (mayOverflow) 2858 { 2859 assert(quotient[m] == 0, "quotient must not be 0"); 2860 quotient[m] = saveq; 2861 } 2862 m -= v.length; 2863 } 2864 recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch); 2865 () @trusted { GC.free(scratch.ptr); } (); 2866 } 2867 2868 @system unittest 2869 { 2870 version (none) 2871 { 2872 import core.stdc.stdio; 2873 2874 void printBiguint(const uint [] data) 2875 { 2876 char [] buff = biguintToHex(new char[data.length*9], data, '_'); 2877 printf("%.*s\n", cast(int) buff.length, buff.ptr); 2878 } 2879 2880 void printDecimalBigUint(BigUint data) 2881 { 2882 auto str = data.toDecimalString(0); 2883 printf("%.*s\n", cast(int) str.length, str.ptr); 2884 } 2885 } 2886 2887 uint [] a, b; 2888 a = new uint[43]; 2889 b = new uint[179]; 2890 for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i; 2891 for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546; 2892 2893 a[$-1] |= 0x8000_0000; 2894 uint [] r = new uint[a.length]; 2895 uint [] q = new uint[b.length-a.length+1]; 2896 2897 divModInternal(q, r, b, a); 2898 q = q[0..$-1]; 2899 uint [] r1 = r.dup; 2900 uint [] q1 = q.dup; 2901 blockDivMod(q, b, a); 2902 r = b[0 .. a.length]; 2903 assert(r[] == r1[]); 2904 assert(q[] == q1[]); 2905 } 2906 2907 // biguintToOctal 2908 @safe unittest 2909 { 2910 enum bufSize = 5 * BigDigitBits / 3 + 1; 2911 auto buf = new char[bufSize]; 2912 size_t i; 2913 BigDigit[] data = [ 342391 ]; 2914 2915 // Basic functionality with single word 2916 i = biguintToOctal(buf, data); 2917 assert(i == bufSize - 7 && buf[i .. $] == "1234567"); 2918 2919 // Test carrying bits between words 2920 data = [ 0x77053977, 0x39770539, 0x00000005 ]; 2921 i = biguintToOctal(buf, data); 2922 assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567"); 2923 2924 // Test carried bits in the last word 2925 data = [ 0x80000000 ]; 2926 i = biguintToOctal(buf, data); 2927 assert(buf[i .. $] == "20000000000"); 2928 2929 // Test boundary between 3rd and 4th word where the number of bits is 2930 // divisible by 3 and no bits should be carried. 2931 // 2932 // The 0xC0000000's are for "poisoning" the carry to be non-zero when the 2933 // rollover happens, so that if any bugs happen in wrongly adding the carry 2934 // to the next word, non-zero bits will show up in the output. 2935 data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ]; 2936 i = biguintToOctal(buf, data); 2937 assert(buf[i .. $] == "2060000000001400000000030000000000"); 2938 2939 // Boundary case: 0 2940 data = [ 0 ]; 2941 i = biguintToOctal(buf, data); 2942 assert(buf[i .. $] == "0"); 2943 }