1 /** Fundamental operations for arbitrary-precision arithmetic
2 *
3 * These functions are for internal use only.
4 */
5 /* Copyright Don Clugston 2008 - 2010.
6 * Distributed under the Boost Software License, Version 1.0.
7 * (See accompanying file LICENSE_1_0.txt or copy at
8 * http://www.boost.org/LICENSE_1_0.txt)
9 */
10 /* References:
11 "Modern Computer Arithmetic" (MCA) is the primary reference for all
12 algorithms used in this library.
13 - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic",
14 Version 0.5.9, (Oct 2010).
15 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
16 Max-Planck Institute fuer Informatik, (Oct 1998).
17 - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.",
18 INRIA 4664, (Dec 2002).
19 - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?",
20 http://bodrato.it/papers (2006).
21 - A. Fog, "Optimizing subroutines in assembly language",
22 www.agner.org/optimize (2008).
23 - A. Fog, "The microarchitecture of Intel and AMD CPU's",
24 www.agner.org/optimize (2008).
25 - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs
26 and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008).
27
28 Idioms:
29 Many functions in this module use
30 'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)'
31 in order to disable implicit conversion.
32
33 */
34 module std.internal.math.biguintcore;
35
36 version (D_InlineAsm_X86)
37 {
38 static import std.internal.math.biguintx86;
39 }
40 static import std.internal.math.biguintnoasm;
41
42 import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT,
43 KARATSUBASQUARELIMIT;
44
45 alias multibyteAdd = multibyteAddSub!('+');
46 alias multibyteSub = multibyteAddSub!('-');
47
48 private import std.traits;
49 private import std.range.primitives;
50 public import std.ascii : LetterCase;
51 import std.range.primitives;
52 import std.traits;
53
54 private:
55
56 // dipatchers to the right low-level primitives. Added to allow BigInt CTFE for
57 // 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's
58 // used by the other architectures too.
59 // See comments below in case it has to be refactored.
60 version (X86)
61 uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry)
62 {
63 // must be checked before, otherwise D_InlineAsm_X86 is true.
64 if (__ctfe)
65 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
66 // Runtime.
67 else version (D_InlineAsm_X86)
68 return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry);
69 // Runtime if no asm available.
70 else
71 return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
72 }
73 // Any other architecture
74 else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub;
75
76 version (X86)
77 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry)
78 {
79 if (__ctfe)
80 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
81 else version (D_InlineAsm_X86)
82 return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry);
83 else
84 return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
85 }
86 else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign;
87
88 version (X86)
89 uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits)
90 {
91 if (__ctfe)
92 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
93 else version (D_InlineAsm_X86)
94 return std.internal.math.biguintx86.multibyteShl(dest, src, numbits);
95 else
96 return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
97 }
98 else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl;
99
100 version (X86)
101 void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits)
102 {
103 if (__ctfe)
104 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
105 else version (D_InlineAsm_X86)
106 std.internal.math.biguintx86.multibyteShr(dest, src, numbits);
107 else
108 std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
109 }
110 else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr;
111
112 version (X86)
113 uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
114 {
115 if (__ctfe)
116 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
117 else version (D_InlineAsm_X86)
118 return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry);
119 else
120 return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
121 }
122 else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul;
123
124 version (X86)
125 uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
126 {
127 if (__ctfe)
128 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
129 else version (D_InlineAsm_X86)
130 return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry);
131 else
132 return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
133 }
134 else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd;
135
136 version (X86)
137 void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right)
138 {
139 if (__ctfe)
140 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
141 else version (D_InlineAsm_X86)
142 std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right);
143 else
144 std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
145 }
146 else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate;
147
148 version (X86)
149 uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow)
150 {
151 if (__ctfe)
152 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
153 else version (D_InlineAsm_X86)
154 return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow);
155 else
156 return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
157 }
158 else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign;
159
160 version (X86)
161 void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src)
162 {
163 if (__ctfe)
164 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
165 else version (D_InlineAsm_X86)
166 std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src);
167 else
168 std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
169 }
170 else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares;
171
172 version (X86)
173 void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x)
174 {
175 if (__ctfe)
176 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
177 else version (D_InlineAsm_X86)
178 std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x);
179 else
180 std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
181 }
182 else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate;
183
184 version (X86)
185 void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x)
186 {
187 if (__ctfe)
188 std.internal.math.biguintnoasm.multibyteSquare(result, x);
189 else version (D_InlineAsm_X86)
190 std.internal.math.biguintx86.multibyteSquare(result, x);
191 else
192 std.internal.math.biguintnoasm.multibyteSquare(result, x);
193 }
194 else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare;
195
196 // Limits for when to switch between algorithms.
197 // Half the size of the data cache.
198 @nogc nothrow pure @safe size_t getCacheLimit()
199 {
200 import core.cpuid : dataCaches;
201 return dataCaches[0].size * 1024 / 2;
202 }
203 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division
204
205
206 // These constants are used by shift operations
207 static if (BigDigit.sizeof == int.sizeof)
208 {
209 enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 }
210 alias BIGHALFDIGIT = ushort;
211 }
212 else static if (BigDigit.sizeof == long.sizeof)
213 {
214 alias BIGHALFDIGIT = uint;
215 enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 }
216 }
217 else static assert(0, "Unsupported BigDigit size");
218
219 import std.exception : assumeUnique;
220 import std.traits : isIntegral;
221 enum BigDigitBits = BigDigit.sizeof*8;
222 template maxBigDigits(T)
223 if (isIntegral!T)
224 {
225 enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof;
226 }
227
228 static immutable BigDigit[] ZERO = [0];
229 static immutable BigDigit[] ONE = [1];
230 static immutable BigDigit[] TWO = [2];
231 static immutable BigDigit[] TEN = [10];
232
233
234 public:
235
236 /// BigUint performs memory management and wraps the low-level calls.
237 struct BigUint
238 {
239 private:
240 pure invariant()
241 {
242 assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ),
243 "Invariant requires data to not empty or zero");
244 }
245
246 immutable(BigDigit) [] data = ZERO;
247
248 this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe
249 {
250 data = x;
251 }
252 package(std) // used from: std.bigint
253 this(T)(T x) pure nothrow @safe scope
254 if (isIntegral!T)
255 {
256 opAssign(x);
257 }
258
259 enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc {
260 return assumeUnique(input);
261 };
262 public:
263 // Length in uints
264 @property size_t uintLength() pure nothrow const @safe @nogc scope
265 {
266 static if (BigDigit.sizeof == uint.sizeof)
267 {
268 return data.length;
269 }
270 else static if (BigDigit.sizeof == ulong.sizeof)
271 {
272 return data.length * 2 -
273 ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0);
274 }
275 }
276 @property size_t ulongLength() pure nothrow const @safe @nogc scope
277 {
278 static if (BigDigit.sizeof == uint.sizeof)
279 {
280 return (data.length + 1) >> 1;
281 }
282 else static if (BigDigit.sizeof == ulong.sizeof)
283 {
284 return data.length;
285 }
286 }
287
288 // The value at (cast(ulong[]) data)[n]
289 ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope
290 {
291 static if (BigDigit.sizeof == int.sizeof)
292 {
293 if (data.length == n*2 + 1) return data[n*2];
294 return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 );
295 }
296 else static if (BigDigit.sizeof == long.sizeof)
297 {
298 return data[n];
299 }
300 }
301
302 uint peekUint(size_t n) pure nothrow const @safe @nogc scope
303 {
304 static if (BigDigit.sizeof == int.sizeof)
305 {
306 return data[n];
307 }
308 else
309 {
310 immutable x = data[n >> 1];
311 return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x;
312 }
313 }
314
315 ///
316 void opAssign(Tulong)(Tulong u) pure nothrow @safe scope
317 if (is (Tulong == ulong))
318 {
319 if (u == 0) data = ZERO;
320 else if (u == 1) data = ONE;
321 else if (u == 2) data = TWO;
322 else if (u == 10) data = TEN;
323 else
324 {
325 static if (BigDigit.sizeof == int.sizeof)
326 {
327 uint ulo = cast(uint)(u & 0xFFFF_FFFF);
328 uint uhi = cast(uint)(u >> 32);
329 if (uhi == 0)
330 {
331 data = [ulo];
332 }
333 else
334 {
335 data = [ulo, uhi];
336 }
337 }
338 else static if (BigDigit.sizeof == long.sizeof)
339 {
340 data = [u];
341 }
342 }
343 }
344 void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope
345 {
346 this.data = y.data;
347 }
348
349 ///
350 int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope
351 {
352 if (data.length != y.data.length)
353 return (data.length > y.data.length) ? 1 : -1;
354 size_t k = highestDifferentDigit(data, y.data);
355 if (data[k] == y.data[k])
356 return 0;
357 return data[k] > y.data[k] ? 1 : -1;
358 }
359
360 ///
361 int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope
362 if (is (Tulong == ulong))
363 {
364 if (data.length > maxBigDigits!Tulong)
365 return 1;
366
367 foreach_reverse (i; 0 .. maxBigDigits!Tulong)
368 {
369 BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits));
370 if (tmp == 0)
371 if (data.length >= i+1)
372 {
373 // Since ZERO is [0], so we cannot simply return 1 here, as
374 // data[i] would be 0 for i == 0 in that case.
375 return (data[i] > 0) ? 1 : 0;
376 }
377 else
378 continue;
379 else
380 if (i+1 > data.length)
381 return -1;
382 else if (tmp != data[i])
383 return data[i] > tmp ? 1 : -1;
384 }
385 return 0;
386 }
387
388 bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope
389 {
390 return y.data[] == data[];
391 }
392
393 bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope
394 {
395 if (data.length > 2)
396 return false;
397 uint ylo = cast(uint)(y & 0xFFFF_FFFF);
398 uint yhi = cast(uint)(y >> 32);
399 if (data.length == 2 && data[1]!=yhi)
400 return false;
401 if (data.length == 1 && yhi != 0)
402 return false;
403 return (data[0] == ylo);
404 }
405
406 bool isZero() pure const nothrow @safe @nogc scope
407 {
408 return data.length == 1 && data[0] == 0;
409 }
410
411 size_t numBytes() pure nothrow const @safe @nogc scope
412 {
413 return data.length * BigDigit.sizeof;
414 }
415
416 // the extra bytes are added to the start of the string
417 char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope
418 {
419 immutable predictlength = 20+20*(data.length/2); // just over 19
420 char [] buff = new char[frontExtraBytes + predictlength];
421 ptrdiff_t sofar = biguintToDecimal(buff, data.dup);
422 return buff[sofar-frontExtraBytes..$];
423 }
424
425 /** Convert to a hex string, printing a minimum number of digits 'minPadding',
426 * allocating an additional 'frontExtraBytes' at the start of the string.
427 * Padding is done with padChar, which may be '0' or ' '.
428 * 'separator' is a digit separation character. If non-zero, it is inserted
429 * between every 8 digits.
430 * Separator characters do not contribute to the minPadding.
431 */
432 char [] toHexString(int frontExtraBytes, char separator = 0,
433 int minPadding=0, char padChar = '0',
434 LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope
435 {
436 // Calculate number of extra padding bytes
437 size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof)
438 ? minPadding - data.length * 2 * BigDigit.sizeof : 0;
439
440 // Length not including separator bytes
441 size_t lenBytes = data.length * 2 * BigDigit.sizeof;
442
443 // Calculate number of separator bytes
444 size_t mainSeparatorBytes = separator ? (lenBytes / 8) - 1 : 0;
445 immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0;
446
447 char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes];
448 biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase);
449 if (extraPad > 0)
450 {
451 if (separator)
452 {
453 size_t start = frontExtraBytes; // first index to pad
454 if (extraPad &7)
455 {
456 // Do 1 to 7 extra zeros.
457 buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar;
458 buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator);
459 start += (extraPad & 7) + 1;
460 }
461 for (int i=0; i< (extraPad >> 3); ++i)
462 {
463 buff[start .. start + 8] = padChar;
464 buff[start + 8] = (padChar == ' ' ? ' ' : separator);
465 start += 9;
466 }
467 }
468 else
469 {
470 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar;
471 }
472 }
473 int z = frontExtraBytes;
474 if (lenBytes > minPadding)
475 {
476 // Strip leading zeros.
477 ptrdiff_t maxStrip = lenBytes - minPadding;
478 while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0)
479 {
480 ++z;
481 --maxStrip;
482 }
483 }
484 if (padChar!='0')
485 {
486 // Convert leading zeros into padChars.
487 for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k)
488 {
489 if (buff[k]=='0') buff[k]=padChar;
490 }
491 }
492 return buff[z-frontExtraBytes..$];
493 }
494
495 /**
496 * Convert to an octal string.
497 */
498 char[] toOctalString() pure nothrow @safe const scope
499 {
500 auto predictLength = 1 + data.length*BigDigitBits / 3;
501 char[] buff = new char[predictLength];
502 size_t firstNonZero = biguintToOctal(buff, data);
503 return buff[firstNonZero .. $];
504 }
505
506 // return false if invalid character found
507 bool fromHexString(Range)(Range s) scope
508 if (isBidirectionalRange!Range && isSomeChar!(ElementType!Range))
509 {
510 import std.range : walkLength;
511
512 //Strip leading zeros
513 while (!s.empty && s.front == '0')
514 s.popFront;
515
516 if (s.empty)
517 {
518 data = ZERO;
519 return true;
520 }
521
522 immutable len = (s.save.walkLength + 15) / 4;
523 auto tmp = new BigDigit[len + 1];
524 uint part, sofar, partcount;
525
526 foreach_reverse (character; s)
527 {
528 if (character == '_')
529 continue;
530
531 uint x;
532 if (character >= '0' && character <= '9')
533 {
534 x = character - '0';
535 }
536 else if (character >= 'A' && character <= 'F')
537 {
538 x = character - 'A' + 10;
539 }
540 else if (character >= 'a' && character <= 'f')
541 {
542 x = character - 'a' + 10;
543 }
544 else
545 {
546 return false;
547 }
548
549 part >>= 4;
550 part |= (x << (32 - 4));
551 ++partcount;
552
553 if (partcount == 8)
554 {
555 tmp[sofar] = part;
556 ++sofar;
557 partcount = 0;
558 part = 0;
559 }
560 }
561 if (part)
562 {
563 for ( ; partcount != 8; ++partcount) part >>= 4;
564 tmp[sofar] = part;
565 ++sofar;
566 }
567 if (sofar == 0)
568 data = ZERO;
569 else
570 data = trustedAssumeUnique(tmp[0 .. sofar]);
571
572 return true;
573 }
574
575 // return true if OK; false if erroneous characters found
576 bool fromDecimalString(Range)(Range s) scope
577 if (isForwardRange!Range && isSomeChar!(ElementType!Range))
578 {
579 import std.range : walkLength;
580
581 while (!s.empty && s.front == '0')
582 {
583 s.popFront;
584 }
585
586 if (s.empty)
587 {
588 data = ZERO;
589 return true;
590 }
591
592 auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19;
593 auto tmp = new BigDigit[predict_length];
594
595 tmp.length = biguintFromDecimal(tmp, s);
596
597 data = trustedAssumeUnique(tmp);
598 return true;
599 }
600
601 void fromMagnitude(Range)(Range magnitude) scope
602 if (isInputRange!Range
603 && (isForwardRange!Range || hasLength!Range)
604 && isUnsigned!(ElementType!Range))
605 {
606 while (!magnitude.empty && magnitude.front == 0)
607 magnitude.popFront;
608 static if (hasLength!Range)
609 immutable inputLen = magnitude.length;
610 else
611 immutable inputLen = magnitude.save.walkLength;
612 if (!inputLen)
613 {
614 this.data = ZERO;
615 return;
616 }
617 // `magnitude` has its most significant element first but BigUint.data
618 // stores the most significant last.
619 BigDigit[] newDigits;
620 alias E = ElementType!Range;
621 static if (E.sizeof == BigDigit.sizeof)
622 {
623 newDigits = new BigDigit[inputLen];
624 foreach_reverse (ref digit; newDigits)
625 {
626 digit = magnitude.front;
627 magnitude.popFront();
628 }
629 }
630 else static if (E.sizeof < BigDigit.sizeof)
631 {
632 enum elementsPerDigit = BigDigit.sizeof / E.sizeof;
633 newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit];
634 immutable remainder = inputLen % elementsPerDigit;
635 // If there is a remainder specially assemble the most significant digit.
636 if (remainder)
637 {
638 BigDigit tmp = magnitude.front;
639 magnitude.popFront();
640 foreach (_; 1 .. remainder)
641 {
642 tmp = (tmp << (E.sizeof * 8)) | magnitude.front;
643 magnitude.popFront();
644 }
645 newDigits[$-1] = tmp;
646 }
647 // Assemble full digits from most to least significant.
648 foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)])
649 {
650 BigDigit tmp;
651 static foreach (n; 0 .. elementsPerDigit)
652 {
653 tmp |= cast(BigDigit) magnitude.front <<
654 ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8);
655 magnitude.popFront();
656 }
657 digit = tmp;
658 }
659 }
660 else static if (E.sizeof > BigDigit.sizeof)
661 {
662 enum digitsPerElement = E.sizeof / BigDigit.sizeof;
663 newDigits = new BigDigit[inputLen * digitsPerElement];
664 size_t i = newDigits.length - 1;
665 foreach (element; magnitude)
666 {
667 static foreach (n; 0 .. digitsPerElement)
668 newDigits[i - n] =
669 cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8));
670 i -= digitsPerElement;
671 }
672 while (newDigits[$-1] == 0)
673 newDigits = newDigits[0 .. $-1];
674 }
675 else
676 static assert(0);
677 this.data = trustedAssumeUnique(newDigits);
678 return;
679 }
680
681 nothrow pure @safe unittest
682 {
683 immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD];
684 // Internal representation is most-significant-last but `fromMagnitude`
685 // argument is most-significant-first.
686 immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005];
687 BigUint b;
688 // Test with reference magnitude.
689 b.fromMagnitude(referenceMagnitude);
690 assert(b.data == referenceData);
691 // Test ubyte array.
692 import std.bitmanip : nativeToBigEndian;
693 ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~
694 nativeToBigEndian(referenceMagnitude[1]) ~
695 nativeToBigEndian(referenceMagnitude[2]);
696 b.data = ZERO;
697 b.fromMagnitude(ubyteMagnitude);
698 assert(b.data == referenceData);
699 // Test ulong array.
700 static if (BigDigit.sizeof == uint.sizeof)
701 immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]),
702 ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2],
703 ];
704 else static if (BigDigit.sizeof == ulong.sizeof)
705 alias ulongMagnitude = referenceMagnitude;
706 b.data = ZERO;
707 b.fromMagnitude(ulongMagnitude);
708 assert(b.data == referenceData);
709 }
710
711 ////////////////////////
712 //
713 // All of these member functions create a new BigUint.
714
715 // return x >> y
716 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope
717 if (op == ">>" && is (Tulong == ulong))
718 {
719 assert(y > 0, "Can not right shift BigUint by 0");
720 uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
721 if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO);
722 uint words = cast(uint)(y >> LG2BIGDIGITBITS);
723 if (bits == 0)
724 {
725 return BigUint(data[words..$]);
726 }
727 else
728 {
729 uint [] result = new BigDigit[data.length - words];
730 multibyteShr(result, data[words..$], bits);
731
732 if (result.length > 1 && result[$-1] == 0)
733 return BigUint(trustedAssumeUnique(result[0 .. $-1]));
734 else
735 return BigUint(trustedAssumeUnique(result));
736 }
737 }
738
739 // return x << y
740 BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope
741 if (op == "<<" && is (Tulong == ulong))
742 {
743 assert(y > 0, "Can not left shift BigUint by 0");
744 if (isZero()) return this;
745 uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
746 assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max),
747 "Shift result exceeds temporary store");
748 uint words = cast(uint)(y >> LG2BIGDIGITBITS);
749 BigDigit [] result = new BigDigit[data.length + words+1];
750 result[0 .. words] = 0;
751 if (bits == 0)
752 {
753 result[words .. words+data.length] = data[];
754 return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
755 }
756 else
757 {
758 immutable c = multibyteShl(result[words .. words+data.length], data, bits);
759 if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
760 result[$-1] = c;
761 return BigUint(trustedAssumeUnique(result));
762 }
763 }
764
765 // If wantSub is false, return x + y, leaving sign unchanged
766 // If wantSub is true, return abs(x - y), negating sign if x < y
767 static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y, bool wantSub, ref bool sign) pure nothrow @safe
768 if (is(Tulong == ulong))
769 {
770 BigUint r;
771 if (wantSub)
772 { // perform a subtraction
773 if (x.data.length > 2)
774 {
775 // subInt returns GC allocated array, can be safely cast to immutable
776 r.data = (() @trusted => cast(immutable) subInt(x.data, y))();
777 }
778 else
779 { // could change sign!
780 ulong xx = x.data[0];
781 if (x.data.length > 1)
782 xx += (cast(ulong) x.data[1]) << 32;
783 ulong d;
784 if (xx <= y)
785 {
786 d = y - xx;
787 sign = !sign;
788 }
789 else
790 {
791 d = xx - y;
792 }
793 if (d == 0)
794 {
795 r = 0UL;
796 sign = false;
797 return r;
798 }
799 if (d > uint.max)
800 {
801 r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)];
802 }
803 else
804 {
805 r.data = [cast(uint)(d & 0xFFFF_FFFF)];
806 }
807 }
808 }
809 else
810 {
811 // addInt returns GC allocated array, can be safely cast to immutable
812 r.data = (() @trusted => cast(immutable) addInt(x.data, y))();
813 }
814 return r;
815 }
816
817 // If wantSub is false, return x + y, leaving sign unchanged.
818 // If wantSub is true, return abs(x - y), negating sign if x<y
819 static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign)
820 pure nothrow @safe
821 {
822 BigUint r;
823 if (wantSub)
824 { // perform a subtraction
825 bool negative;
826 // sub returns GC allocated array, can be safely cast to immutable
827 r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))();
828 sign ^= negative;
829 if (r.isZero())
830 {
831 sign = false;
832 }
833 }
834 else
835 {
836 // add returns GC allocated array, can be safely cast to immutable
837 r.data = (() @trusted => cast(immutable) add(x.data, y.data))();
838 }
839 return r;
840 }
841
842
843 // return x*y.
844 static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe
845 {
846 if (y == 0 || x == 0) return BigUint(ZERO);
847 static if (T.sizeof * 8 <= 32)
848 uint hi = 0;
849 else
850 uint hi = cast(uint) (y >>> 32);
851 uint lo = cast(uint) (y & 0xFFFF_FFFF);
852 uint [] result = new BigDigit[x.data.length+1+(hi != 0)];
853 result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0);
854 if (hi != 0)
855 {
856 result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1],
857 x.data, hi, 0);
858 }
859 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
860 }
861
862 /* return x * y.
863 */
864 static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe
865 {
866 if (y == 0 || x == 0)
867 return BigUint(ZERO);
868 auto len = x.data.length + y.data.length;
869 BigDigit [] result = new BigDigit[len];
870 if (y.data.length > x.data.length)
871 {
872 mulInternal(result, y.data, x.data);
873 }
874 else
875 {
876 if (x.data[]==y.data[]) squareInternal(result, x.data);
877 else mulInternal(result, x.data, y.data);
878 }
879 // the highest element could be zero,
880 // in which case we need to reduce the length
881 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
882 }
883
884 // return x / y
885 static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe
886 if ( is(immutable T == immutable uint) )
887 {
888 uint y = y_;
889 if (y == 1)
890 return x;
891 uint [] result = new BigDigit[x.data.length];
892 if ((y&(-y))==y)
893 {
894 assert(y != 0, "BigUint division by zero");
895 // perfect power of 2
896 uint b = 0;
897 for (;y != 1; y>>=1)
898 {
899 ++b;
900 }
901 multibyteShr(result, x.data, b);
902 }
903 else
904 {
905 result[] = x.data[];
906 cast(void) multibyteDivAssign(result, y, 0);
907 }
908 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
909 }
910
911 static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe
912 if ( is(immutable T == immutable ulong) )
913 {
914 if (y <= uint.max)
915 return divInt!uint(x, cast(uint) y);
916 if (x.data.length < 2)
917 return BigUint(ZERO);
918 uint hi = cast(uint)(y >>> 32);
919 uint lo = cast(uint)(y & 0xFFFF_FFFF);
920 immutable uint[2] z = [lo, hi];
921 BigDigit[] result = new BigDigit[x.data.length - z.length + 1];
922 divModInternal(result, null, x.data, z[]);
923 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
924 }
925
926 // return x % y
927 static uint modInt(T)(scope BigUint x, T y_) pure
928 if ( is(immutable T == immutable uint) )
929 {
930 import core.memory : GC;
931 uint y = y_;
932 assert(y != 0, "% 0 not allowed");
933 if ((y&(-y)) == y)
934 { // perfect power of 2
935 return x.data[0] & (y-1);
936 }
937 else
938 {
939 // horribly inefficient - malloc, copy, & store are unnecessary.
940 uint [] wasteful = new BigDigit[x.data.length];
941 wasteful[] = x.data[];
942 immutable rem = multibyteDivAssign(wasteful, y, 0);
943 () @trusted { GC.free(wasteful.ptr); } ();
944 return rem;
945 }
946 }
947
948 // return x / y
949 static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe
950 {
951 if (y.data.length > x.data.length)
952 return BigUint(ZERO);
953 if (y.data.length == 1)
954 return divInt(x, y.data[0]);
955 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
956 divModInternal(result, null, x.data, y.data);
957 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
958 }
959
960 // return x % y
961 static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe
962 {
963 if (y.data.length > x.data.length) return x;
964 if (y.data.length == 1)
965 {
966 return BigUint([modInt(x, y.data[0])]);
967 }
968 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
969 BigDigit [] rem = new BigDigit[y.data.length];
970 divModInternal(result, rem, x.data, y.data);
971 return BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
972 }
973
974 // Return x / y in quotient, x % y in remainder
975 static void divMod(BigUint x, scope BigUint y,
976 out BigUint quotient, out BigUint remainder) pure nothrow @safe
977 {
978 /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */
979 if (y.data.length > x.data.length)
980 {
981 quotient = 0uL;
982 remainder = x;
983 }
984 else if (y.data.length == 1)
985 {
986 quotient = divInt(x, y.data[0]);
987 remainder = BigUint([modInt(x, y.data[0])]);
988 }
989 else
990 {
991 BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1];
992 BigDigit[] rem = new BigDigit[y.data.length];
993 divModInternal(quot, rem, x.data, y.data);
994 quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot)));
995 remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
996 }
997 }
998
999 // return x op y
1000 static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign)
1001 pure nothrow @safe
1002 if (op == "|" || op == "^" || op == "&")
1003 {
1004 auto d1 = includeSign(x.data, y.uintLength, xSign);
1005 auto d2 = includeSign(y.data, x.uintLength, ySign);
1006
1007 foreach (i; 0 .. d1.length)
1008 {
1009 mixin("d1[i] " ~ op ~ "= d2[i];");
1010 }
1011
1012 mixin("resultSign = xSign " ~ op ~ " ySign;");
1013
1014 if (resultSign)
1015 {
1016 twosComplement(d1, d1);
1017 }
1018
1019 return BigUint(removeLeadingZeros(trustedAssumeUnique(d1)));
1020 }
1021
1022 /**
1023 * Return a BigUint which is x raised to the power of y.
1024 * Method: Powers of 2 are removed from x, then left-to-right binary
1025 * exponentiation is used.
1026 * Memory allocation is minimized: at most one temporary BigUint is used.
1027 */
1028 static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe
1029 {
1030 // Deal with the degenerate cases first.
1031 if (y == 0) return BigUint(ONE);
1032 if (y == 1) return x;
1033 if (x == 0 || x == 1) return x;
1034
1035 BigUint result;
1036
1037 // Simplify, step 1: Remove all powers of 2.
1038 uint firstnonzero = firstNonZeroDigit(x.data);
1039 // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits))
1040 // where BigDigitBits = BigDigit.sizeof * 8
1041
1042 // See if x[firstnonzero..$] can now fit into a single digit.
1043 bool singledigit = ((x.data.length - firstnonzero) == 1);
1044 // If true, then x0 is that digit
1045 // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits))
1046 BigDigit x0 = x.data[firstnonzero];
1047 assert(x0 != 0, "pow(0, y) not allowed");
1048 // Length of the non-zero portion
1049 size_t nonzerolength = x.data.length - firstnonzero;
1050 ulong y0;
1051 uint evenbits = 0; // number of even bits in the bottom of x
1052 while (!(x0 & 1))
1053 {
1054 x0 >>= 1;
1055 ++evenbits;
1056 }
1057
1058 if (x.data.length- firstnonzero == 2)
1059 {
1060 // Check for a single digit straddling a digit boundary
1061 const BigDigit x1 = x.data[firstnonzero+1];
1062 if ((x1 >> evenbits) == 0)
1063 {
1064 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits));
1065 singledigit = true;
1066 }
1067 }
1068 // Now if (singledigit), x^^y = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1069
1070 uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end
1071
1072 // Simplify, step 2: For singledigits, see if we can trivially reduce y
1073
1074 BigDigit finalMultiplier = 1UL;
1075
1076 if (singledigit)
1077 {
1078 // x fits into a single digit. Raise it to the highest power we can
1079 // that still fits into a single digit, then reduce the exponent accordingly.
1080 // We're quite likely to have a residual multiply at the end.
1081 // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100.
1082 // and 5^^13 still fits into a uint.
1083 evenshiftbits = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK);
1084 if (x0 == 1)
1085 { // Perfect power of 2
1086 result = 1UL;
1087 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1088 }
1089 immutable p = highestPowerBelowUintMax(x0);
1090 if (y <= p)
1091 { // Just do it with pow
1092 result = cast(ulong) intpow(x0, y);
1093 if (evenbits + firstnonzero == 0)
1094 return result;
1095 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1096 }
1097 y0 = y / p;
1098 finalMultiplier = intpow(x0, y - y0*p);
1099 x0 = intpow(x0, p);
1100 // Result is x0
1101 nonzerolength = 1;
1102 }
1103 // Now if (singledigit), x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1104
1105 // Perform a crude check for overflow and allocate result buffer.
1106 // The length required is y * lg2(x) bits.
1107 // which will always fit into y*x.length digits. But this is
1108 // a gross overestimate if x is small (length 1 or 2) and the highest
1109 // digit is nearly empty.
1110 // A better estimate is:
1111 // y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits,
1112 // and the first term is always between
1113 // y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and
1114 // y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS
1115 // For single digit payloads, we already have
1116 // x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1117 // and x0 is almost a full digit, so it's a tight estimate.
1118 // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y
1119 // Note that the divisions must be rounded up.
1120
1121 // Estimated length in BigDigits
1122 immutable estimatelength = singledigit
1123 ? 1 + y0 + ((evenbits*y + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y
1124 : x.data.length * y;
1125 // Imprecise check for overflow. Makes the extreme cases easier to debug
1126 // (less extreme overflow will result in an out of memory error).
1127 if (estimatelength > uint.max/(4*BigDigit.sizeof))
1128 assert(0, "Overflow in BigInt.pow");
1129
1130 // The result buffer includes space for all the trailing zeros
1131 BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength];
1132
1133 // Do all the powers of 2!
1134 size_t result_start = cast(size_t)( firstnonzero * y
1135 + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0));
1136
1137 resultBuffer[0 .. result_start] = 0;
1138 BigDigit [] t1 = resultBuffer[result_start..$];
1139 BigDigit [] r1;
1140
1141 if (singledigit)
1142 {
1143 r1 = t1[0 .. 1];
1144 r1[0] = x0;
1145 y = y0;
1146 }
1147 else
1148 {
1149 // It's not worth right shifting by evenbits unless we also shrink the length after each
1150 // multiply or squaring operation. That might still be worthwhile for large y.
1151 r1 = t1[0 .. x.data.length - firstnonzero];
1152 r1[0..$] = x.data[firstnonzero..$];
1153 }
1154
1155 if (y>1)
1156 { // Set r1 = r1 ^^ y.
1157 // The secondary buffer only needs space for the multiplication results
1158 BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start];
1159 BigDigit [] r2;
1160
1161 int shifts = 63; // num bits in a long
1162 while (!(y & 0x8000_0000_0000_0000L))
1163 {
1164 y <<= 1;
1165 --shifts;
1166 }
1167 y <<=1;
1168
1169 while (y != 0)
1170 {
1171 // For each bit of y: Set r1 = r1 * r1
1172 // If the bit is 1, set r1 = r1 * x
1173 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5.
1174 // Optimization opportunity: if more than 2 bits in y are set,
1175 // it's usually possible to reduce the number of multiplies
1176 // by caching odd powers of x. eg for y = 54,
1177 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2
1178 r2 = t2[0 .. r1.length*2];
1179 squareInternal(r2, r1);
1180 if (y & 0x8000_0000_0000_0000L)
1181 {
1182 r1 = t1[0 .. r2.length + nonzerolength];
1183 if (singledigit)
1184 {
1185 r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0);
1186 }
1187 else
1188 {
1189 mulInternal(r1, r2, x.data[firstnonzero..$]);
1190 }
1191 }
1192 else
1193 {
1194 r1 = t1[0 .. r2.length];
1195 r1[] = r2[];
1196 }
1197 y <<=1;
1198 shifts--;
1199 }
1200 while (shifts>0)
1201 {
1202 r2 = t2[0 .. r1.length * 2];
1203 squareInternal(r2, r1);
1204 r1 = t1[0 .. r2.length];
1205 r1[] = r2[];
1206 --shifts;
1207 }
1208 }
1209
1210 if (finalMultiplier != 1)
1211 {
1212 const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0);
1213 if (carry)
1214 {
1215 r1 = t1[0 .. r1.length + 1];
1216 r1[$-1] = carry;
1217 }
1218 }
1219 if (evenshiftbits)
1220 {
1221 const BigDigit carry = multibyteShl(r1, r1, evenshiftbits);
1222 if (carry != 0)
1223 {
1224 r1 = t1[0 .. r1.length + 1];
1225 r1[$ - 1] = carry;
1226 }
1227 }
1228 while (r1[$ - 1]==0)
1229 {
1230 r1=r1[0 .. $ - 1];
1231 }
1232 return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length]));
1233 }
1234
1235 // Implement toHash so that BigUint works properly as an AA key.
1236 size_t toHash() const @nogc nothrow pure @safe scope
1237 {
1238 return .hashOf(data);
1239 }
1240
1241 } // end BigUint
1242
1243 @safe pure nothrow unittest
1244 {
1245 // ulong comparison test
1246 BigUint a = [1];
1247 assert(a == 1);
1248 // https://issues.dlang.org/show_bug.cgi?id=9548
1249 assert(a < 0x8000_0000_0000_0000UL);
1250
1251 // https://issues.dlang.org/show_bug.cgi?id=12234
1252 BigUint z = [0];
1253 assert(z == 0UL);
1254 assert(!(z > 0UL));
1255 assert(!(z < 0UL));
1256 }
1257
1258 // https://issues.dlang.org/show_bug.cgi?id=16223
1259 @system pure nothrow unittest
1260 {
1261 BigUint a = [3];
1262 int b = 5;
1263 assert(BigUint.mulInt(a,b) == 15);
1264 }
1265
1266 // Remove leading zeros from x, to restore the BigUint invariant
1267 inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe
1268 {
1269 size_t k = x.length;
1270 while (k>1 && x[k - 1]==0) --k;
1271 return x[0 .. k];
1272 }
1273
1274 pure @safe unittest
1275 {
1276 BigUint r = BigUint([5]);
1277 BigUint t = BigUint([7]);
1278 BigUint s = BigUint.mod(r, t);
1279 assert(s == 5);
1280 }
1281
1282
1283 @safe pure unittest
1284 {
1285 BigUint r;
1286 r = 5UL;
1287 assert(r.peekUlong(0) == 5UL);
1288 assert(r.peekUint(0) == 5U);
1289 r = 0x1234_5678_9ABC_DEF0UL;
1290 assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL);
1291 assert(r.peekUint(0) == 0x9ABC_DEF0U);
1292 }
1293
1294
1295 // Pow tests
1296 pure @safe unittest
1297 {
1298 BigUint r, s;
1299 r.fromHexString("80000000_00000001");
1300 s = BigUint.pow(r, 5);
1301 r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000"
1302 ~ "_00000002_80000000_00000001");
1303 assert(s == r);
1304 s = 10UL;
1305 s = BigUint.pow(s, 39);
1306 r.fromDecimalString("1000000000000000000000000000000000000000");
1307 assert(s == r);
1308 r.fromHexString("1_E1178E81_00000000");
1309 s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds
1310
1311 r.fromDecimalString("000_000_00");
1312 assert(r == 0);
1313 r.fromDecimalString("0007");
1314 assert(r == 7);
1315 r.fromDecimalString("0");
1316 assert(r == 0);
1317 }
1318
1319 // Radix conversion tests
1320 @safe pure unittest
1321 {
1322 BigUint r;
1323 r.fromHexString("1_E1178E81_00000000");
1324 assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000");
1325 assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000");
1326 assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000");
1327 assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000");
1328 assert(r.toHexString(0, '_', 16+8+8) == "00000000_00000001_E1178E81_00000000");
1329 assert(r.toHexString(0, '_', 16+8+8+1) == "0_00000000_00000001_E1178E81_00000000");
1330 assert(r.toHexString(0, '_', 16+8+8+1, ' ') == " 1_E1178E81_00000000");
1331 assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000");
1332 r = 0UL;
1333 assert(r.toHexString(0, '_', 0) == "0");
1334 assert(r.toHexString(0, '_', 7) == "0000000");
1335 assert(r.toHexString(0, '_', 7, ' ') == " 0");
1336 assert(r.toHexString(0, '#', 9) == "0#00000000");
1337 assert(r.toHexString(0, 0, 9) == "000000000");
1338 }
1339
1340 //
1341 @safe pure unittest
1342 {
1343 BigUint r;
1344 r.fromHexString("1_E1178E81_00000000");
1345 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000");
1346 assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000");
1347 assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000");
1348 assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000");
1349 assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) == "00000000_00000001_e1178e81_00000000");
1350 assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000");
1351 assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == " 1_e1178e81_00000000");
1352 assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000");
1353 r = 0UL;
1354 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0");
1355 assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000");
1356 assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == " 0");
1357 assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000");
1358 assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000");
1359 assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000");
1360 }
1361
1362
1363 private:
1364 void twosComplement(const(BigDigit) [] x, BigDigit[] result)
1365 pure nothrow @safe
1366 {
1367 foreach (i; 0 .. x.length)
1368 {
1369 result[i] = ~x[i];
1370 }
1371 result[x.length..$] = BigDigit.max;
1372
1373 foreach (i; 0 .. result.length)
1374 {
1375 if (result[i] == BigDigit.max)
1376 {
1377 result[i] = 0;
1378 }
1379 else
1380 {
1381 result[i] += 1;
1382 break;
1383 }
1384 }
1385 }
1386
1387 // Encode BigInt as BigDigit array (sign and 2's complement)
1388 BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign)
1389 pure nothrow @safe
1390 {
1391 size_t length = (x.length > minSize) ? x.length : minSize;
1392 BigDigit [] result = new BigDigit[length];
1393 if (sign)
1394 {
1395 twosComplement(x, result);
1396 }
1397 else
1398 {
1399 result[0 .. x.length] = x;
1400 }
1401 return result;
1402 }
1403
1404 // works for any type
1405 T intpow(T)(T x, ulong n) pure nothrow @safe
1406 {
1407 T p;
1408
1409 switch (n)
1410 {
1411 case 0:
1412 p = 1;
1413 break;
1414
1415 case 1:
1416 p = x;
1417 break;
1418
1419 case 2:
1420 p = x * x;
1421 break;
1422
1423 default:
1424 p = 1;
1425 while (1)
1426 {
1427 if (n & 1)
1428 p *= x;
1429 n >>= 1;
1430 if (!n)
1431 break;
1432 x *= x;
1433 }
1434 break;
1435 }
1436 return p;
1437 }
1438
1439
1440 // returns the maximum power of x that will fit in a uint.
1441 int highestPowerBelowUintMax(uint x) pure nothrow @safe
1442 {
1443 assert(x > 1, "x must be greater than 1");
1444 static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9,
1445 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7];
1446 if (x<24) return maxpwr[x-2];
1447 if (x<41) return 6;
1448 if (x<85) return 5;
1449 if (x<256) return 4;
1450 if (x<1626) return 3;
1451 if (x<65_536) return 2;
1452 return 1;
1453 }
1454
1455 // returns the maximum power of x that will fit in a ulong.
1456 int highestPowerBelowUlongMax(uint x) pure nothrow @safe
1457 {
1458 assert(x > 1, "x must be greater than 1");
1459 static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18,
1460 17, 17, 16, 16, 15, 15, 15, 15, 14, 14,
1461 14, 14, 13, 13, 13, 13, 13, 13, 13, 12,
1462 12, 12, 12, 12, 12, 12, 12, 12, 12];
1463 if (x<41) return maxpwr[x-2];
1464 if (x<57) return 11;
1465 if (x<85) return 10;
1466 if (x<139) return 9;
1467 if (x<256) return 8;
1468 if (x<566) return 7;
1469 if (x<1626) return 6;
1470 if (x<7132) return 5;
1471 if (x<65_536) return 4;
1472 if (x<2_642_246) return 3;
1473 return 2;
1474 }
1475
1476 version (StdUnittest)
1477 {
1478
1479 private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe
1480 {
1481 int pwr = 1;
1482 for (ulong q = x;x*q < cast(ulong) uint.max; )
1483 {
1484 q*=x; ++pwr;
1485 }
1486 return pwr;
1487 }
1488
1489 @safe pure unittest
1490 {
1491 assert(highestPowerBelowUintMax(10)==9);
1492 for (int k=82; k<88; ++k)
1493 {
1494 assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k));
1495 }
1496 }
1497
1498 }
1499
1500
1501 /* General unsigned subtraction routine for bigints.
1502 * Sets result = x - y. If the result is negative, negative will be true.
1503 * Returns:
1504 * unique memory
1505 */
1506 BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative)
1507 pure nothrow @safe
1508 {
1509 if (x.length == y.length)
1510 {
1511 // There's a possibility of cancellation, if x and y are almost equal.
1512 ptrdiff_t last = highestDifferentDigit(x, y);
1513 BigDigit [] result = new BigDigit[last+1];
1514 if (x[last] < y[last])
1515 { // we know result is negative
1516 multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0);
1517 *negative = true;
1518 }
1519 else
1520 { // positive or zero result
1521 multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0);
1522 *negative = false;
1523 }
1524 while (result.length > 1 && result[$-1] == 0)
1525 {
1526 result = result[0..$-1];
1527 }
1528 // if (result.length >1 && result[$-1]==0) return result[0..$-1];
1529 return result;
1530 }
1531 // Lengths are different
1532 const(BigDigit) [] large, small;
1533 if (x.length < y.length)
1534 {
1535 *negative = true;
1536 large = y; small = x;
1537 }
1538 else
1539 {
1540 *negative = false;
1541 large = x; small = y;
1542 }
1543 // result.length will be equal to larger length, or could decrease by 1.
1544
1545
1546 BigDigit [] result = new BigDigit[large.length];
1547 BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0);
1548 result[small.length..$] = large[small.length..$];
1549 if (carry)
1550 {
1551 multibyteIncrementAssign!('-')(result[small.length..$], carry);
1552 }
1553 while (result.length > 1 && result[$-1] == 0)
1554 {
1555 result = result[0..$-1];
1556 }
1557 return result;
1558 }
1559
1560
1561 /*
1562 * return a + b
1563 * Returns:
1564 * unique memory
1565 */
1566 BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe
1567 {
1568 const(BigDigit) [] x, y;
1569 if (a.length < b.length)
1570 {
1571 x = b; y = a;
1572 }
1573 else
1574 {
1575 x = a; y = b;
1576 }
1577 // now we know x.length > y.length
1578 // create result. add 1 in case it overflows
1579 BigDigit [] result = new BigDigit[x.length + 1];
1580
1581 BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0);
1582 if (x.length != y.length)
1583 {
1584 result[y.length..$-1]= x[y.length..$];
1585 carry = multibyteIncrementAssign!('+')(result[y.length..$-1], carry);
1586 }
1587 if (carry)
1588 {
1589 result[$-1] = carry;
1590 return result;
1591 }
1592 else
1593 return result[0..$-1];
1594 }
1595
1596 /** return x + y
1597 */
1598 BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow
1599 {
1600 uint hi = cast(uint)(y >>> 32);
1601 uint lo = cast(uint)(y& 0xFFFF_FFFF);
1602 auto len = x.length;
1603 if (x.length < 2 && hi != 0) ++len;
1604 BigDigit [] result = new BigDigit[len+1];
1605 result[0 .. x.length] = x[];
1606 if (x.length < 2 && hi != 0)
1607 {
1608 result[1]=hi;
1609 hi=0;
1610 }
1611 uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo);
1612 if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi);
1613 if (carry)
1614 {
1615 result[$-1] = carry;
1616 return result;
1617 }
1618 else
1619 return result[0..$-1];
1620 }
1621
1622 /** Return x - y.
1623 * x must be greater than y.
1624 */
1625 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe
1626 {
1627 uint hi = cast(uint)(y >>> 32);
1628 uint lo = cast(uint)(y & 0xFFFF_FFFF);
1629 BigDigit [] result = new BigDigit[x.length];
1630 result[] = x[];
1631 multibyteIncrementAssign!('-')(result[], lo);
1632 if (hi)
1633 multibyteIncrementAssign!('-')(result[1..$], hi);
1634 if (result[$-1] == 0)
1635 return result[0..$-1];
1636 else
1637 return result;
1638 }
1639
1640 /** General unsigned multiply routine for bigints.
1641 * Sets result = x * y.
1642 *
1643 * The length of y must not be larger than the length of x.
1644 * Different algorithms are used, depending on the lengths of x and y.
1645 * TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the
1646 * unbalanced case. (But I doubt it would be faster in practice).
1647 *
1648 */
1649 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1650 pure nothrow @safe
1651 {
1652 import core.memory : GC;
1653 assert( result.length == x.length + y.length,
1654 "result array must have enough space to store computed result");
1655 assert( y.length > 0, "y must not be empty");
1656 assert( x.length >= y.length, "x must be greater or equal than y");
1657 if (y.length <= KARATSUBALIMIT)
1658 {
1659 // Small multiplier, we'll just use the asm classic multiply.
1660 if (y.length == 1)
1661 { // Trivial case, no cache effects to worry about
1662 result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0);
1663 return;
1664 }
1665
1666 immutable CACHELIMIT = getCacheLimit;
1667 if (x.length + y.length < CACHELIMIT)
1668 return mulSimple(result, x, y);
1669
1670 // If x is so big that it won't fit into the cache, we divide it into chunks
1671 // Every chunk must be greater than y.length.
1672 // We make the first chunk shorter, if necessary, to ensure this.
1673
1674 auto chunksize = CACHELIMIT / y.length;
1675 immutable residual = x.length % chunksize;
1676 if (residual < y.length)
1677 {
1678 chunksize -= y.length;
1679 }
1680
1681 // Use schoolbook multiply.
1682 mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y);
1683 auto done = chunksize;
1684
1685 while (done < x.length)
1686 {
1687 // result[done .. done+ylength] already has a value.
1688 chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) : x.length - done;
1689 BigDigit [KARATSUBALIMIT] partial;
1690 partial[0 .. y.length] = result[done .. done+y.length];
1691 mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y);
1692 addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]);
1693 done += chunksize;
1694 }
1695 return;
1696 }
1697
1698 immutable half = (x.length >> 1) + (x.length & 1);
1699 if (2*y.length*y.length <= x.length*x.length)
1700 {
1701 // UNBALANCED MULTIPLY
1702 // Use school multiply to cut into quasi-squares of Karatsuba-size
1703 // or larger. The ratio of the two sides of the 'square' must be
1704 // between 1.414:1 and 1:1. Use Karatsuba on each chunk.
1705 //
1706 // For maximum performance, we want the ratio to be as close to
1707 // 1:1 as possible. To achieve this, we can either pad x or y.
1708 // The best choice depends on the modulus x%y.
1709 auto numchunks = x.length / y.length;
1710 auto chunksize = y.length;
1711 auto extra = x.length % y.length;
1712 auto maxchunk = chunksize + extra;
1713 bool paddingY; // true = we're padding Y, false = we're padding X.
1714 bool isExtraSmall = extra * extra * 2 < y.length * y.length;
1715 if (numchunks == 1 && isExtraSmall)
1716 {
1717 // We divide (x_first_half * y) and (x_last_half * y)
1718 // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)).
1719 // (1.414 ~ 1.707)/2:1 is balanced.
1720 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length];
1721 BigDigit [] partial = scratchbuff[$ - y.length .. $];
1722 scratchbuff = scratchbuff[0 .. $ - y.length];
1723 mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff);
1724 partial[] = result[half .. half + y.length];
1725 mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff);
1726 BigDigit c = addAssignSimple(result[half .. half + y.length], partial);
1727 if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c);
1728 () @trusted { GC.free(scratchbuff.ptr); } ();
1729 }
1730 else
1731 {
1732 if (isExtraSmall)
1733 {
1734 // The leftover bit is small enough that it should be incorporated
1735 // in the existing chunks.
1736 // Make all the chunks a tiny bit bigger
1737 // (We're padding y with zeros)
1738 chunksize += extra / numchunks;
1739 extra = x.length - chunksize*numchunks;
1740 // there will probably be a few left over.
1741 // Every chunk will either have size chunksize, or chunksize+1.
1742 maxchunk = chunksize + 1;
1743 paddingY = true;
1744 assert(chunksize + extra + chunksize *(numchunks-1) == x.length,
1745 "Unexpected size");
1746 }
1747 else
1748 {
1749 // the extra bit is large enough that it's worth making a new chunk.
1750 // (This means we're padding x with zeros, when doing the first one).
1751 maxchunk = chunksize;
1752 ++numchunks;
1753 paddingY = false;
1754 assert(extra + chunksize *(numchunks-1) == x.length,
1755 "Unexpected size");
1756 }
1757 // We make the buffer a bit bigger so we have space for the partial sums.
1758 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1759 BigDigit [] partial = scratchbuff[$ - y.length .. $];
1760 scratchbuff = scratchbuff[0 .. $ - y.length];
1761 size_t done; // how much of X have we done so far?
1762 if (paddingY)
1763 {
1764 // If the first chunk is bigger, do it first. We're padding y.
1765 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1766 x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff);
1767 done = chunksize + (extra > 0 ? 1 : 0);
1768 if (extra) --extra;
1769 }
1770 else
1771 { // We're padding X. Begin with the extra bit.
1772 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1773 done = extra;
1774 extra = 0;
1775 }
1776 immutable basechunksize = chunksize;
1777 while (done < x.length)
1778 {
1779 chunksize = basechunksize + (extra > 0 ? 1 : 0);
1780 if (extra) --extra;
1781 partial[] = result[done .. done+y.length];
1782 mulKaratsuba(result[done .. done + y.length + chunksize],
1783 x[done .. done+chunksize], y, scratchbuff);
1784 addAssignSimple(result[done .. done + y.length + chunksize], partial);
1785 done += chunksize;
1786 }
1787 () @trusted { GC.free(scratchbuff.ptr); } ();
1788 }
1789 }
1790 else
1791 {
1792 // Balanced. Use Karatsuba directly.
1793 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1794 mulKaratsuba(result, x, y, scratchbuff);
1795 () @trusted { GC.free(scratchbuff.ptr); } ();
1796 }
1797 }
1798
1799 // https://issues.dlang.org/show_bug.cgi?id=20493
1800 @safe unittest
1801 {
1802 // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300)
1803 // the number itself isn't important, only the amount of digits, so we do a simpler
1804 // multiplication of the same size, analogous to:
1805 // 11111111 * 11111111 = 0123456787654321
1806 // but instead of base 10, it's in base `BigDigit`
1807
1808 BigDigit[398] x = 1;
1809 BigDigit[236] y = 1;
1810 BigDigit[x.length + y.length] result;
1811 mulInternal(result[], x[], y[]);
1812
1813 // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0]
1814 BigDigit[x.length + y.length] expected = y.length;
1815 foreach (BigDigit i; 0 .. y.length)
1816 {
1817 expected[i] = i+1;
1818 expected[$-1-i] = i;
1819 }
1820
1821 assert(result == expected);
1822 }
1823
1824 /** General unsigned squaring routine for BigInts.
1825 * Sets result = x*x.
1826 * NOTE: If the highest half-digit of x is zero, the highest digit of result will
1827 * also be zero.
1828 */
1829 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe
1830 {
1831 import core.memory : GC;
1832 // Squaring is potentially half a multiply, plus add the squares of
1833 // the diagonal elements.
1834 assert(result.length == 2*x.length,
1835 "result needs to have twice the capacity of x");
1836 if (x.length <= KARATSUBASQUARELIMIT)
1837 {
1838 if (x.length == 1)
1839 {
1840 result[1] = multibyteMul(result[0 .. 1], x, x[0], 0);
1841 return;
1842 }
1843 return squareSimple(result, x);
1844 }
1845 // The nice thing about squaring is that it always stays balanced
1846 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1847 squareKaratsuba(result, x, scratchbuff);
1848 () @trusted { GC.free(scratchbuff.ptr); } ();
1849 }
1850
1851
1852 import core.bitop : bsr;
1853
1854 /// if remainder is null, only calculate quotient.
1855 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u,
1856 const BigDigit [] v) pure nothrow @safe
1857 {
1858 import core.memory : GC;
1859 assert(quotient.length == u.length - v.length + 1,
1860 "Invalid quotient length");
1861 assert(remainder == null || remainder.length == v.length,
1862 "Invalid remainder");
1863 assert(v.length > 1, "v must have more than 1 element");
1864 assert(u.length >= v.length, "u must be as longer or longer than v");
1865
1866 // Normalize by shifting v left just enough so that
1867 // its high-order bit is on, and shift u left the
1868 // same amount. The highest bit of u will never be set.
1869
1870 BigDigit [] vn = new BigDigit[v.length];
1871 BigDigit [] un = new BigDigit[u.length + 1];
1872 // How much to left shift v, so that its MSB is set.
1873 uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]);
1874 if (s != 0)
1875 {
1876 multibyteShl(vn, v, s);
1877 un[$-1] = multibyteShl(un[0..$-1], u, s);
1878 }
1879 else
1880 {
1881 vn[] = v[];
1882 un[0..$-1] = u[];
1883 un[$-1] = 0;
1884 }
1885 if (quotient.length<FASTDIVLIMIT)
1886 {
1887 schoolbookDivMod(quotient, un, vn);
1888 }
1889 else
1890 {
1891 blockDivMod(quotient, un, vn);
1892 }
1893
1894 // Unnormalize remainder, if required.
1895 if (remainder != null)
1896 {
1897 if (s == 0) remainder[] = un[0 .. vn.length];
1898 else multibyteShr(remainder, un[0 .. vn.length+1], s);
1899 }
1900 () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } ();
1901 }
1902
1903 pure @safe unittest
1904 {
1905 immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000];
1906 immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000];
1907 uint [] q = new uint[u.length - v.length + 1];
1908 uint [] r = new uint[2];
1909 divModInternal(q, r, u, v);
1910 assert(q[]==[0xFFFF_FFFFu, 0]);
1911 assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]);
1912 u = [0, 0xFFFF_FFFE, 0x8000_0001];
1913 v = [0xFFFF_FFFF, 0x8000_0000];
1914 divModInternal(q, r, u, v);
1915 }
1916
1917
1918 // Converts a big uint to a hexadecimal string.
1919 //
1920 // Optionally, a separator character (eg, an underscore) may be added between
1921 // every 8 digits.
1922 // buff.length must be data.length*8 if separator is zero,
1923 // or data.length*9 if separator is non-zero. It will be completely filled.
1924 char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0,
1925 LetterCase letterCase = LetterCase.upper) pure nothrow @safe
1926 {
1927 int x=0;
1928 for (ptrdiff_t i=data.length - 1; i >= 0; --i)
1929 {
1930 toHexZeroPadded(buff[x .. x+8], data[i], letterCase);
1931 x+=8;
1932 if (separator)
1933 {
1934 if (i>0) buff[x] = separator;
1935 ++x;
1936 }
1937 }
1938 return buff;
1939 }
1940
1941 /**
1942 * Convert a big uint into an octal string.
1943 *
1944 * Params:
1945 * buff = The destination buffer for the octal string. Must be large enough to
1946 * store the result, including leading zeroes, which is
1947 * ceil(data.length * BigDigitBits / 3) characters.
1948 * The buffer is filled from back to front, starting from `buff[$-1]`.
1949 * data = The biguint to be converted.
1950 *
1951 * Returns: The index of the leading non-zero digit in `buff`. Will be
1952 * `buff.length - 1` if the entire big uint is zero.
1953 */
1954 size_t biguintToOctal(char[] buff, const(BigDigit)[] data)
1955 pure nothrow @safe @nogc
1956 {
1957 ubyte carry = 0;
1958 int shift = 0;
1959 size_t penPos = buff.length - 1;
1960 size_t lastNonZero = buff.length - 1;
1961
1962 pragma(inline) void output(uint digit) @nogc nothrow
1963 {
1964 if (digit != 0)
1965 lastNonZero = penPos;
1966 buff[penPos--] = cast(char)('0' + digit);
1967 }
1968
1969 foreach (bigdigit; data)
1970 {
1971 if (shift < 0)
1972 {
1973 // Some bits were carried over from previous word.
1974 assert(shift > -3, "shift must be greater than -3");
1975 output(((bigdigit << -shift) | carry) & 0b111);
1976 shift += 3;
1977 assert(shift > 0, "shift must be 1 or greater");
1978 }
1979
1980 while (shift <= BigDigitBits - 3)
1981 {
1982 output((bigdigit >>> shift) & 0b111);
1983 shift += 3;
1984 }
1985
1986 if (shift < BigDigitBits)
1987 {
1988 // Some bits need to be carried forward.
1989 carry = (bigdigit >>> shift) & 0b11;
1990 }
1991 shift -= BigDigitBits;
1992 assert(shift >= -2 && shift <= 0, "shift must in [-2,0]");
1993 }
1994
1995 if (shift < 0)
1996 {
1997 // Last word had bits that haven't been output yet.
1998 assert(shift > -3, "Shift must be greater than -3");
1999 output(carry);
2000 }
2001
2002 return lastNonZero;
2003 }
2004
2005 /** Convert a big uint into a decimal string.
2006 *
2007 * Params:
2008 * data The biguint to be converted. Will be destroyed.
2009 * buff The destination buffer for the decimal string. Must be
2010 * large enough to store the result, including leading zeros.
2011 * Will be filled backwards, starting from buff[$-1].
2012 *
2013 * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length.
2014 * Returns:
2015 * the lowest index of buff which was used.
2016 */
2017 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe
2018 {
2019 ptrdiff_t sofar = buff.length;
2020 // Might be better to divide by (10^38/2^32) since that gives 38 digits for
2021 // the price of 3 divisions and a shr; this version only gives 27 digits
2022 // for 3 divisions.
2023 while (data.length>1)
2024 {
2025 uint rem = multibyteDivAssign(data, 10_0000_0000, 0);
2026 itoaZeroPadded(buff[sofar-9 .. sofar], rem);
2027 sofar -= 9;
2028 if (data[$-1] == 0 && data.length > 1)
2029 {
2030 data.length = data.length - 1;
2031 }
2032 }
2033 itoaZeroPadded(buff[sofar-10 .. sofar], data[0]);
2034 sofar -= 10;
2035 // and strip off the leading zeros
2036 while (sofar != buff.length-1 && buff[sofar] == '0')
2037 sofar++;
2038 return sofar;
2039 }
2040
2041 /** Convert a decimal string into a big uint.
2042 *
2043 * Params:
2044 * data The biguint to be receive the result. Must be large enough to
2045 * store the result.
2046 * s The decimal string. May contain _ or 0 .. 9
2047 *
2048 * The required length for the destination buffer is slightly less than
2049 * 1 + s.length/log2(10) = 1 + s.length/3.3219.
2050 *
2051 * Returns:
2052 * the highest index of data which was used.
2053 */
2054 int biguintFromDecimal(Range)(BigDigit[] data, Range s)
2055 if (
2056 isInputRange!Range &&
2057 isSomeChar!(ElementType!Range) &&
2058 !isInfinite!Range)
2059 in
2060 {
2061 static if (hasLength!Range)
2062 assert((data.length >= 2) || (data.length == 1 && s.length == 1),
2063 "data has a invalid length");
2064 }
2065 do
2066 {
2067 import std.conv : ConvException;
2068
2069 // Convert to base 1e19 = 10_000_000_000_000_000_000.
2070 // (this is the largest power of 10 that will fit into a long).
2071 // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219.
2072 // 485 bits will only just fit into 146 decimal digits.
2073 // As we convert the string, we record the number of digits we've seen in base 19:
2074 // hi is the number of digits/19, lo is the extra digits (0 to 18).
2075 // TODO: This is inefficient for very large strings (it is O(n^^2)).
2076 // We should take advantage of fast multiplication once the numbers exceed
2077 // Karatsuba size.
2078 uint lo = 0; // number of powers of digits, 0 .. 18
2079 uint x = 0;
2080 ulong y = 0;
2081 uint hi = 0; // number of base 1e19 digits
2082 data[0] = 0; // initially number is 0.
2083 if (data.length > 1)
2084 data[1] = 0;
2085
2086 foreach (character; s)
2087 {
2088 if (character == '_')
2089 continue;
2090
2091 if (character < '0' || character > '9')
2092 throw new ConvException("invalid digit");
2093 x *= 10;
2094 x += character - '0';
2095 ++lo;
2096 if (lo == 9)
2097 {
2098 y = x;
2099 x = 0;
2100 }
2101 if (lo == 18)
2102 {
2103 y *= 10_0000_0000;
2104 y += x;
2105 x = 0;
2106 }
2107 if (lo == 19)
2108 {
2109 y *= 10;
2110 y += x;
2111 x = 0;
2112 // Multiply existing number by 10^19, then add y1.
2113 if (hi>0)
2114 {
2115 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A
2116 ++hi;
2117 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000
2118 ++hi;
2119 }
2120 else
2121 hi = 2;
2122 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2123 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2124 if (c != 0)
2125 {
2126 data[hi]=c;
2127 ++hi;
2128 }
2129 y = 0;
2130 lo = 0;
2131 }
2132 }
2133 // Now set y = all remaining digits.
2134 if (lo >= 18)
2135 {
2136 }
2137 else if (lo >= 9)
2138 {
2139 for (int k=9; k<lo; ++k) y*=10;
2140 y+=x;
2141 }
2142 else
2143 {
2144 for (int k=0; k<lo; ++k) y*=10;
2145 y+=x;
2146 }
2147 if (lo != 0)
2148 {
2149 if (hi == 0)
2150 {
2151 data[0] = cast(uint) y;
2152 if (data.length == 1)
2153 {
2154 hi = 1;
2155 }
2156 else
2157 {
2158 data[1] = cast(uint)(y >>> 32);
2159 hi=2;
2160 }
2161 }
2162 else
2163 {
2164 while (lo>0)
2165 {
2166 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0);
2167 if (c != 0)
2168 {
2169 data[hi]=c;
2170 ++hi;
2171 }
2172 --lo;
2173 }
2174 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2175 if (y > 0xFFFF_FFFFL)
2176 {
2177 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2178 }
2179 if (c != 0)
2180 {
2181 data[hi]=c;
2182 ++hi;
2183 }
2184 }
2185 }
2186 while (hi>1 && data[hi-1]==0)
2187 --hi;
2188 return hi;
2189 }
2190
2191
2192 // ------------------------
2193 // These in-place functions are only for internal use; they are incompatible
2194 // with COW.
2195
2196 // Classic 'schoolbook' multiplication.
2197 void mulSimple(BigDigit[] result, const(BigDigit) [] left,
2198 const(BigDigit)[] right) pure nothrow @safe
2199 in
2200 {
2201 assert(result.length == left.length + right.length,
2202 "Result must be able to store left + right");
2203 assert(right.length>1, "right must not be empty");
2204 }
2205 do
2206 {
2207 result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0);
2208 multibyteMultiplyAccumulate(result[1..$], left, right[1..$]);
2209 }
2210
2211 // Classic 'schoolbook' squaring
2212 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe
2213 in
2214 {
2215 assert(result.length == 2*x.length, "result must be twice as long as x");
2216 assert(x.length>1, "x must not be empty");
2217 }
2218 do
2219 {
2220 multibyteSquare(result, x);
2221 }
2222
2223
2224 // add two uints of possibly different lengths. Result must be as long
2225 // as the larger length.
2226 // Returns carry (0 or 1).
2227 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right)
2228 pure nothrow @safe
2229 in
2230 {
2231 assert(result.length == left.length,
2232 "result and left must be of the same length");
2233 assert(left.length >= right.length,
2234 "left must be longer or of equal length to right");
2235 assert(right.length > 0, "right must not be empty");
2236 }
2237 do
2238 {
2239 uint carry = multibyteAdd(result[0 .. right.length],
2240 left[0 .. right.length], right, 0);
2241 if (right.length < left.length)
2242 {
2243 result[right.length .. left.length] = left[right.length .. $];
2244 carry = multibyteIncrementAssign!('+')(result[right.length..$], carry);
2245 }
2246 return carry;
2247 }
2248
2249 /* result = result - right
2250 * Returns carry = 1 if result was less than right.
2251 */
2252 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2253 pure nothrow @safe
2254 {
2255 assert(result.length >= right.length,
2256 "result must be longer or of equal length to right");
2257 uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0);
2258 if (c && result.length > right.length)
2259 c = multibyteIncrementAssign!('-')(result[right.length .. $], c);
2260 return c;
2261 }
2262
2263 /* result = result + right
2264 */
2265 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2266 pure nothrow @safe
2267 {
2268 assert(result.length >= right.length,
2269 "result must be longer or of equal length to right");
2270 uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0);
2271 if (c && result.length > right.length)
2272 c = multibyteIncrementAssign!('+')(result[right.length .. $], c);
2273 return c;
2274 }
2275
2276 /* performs result += wantSub? - right : right;
2277 */
2278 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right,
2279 bool wantSub) pure nothrow @safe
2280 {
2281 if (wantSub)
2282 return subAssignSimple(result, right);
2283 else
2284 return addAssignSimple(result, right);
2285 }
2286
2287
2288 // return true if x<y, considering leading zeros
2289 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe
2290 {
2291 assert(x.length >= y.length,
2292 "x must be longer or of equal length to y");
2293 auto k = x.length-1;
2294 while (x[k]==0 && k >= y.length)
2295 --k;
2296 if (k >= y.length)
2297 return false;
2298 while (k>0 && x[k]==y[k])
2299 --k;
2300 return x[k] < y[k];
2301 }
2302
2303 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y.
2304 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
2305 pure nothrow @safe
2306 {
2307 assert(result.length == ((x.length >= y.length) ? x.length : y.length),
2308 "result must capable to store the maximum of x and y");
2309
2310 size_t minlen;
2311 bool negative;
2312 if (x.length >= y.length)
2313 {
2314 minlen = y.length;
2315 negative = less(x, y);
2316 }
2317 else
2318 {
2319 minlen = x.length;
2320 negative = !less(y, x);
2321 }
2322 const (BigDigit)[] large, small;
2323 if (negative)
2324 {
2325 large = y; small = x;
2326 }
2327 else
2328 {
2329 large = x; small = y;
2330 }
2331
2332 BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0);
2333 if (x.length != y.length)
2334 {
2335 result[minlen .. large.length]= large[minlen..$];
2336 result[large.length..$] = 0;
2337 if (carry)
2338 multibyteIncrementAssign!('-')(result[minlen..$], carry);
2339 }
2340 return negative;
2341 }
2342
2343 /* Determine how much space is required for the temporaries
2344 * when performing a Karatsuba multiplication.
2345 * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see:
2346 * https://issues.dlang.org/show_bug.cgi?id=20493
2347 */
2348 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe
2349 {
2350 return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4;
2351 }
2352
2353 /* Sets result = x*y, using Karatsuba multiplication.
2354 * x must be longer or equal to y.
2355 * Valid only for balanced multiplies, where x is not shorter than y.
2356 * It is superior to schoolbook multiplication if and only if
2357 * sqrt(2)*y.length > x.length > y.length.
2358 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2)
2359 * The maximum allowable length of x and y is uint.max; but better algorithms
2360 * should be used far before that length is reached.
2361 * Params:
2362 * scratchbuff An array long enough to store all the temporaries. Will be destroyed.
2363 */
2364 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x,
2365 const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe
2366 {
2367 assert(x.length >= y.length, "x must be greater or equal to y");
2368 assert(result.length < uint.max, "Operands too large");
2369 assert(result.length == x.length + y.length,
2370 "result must be as large as x + y");
2371 if (x.length <= KARATSUBALIMIT)
2372 {
2373 return mulSimple(result, x, y);
2374 }
2375 // Must be almost square (otherwise, a schoolbook iteration is better)
2376 assert(2L * y.length * y.length > (x.length-1) * (x.length-1),
2377 "Bigint Internal Error: Asymmetric Karatsuba");
2378
2379 // The subtractive version of Karatsuba multiply uses the following result:
2380 // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid)
2381 // where mid = (x0-x1)*(y0-y1)
2382 // requiring 3 multiplies of length N, instead of 4.
2383 // The advantage of the subtractive over the additive version is that
2384 // the mid multiply cannot exceed length N. But there are subtleties:
2385 // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we
2386 // retain all of the leading zeros in the subtractions
2387
2388 // half length, round up.
2389 auto half = (x.length >> 1) + (x.length & 1);
2390
2391 const(BigDigit) [] x0 = x[0 .. half];
2392 const(BigDigit) [] x1 = x[half .. $];
2393 const(BigDigit) [] y0 = y[0 .. half];
2394 const(BigDigit) [] y1 = y[half .. $];
2395 BigDigit [] mid = scratchbuff[0 .. half*2];
2396 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2397 BigDigit [] resultLow = result[0 .. 2*half];
2398 BigDigit [] resultHigh = result[2*half .. $];
2399 // initially use result to store temporaries
2400 BigDigit [] xdiff= result[0 .. half];
2401 BigDigit [] ydiff = result[half .. half*2];
2402
2403 // First, we calculate mid, and sign of mid
2404 immutable bool midNegative = inplaceSub(xdiff, x0, x1)
2405 ^ inplaceSub(ydiff, y0, y1);
2406 mulKaratsuba(mid, xdiff, ydiff, newscratchbuff);
2407
2408 // Low half of result gets x0 * y0. High half gets x1 * y1
2409
2410 mulKaratsuba(resultLow, x0, y0, newscratchbuff);
2411
2412 if (2L * y1.length * y1.length < x1.length * x1.length)
2413 {
2414 // an asymmetric situation has been created.
2415 // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1.
2416 // Applying one schoolbook multiply gives us two pieces each 1.2:1
2417 if (y1.length <= KARATSUBALIMIT)
2418 mulSimple(resultHigh, x1, y1);
2419 else
2420 {
2421 // divide x1 in two, then use schoolbook multiply on the two pieces.
2422 auto quarter = (x1.length >> 1) + (x1.length & 1);
2423 immutable ysmaller = (quarter >= y1.length);
2424 mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1,
2425 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff);
2426 // Save the part which will be overwritten.
2427 immutable ysmaller2 = ((x1.length - quarter) >= y1.length);
2428 newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length];
2429 mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1,
2430 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]);
2431
2432 resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]);
2433 }
2434 }
2435 else
2436 mulKaratsuba(resultHigh, x1, y1, newscratchbuff);
2437
2438 /* We now have result = x0y0 + (N*N)*x1y1
2439 Before adding or subtracting mid, we must calculate
2440 result += N * (x0y0 + x1y1)
2441 We can do this with three half-length additions. With a = x0y0, b = x1y1:
2442 aHI aLO
2443 + aHI aLO
2444 + bHI bLO
2445 + bHI bLO
2446 = R3 R2 R1 R0
2447 R1 = aHI + bLO + aLO
2448 R2 = aHI + bLO + aHI + carry_from_R1
2449 R3 = bHi + carry_from_R2
2450
2451 It might actually be quicker to do it in two full-length additions:
2452 newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]);
2453 addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]);
2454 */
2455 BigDigit[] R1 = result[half .. half*2];
2456 BigDigit[] R2 = result[half*2 .. half*3];
2457 BigDigit[] R3 = result[half*3..$];
2458 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2459 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2460 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2461 if (c1+c2)
2462 multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2463 if (c1+c3)
2464 multibyteIncrementAssign!('+')(R3, c1+c3);
2465
2466 // And finally we subtract mid
2467 addOrSubAssignSimple(result[half..$], mid, !midNegative);
2468 }
2469
2470 void squareKaratsuba(BigDigit [] result, const BigDigit [] x,
2471 BigDigit [] scratchbuff) pure nothrow @safe
2472 {
2473 // See mulKaratsuba for implementation comments.
2474 // Squaring is simpler, since it never gets asymmetric.
2475 assert(result.length < uint.max, "Operands too large");
2476 assert(result.length == 2*x.length,
2477 "result must be twice the length of x");
2478 if (x.length <= KARATSUBASQUARELIMIT)
2479 {
2480 return squareSimple(result, x);
2481 }
2482 // half length, round up.
2483 auto half = (x.length >> 1) + (x.length & 1);
2484
2485 const(BigDigit)[] x0 = x[0 .. half];
2486 const(BigDigit)[] x1 = x[half .. $];
2487 BigDigit [] mid = scratchbuff[0 .. half*2];
2488 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2489 // initially use result to store temporaries
2490 BigDigit [] xdiff= result[0 .. half];
2491 const BigDigit [] ydiff = result[half .. half*2];
2492
2493 // First, we calculate mid. We don't need its sign
2494 inplaceSub(xdiff, x0, x1);
2495 squareKaratsuba(mid, xdiff, newscratchbuff);
2496
2497 // Set result = x0x0 + (N*N)*x1x1
2498 squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff);
2499 squareKaratsuba(result[2*half .. $], x1, newscratchbuff);
2500
2501 /* result += N * (x0x0 + x1x1)
2502 Do this with three half-length additions. With a = x0x0, b = x1x1:
2503 R1 = aHI + bLO + aLO
2504 R2 = aHI + bLO + aHI + carry_from_R1
2505 R3 = bHi + carry_from_R2
2506 */
2507 BigDigit[] R1 = result[half .. half*2];
2508 BigDigit[] R2 = result[half*2 .. half*3];
2509 BigDigit[] R3 = result[half*3..$];
2510 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2511 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2512 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2513 if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2514 if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3);
2515
2516 // And finally we subtract mid, which is always positive
2517 subAssignSimple(result[half..$], mid);
2518 }
2519
2520 /* Knuth's Algorithm D, as presented in
2521 * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002).
2522 * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18.
2523 * Given u and v, calculates quotient = u / v, u = u % v.
2524 * v must be normalized (ie, the MSB of v must be 1).
2525 * The most significant words of quotient and u may be zero.
2526 * u[0 .. v.length] holds the remainder.
2527 */
2528 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2529 pure nothrow @safe
2530 {
2531 assert(quotient.length == u.length - v.length,
2532 "quotient has wrong length");
2533 assert(v.length > 1, "v must not be empty");
2534 assert(u.length >= v.length, "u must be larger or equal to v");
2535 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2536 assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]");
2537 // BUG: This code only works if BigDigit is uint.
2538 uint vhi = v[$-1];
2539 uint vlo = v[$-2];
2540
2541 for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--)
2542 {
2543 // Compute estimate of quotient[j],
2544 // qhat = (three most significant words of u)/(two most sig words of v).
2545 uint qhat;
2546 if (u[j + v.length] == vhi)
2547 {
2548 // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001)
2549 qhat = uint.max;
2550 }
2551 else
2552 {
2553 uint ulo = u[j + v.length - 2];
2554 version (D_InlineAsm_X86)
2555 {
2556 // Note: On DMD, this is only ~10% faster than the non-asm code.
2557 uint *p = &u[j + v.length - 1];
2558 asm pure nothrow @trusted
2559 {
2560 mov EAX, p;
2561 mov EDX, [EAX+4];
2562 mov EAX, [EAX];
2563 div dword ptr [vhi];
2564 mov qhat, EAX;
2565 mov ECX, EDX;
2566 div3by2correction:
2567 mul dword ptr [vlo]; // EDX:EAX = qhat * vlo
2568 sub EAX, ulo;
2569 sbb EDX, ECX;
2570 jbe div3by2done;
2571 mov EAX, qhat;
2572 dec EAX;
2573 mov qhat, EAX;
2574 add ECX, dword ptr [vhi];
2575 jnc div3by2correction;
2576 div3by2done: ;
2577 }
2578 }
2579 else
2580 { // version (InlineAsm)
2581 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1];
2582 immutable bigqhat = uu / vhi;
2583 ulong rhat = uu - bigqhat * vhi;
2584 qhat = cast(uint) bigqhat;
2585 again:
2586 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo))
2587 {
2588 --qhat;
2589 rhat += vhi;
2590 if (!(rhat & 0xFFFF_FFFF_0000_0000L))
2591 goto again;
2592 }
2593 } // version (InlineAsm)
2594 }
2595 // Multiply and subtract.
2596 uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0);
2597
2598 if (u[j+v.length] < carry)
2599 {
2600 // If we subtracted too much, add back
2601 --qhat;
2602 carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0);
2603 }
2604 quotient[j] = qhat;
2605 u[j + v.length] = u[j + v.length] - carry;
2606 }
2607 }
2608
2609 // TODO: Replace with a library call
2610 void itoaZeroPadded(char[] output, uint value)
2611 pure nothrow @safe @nogc
2612 {
2613 for (auto i = output.length; i--;)
2614 {
2615 if (value < 10)
2616 {
2617 output[i] = cast(char)(value + '0');
2618 value = 0;
2619 }
2620 else
2621 {
2622 output[i] = cast(char)(value % 10 + '0');
2623 value /= 10;
2624 }
2625 }
2626 }
2627
2628 void toHexZeroPadded(char[] output, uint value,
2629 LetterCase letterCase = LetterCase.upper) pure nothrow @safe
2630 {
2631 ptrdiff_t x = output.length - 1;
2632 static immutable string upperHexDigits = "0123456789ABCDEF";
2633 static immutable string lowerHexDigits = "0123456789abcdef";
2634 for ( ; x >= 0; --x)
2635 {
2636 if (letterCase == LetterCase.upper)
2637 {
2638 output[x] = upperHexDigits[value & 0xF];
2639 }
2640 else
2641 {
2642 output[x] = lowerHexDigits[value & 0xF];
2643 }
2644 value >>= 4;
2645 }
2646 }
2647
2648 // Returns the highest value of i for which left[i]!=right[i],
2649 // or 0 if left[] == right[]
2650 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right)
2651 pure nothrow @nogc @safe
2652 {
2653 assert(left.length == right.length,
2654 "left have a length equal to that of right");
2655 for (ptrdiff_t i = left.length - 1; i>0; --i)
2656 {
2657 if (left[i] != right[i])
2658 return i;
2659 }
2660 return 0;
2661 }
2662
2663 // Returns the lowest value of i for which x[i]!=0.
2664 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe
2665 {
2666 int k = 0;
2667 while (x[k]==0)
2668 {
2669 ++k;
2670 assert(k < x.length, "k must be less than x.length");
2671 }
2672 return k;
2673 }
2674
2675 /*
2676 Calculate quotient and remainder of u / v using fast recursive division.
2677 v must be normalised, and must be at least half as long as u.
2678 Given u and v, v normalised, calculates quotient = u/v, u = u%v.
2679 scratch is temporary storage space, length must be >= quotient + 1.
2680
2681 Returns:
2682 u[0 .. v.length] is the remainder. u[v.length..$] is corrupted.
2683
2684 Implements algorithm 1.8 from MCA.
2685 This algorithm has an annoying special case. After the first recursion, the
2686 highest bit of the quotient may be set. This means that in the second
2687 recursive call, the 'in' contract would be violated. (This happens only
2688 when the top quarter of u is equal to the top half of v. A base 10
2689 equivalent example of this situation is 5517/56; the first step gives
2690 55/5 = 11). To maintain the in contract, we pad a zero to the top of both
2691 u and the quotient. 'mayOverflow' indicates that that the special case
2692 has occurred.
2693 (In MCA, a different strategy is used: the in contract is weakened, and
2694 schoolbookDivMod is more general: it allows the high bit of u to be set).
2695 See also:
2696 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
2697 Max-Planck Institute fuer Informatik, (Oct 1998).
2698 */
2699 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v,
2700 BigDigit[] scratch, bool mayOverflow = false)
2701 pure nothrow @safe
2702 in
2703 {
2704 // v must be normalized
2705 assert(v.length > 1, "v must not be empty");
2706 assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2707 assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]");
2708 assert(quotient.length == u.length - v.length,
2709 "quotient must be of equal length of u - v");
2710 if (mayOverflow)
2711 {
2712 assert(u[$-1] == 0, "Invalid value at u[$ - 1]");
2713 assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]");
2714 }
2715
2716 // Must be symmetric. Use block schoolbook division if not.
2717 assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length,
2718 "Invalid length of u");
2719 assert((mayOverflow ? u.length-1 : u.length) >= v.length,
2720 "Invalid length of u");
2721 assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1),
2722 "Invalid quotient length");
2723 }
2724 do
2725 {
2726 if (quotient.length < FASTDIVLIMIT)
2727 {
2728 return schoolbookDivMod(quotient, u, v);
2729 }
2730
2731 // Split quotient into two halves, but keep padding in the top half
2732 auto k = (mayOverflow ? quotient.length - 1 : quotient.length) >> 1;
2733
2734 // RECURSION 1: Calculate the high half of the quotient
2735
2736 // Note that if u and quotient were padded, they remain padded during
2737 // this call, so in contract is satisfied.
2738 recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $],
2739 scratch, mayOverflow);
2740
2741 // quotient[k..$] is our guess at the high quotient.
2742 // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the
2743 // first remainder. u[0 .. 2*k] is the low part.
2744
2745 // Calculate the full first remainder to be
2746 // remainder - highQuotient * lowDivisor
2747 // reducing highQuotient until the remainder is positive.
2748 // The low part of the remainder, u[0 .. k], cannot be altered by this.
2749
2750 adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k,
2751 scratch[0 .. quotient.length], mayOverflow);
2752
2753 // RECURSION 2: Calculate the low half of the quotient
2754 // The full first remainder is now in u[0 .. k + v.length].
2755
2756 if (u[k + v.length - 1] & 0x8000_0000)
2757 {
2758 // Special case. The high quotient is 0x1_00...000 or 0x1_00...001.
2759 // This means we need an extra quotient word for the next recursion.
2760 // We need to restore the invariant for the recursive calls.
2761 // We do this by padding both u and quotient. Extending u is trivial,
2762 // because the higher words will not be used again. But for the
2763 // quotient, we're clobbering the low word of the high quotient,
2764 // so we need save it, and add it back in after the recursive call.
2765
2766 auto clobberedQuotient = quotient[k];
2767 u[k+v.length] = 0;
2768
2769 recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1],
2770 v[k .. $], scratch, true);
2771 adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k,
2772 scratch[0 .. 2 * k+1], true);
2773
2774 // Now add the quotient word that got clobbered earlier.
2775 multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient);
2776 }
2777 else
2778 {
2779 // The special case has NOT happened.
2780 recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $],
2781 scratch, false);
2782
2783 // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length]
2784
2785 adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k,
2786 scratch[0 .. 2 * k]);
2787 }
2788 }
2789
2790 // rem -= quot * v[0 .. k].
2791 // If would make rem negative, decrease quot until rem is >= 0.
2792 // Needs (quot.length * k) scratch space to store the result of the multiply.
2793 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v,
2794 ptrdiff_t k,
2795 BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe
2796 {
2797 assert(rem.length == v.length, "rem must be as long as v");
2798 mulInternal(scratch, quot, v[0 .. k]);
2799 uint carry = 0;
2800 if (mayOverflow)
2801 carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]);
2802 else
2803 carry = subAssignSimple(rem, scratch);
2804 while (carry)
2805 {
2806 multibyteIncrementAssign!('-')(quot, 1); // quot--
2807 carry -= multibyteAdd(rem, rem, v, 0);
2808 }
2809 }
2810
2811 // Cope with unbalanced division by performing block schoolbook division.
2812 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2813 pure nothrow @safe
2814 {
2815 import core.memory : GC;
2816 assert(quotient.length == u.length - v.length,
2817 "quotient must be of equal length of u - v");
2818 assert(v.length > 1, "v must not be empty");
2819 assert(u.length >= v.length, "u must be longer or of equal length as v");
2820 assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]");
2821 assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]");
2822 BigDigit [] scratch = new BigDigit[v.length + 1];
2823
2824 // Perform block schoolbook division, with 'v.length' blocks.
2825 auto m = u.length - v.length;
2826 while (m > v.length)
2827 {
2828 immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0;
2829 BigDigit saveq;
2830 if (mayOverflow)
2831 {
2832 u[m + v.length] = 0;
2833 saveq = quotient[m];
2834 }
2835 recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)],
2836 u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow);
2837 if (mayOverflow)
2838 {
2839 assert(quotient[m] == 0, "quotient must not be 0");
2840 quotient[m] = saveq;
2841 }
2842 m -= v.length;
2843 }
2844 recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch);
2845 () @trusted { GC.free(scratch.ptr); } ();
2846 }
2847
2848 @system unittest
2849 {
2850 version (none)
2851 {
2852 import core.stdc.stdio;
2853
2854 void printBiguint(const uint [] data)
2855 {
2856 char [] buff = biguintToHex(new char[data.length*9], data, '_');
2857 printf("%.*s\n", cast(int) buff.length, buff.ptr);
2858 }
2859
2860 void printDecimalBigUint(BigUint data)
2861 {
2862 auto str = data.toDecimalString(0);
2863 printf("%.*s\n", cast(int) str.length, str.ptr);
2864 }
2865 }
2866
2867 uint [] a, b;
2868 a = new uint[43];
2869 b = new uint[179];
2870 for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i;
2871 for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546;
2872
2873 a[$-1] |= 0x8000_0000;
2874 uint [] r = new uint[a.length];
2875 uint [] q = new uint[b.length-a.length+1];
2876
2877 divModInternal(q, r, b, a);
2878 q = q[0..$-1];
2879 uint [] r1 = r.dup;
2880 uint [] q1 = q.dup;
2881 blockDivMod(q, b, a);
2882 r = b[0 .. a.length];
2883 assert(r[] == r1[]);
2884 assert(q[] == q1[]);
2885 }
2886
2887 // biguintToOctal
2888 @safe unittest
2889 {
2890 enum bufSize = 5 * BigDigitBits / 3 + 1;
2891 auto buf = new char[bufSize];
2892 size_t i;
2893 BigDigit[] data = [ 342391 ];
2894
2895 // Basic functionality with single word
2896 i = biguintToOctal(buf, data);
2897 assert(i == bufSize - 7 && buf[i .. $] == "1234567");
2898
2899 // Test carrying bits between words
2900 data = [ 0x77053977, 0x39770539, 0x00000005 ];
2901 i = biguintToOctal(buf, data);
2902 assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567");
2903
2904 // Test carried bits in the last word
2905 data = [ 0x80000000 ];
2906 i = biguintToOctal(buf, data);
2907 assert(buf[i .. $] == "20000000000");
2908
2909 // Test boundary between 3rd and 4th word where the number of bits is
2910 // divisible by 3 and no bits should be carried.
2911 //
2912 // The 0xC0000000's are for "poisoning" the carry to be non-zero when the
2913 // rollover happens, so that if any bugs happen in wrongly adding the carry
2914 // to the next word, non-zero bits will show up in the output.
2915 data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ];
2916 i = biguintToOctal(buf, data);
2917 assert(buf[i .. $] == "2060000000001400000000030000000000");
2918
2919 // Boundary case: 0
2920 data = [ 0 ];
2921 i = biguintToOctal(buf, data);
2922 assert(buf[i .. $] == "0");
2923 }