1 /** Fundamental operations for arbitrary-precision arithmetic
2  *
3  * These functions are for internal use only.
4  */
5 /*          Copyright Don Clugston 2008 - 2010.
6  * Distributed under the Boost Software License, Version 1.0.
7  *    (See accompanying file LICENSE_1_0.txt or copy at
8  *          http://www.boost.org/LICENSE_1_0.txt)
9  */
10 /* References:
11    "Modern Computer Arithmetic" (MCA) is the primary reference for all
12     algorithms used in this library.
13   - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic",
14     Version 0.5.9, (Oct 2010).
15   - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
16     Max-Planck Institute fuer Informatik, (Oct 1998).
17   - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.",
18     INRIA 4664, (Dec 2002).
19   - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?",
20     http://bodrato.it/papers (2006).
21   - A. Fog, "Optimizing subroutines in assembly language",
22     www.agner.org/optimize (2008).
23   - A. Fog, "The microarchitecture of Intel and AMD CPU's",
24     www.agner.org/optimize (2008).
25   - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs
26     and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008).
27 
28 Idioms:
29   Many functions in this module use
30   'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)'
31   in order to disable implicit conversion.
32 
33 */
34 module std.internal.math.biguintcore;
35 
36 version (D_InlineAsm_X86)
37 {
38     static import std.internal.math.biguintx86;
39 }
40 static import std.internal.math.biguintnoasm;
41 
42 import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT,
43     KARATSUBASQUARELIMIT;
44 
45 alias multibyteAdd = multibyteAddSub!('+');
46 alias multibyteSub = multibyteAddSub!('-');
47 
48 private import std.traits;
49 private import std.range.primitives;
50 public import std.ascii : LetterCase;
51 import std.range.primitives;
52 import std.traits;
53 
54 private:
55 
56 // dipatchers to the right low-level primitives. Added to allow BigInt CTFE for
57 // 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's
58 // used by the other architectures too.
59 // See comments below in case it has to be refactored.
60 version (X86)
61 uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry)
62 {
63     // must be checked before, otherwise D_InlineAsm_X86 is true.
64     if (__ctfe)
65         return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
66     // Runtime.
67     else version (D_InlineAsm_X86)
68         return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry);
69     // Runtime if no asm available.
70     else
71         return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
72 }
73 // Any other architecture
74 else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub;
75 
76 version (X86)
77 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry)
78 {
79     if (__ctfe)
80         return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
81     else version (D_InlineAsm_X86)
82         return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry);
83     else
84         return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
85 }
86 else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign;
87 
88 version (X86)
89 uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits)
90 {
91     if (__ctfe)
92         return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
93     else version (D_InlineAsm_X86)
94         return std.internal.math.biguintx86.multibyteShl(dest, src, numbits);
95     else
96         return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
97 }
98 else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl;
99 
100 version (X86)
101 void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits)
102 {
103     if (__ctfe)
104         std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
105     else version (D_InlineAsm_X86)
106         std.internal.math.biguintx86.multibyteShr(dest, src, numbits);
107     else
108         std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
109 }
110 else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr;
111 
112 version (X86)
113 uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
114 {
115     if (__ctfe)
116         return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
117     else version (D_InlineAsm_X86)
118         return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry);
119     else
120         return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
121 }
122 else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul;
123 
124 version (X86)
125 uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
126 {
127     if (__ctfe)
128         return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
129     else version (D_InlineAsm_X86)
130         return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry);
131     else
132         return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
133 }
134 else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd;
135 
136 version (X86)
137 void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right)
138 {
139     if (__ctfe)
140         std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
141     else version (D_InlineAsm_X86)
142         std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right);
143     else
144         std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
145 }
146 else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate;
147 
148 version (X86)
149 uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow)
150 {
151     if (__ctfe)
152         return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
153     else version (D_InlineAsm_X86)
154         return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow);
155     else
156         return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
157 }
158 else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign;
159 
160 version (X86)
161 void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src)
162 {
163     if (__ctfe)
164         std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
165     else version (D_InlineAsm_X86)
166         std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src);
167     else
168         std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
169 }
170 else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares;
171 
172 version (X86)
173 void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x)
174 {
175     if (__ctfe)
176         std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
177     else version (D_InlineAsm_X86)
178         std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x);
179     else
180         std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
181 }
182 else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate;
183 
184 version (X86)
185 void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x)
186 {
187     if (__ctfe)
188         std.internal.math.biguintnoasm.multibyteSquare(result, x);
189     else version (D_InlineAsm_X86)
190         std.internal.math.biguintx86.multibyteSquare(result, x);
191     else
192         std.internal.math.biguintnoasm.multibyteSquare(result, x);
193 }
194 else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare;
195 
196 // Limits for when to switch between algorithms.
197 // Half the size of the data cache.
198 @nogc nothrow pure @safe size_t getCacheLimit()
199 {
200     import core.cpuid : dataCaches;
201     return dataCaches[0].size * 1024 / 2;
202 }
203 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division
204 
205 
206 // These constants are used by shift operations
207 static if (BigDigit.sizeof == int.sizeof)
208 {
209     enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 }
210     alias BIGHALFDIGIT = ushort;
211 }
212 else static if (BigDigit.sizeof == long.sizeof)
213 {
214     alias BIGHALFDIGIT = uint;
215     enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 }
216 }
217 else static assert(0, "Unsupported BigDigit size");
218 
219 import std.exception : assumeUnique;
220 import std.traits : isIntegral;
221 enum BigDigitBits = BigDigit.sizeof*8;
222 template maxBigDigits(T)
223 if (isIntegral!T)
224 {
225     enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof;
226 }
227 
228 static immutable BigDigit[] ZERO = [0];
229 static immutable BigDigit[] ONE = [1];
230 static immutable BigDigit[] TWO = [2];
231 static immutable BigDigit[] TEN = [10];
232 
233 
234 public:
235 
236 /// BigUint performs memory management and wraps the low-level calls.
237 struct BigUint
238 {
239 private:
240     pure invariant()
241     {
242         assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ),
243                 "Invariant requires data to not empty or zero");
244     }
245 
246     immutable(BigDigit) [] data = ZERO;
247 
248     this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe
249     {
250        data = x;
251     }
252   package(std)  // used from: std.bigint
253     this(T)(T x) pure nothrow @safe scope if (isIntegral!T)
254     {
255         opAssign(x);
256     }
257 
258     enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc {
259         return assumeUnique(input);
260     };
261 public:
262     // Length in uints
263     @property size_t uintLength() pure nothrow const @safe @nogc scope
264     {
265         static if (BigDigit.sizeof == uint.sizeof)
266         {
267             return data.length;
268         }
269         else static if (BigDigit.sizeof == ulong.sizeof)
270         {
271             return data.length * 2 -
272             ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0);
273         }
274     }
275     @property size_t ulongLength() pure nothrow const @safe @nogc scope
276     {
277         static if (BigDigit.sizeof == uint.sizeof)
278         {
279             return (data.length + 1) >> 1;
280         }
281         else static if (BigDigit.sizeof == ulong.sizeof)
282         {
283             return data.length;
284         }
285     }
286 
287     // The value at (cast(ulong[]) data)[n]
288     ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope
289     {
290         static if (BigDigit.sizeof == int.sizeof)
291         {
292             if (data.length == n*2 + 1) return data[n*2];
293             return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 );
294         }
295         else static if (BigDigit.sizeof == long.sizeof)
296         {
297             return data[n];
298         }
299     }
300 
301     uint peekUint(size_t n) pure nothrow const @safe @nogc scope
302     {
303         static if (BigDigit.sizeof == int.sizeof)
304         {
305             return data[n];
306         }
307         else
308         {
309             immutable x = data[n >> 1];
310             return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x;
311         }
312     }
313 
314     ///
315     void opAssign(Tulong)(Tulong u) pure nothrow @safe scope if (is (Tulong == ulong))
316     {
317         if (u == 0) data = ZERO;
318         else if (u == 1) data = ONE;
319         else if (u == 2) data = TWO;
320         else if (u == 10) data = TEN;
321         else
322         {
323             static if (BigDigit.sizeof == int.sizeof)
324             {
325                 uint ulo = cast(uint)(u & 0xFFFF_FFFF);
326                 uint uhi = cast(uint)(u >> 32);
327                 if (uhi == 0)
328                 {
329                     data = [ulo];
330                 }
331                 else
332                 {
333                     data = [ulo, uhi];
334                 }
335             }
336             else static if (BigDigit.sizeof == long.sizeof)
337             {
338                 data = [u];
339             }
340         }
341     }
342     void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope
343     {
344         this.data = y.data;
345     }
346 
347     ///
348     int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope
349     {
350         if (data.length != y.data.length)
351             return (data.length > y.data.length) ?  1 : -1;
352         size_t k = highestDifferentDigit(data, y.data);
353         if (data[k] == y.data[k])
354             return 0;
355         return data[k] > y.data[k] ? 1 : -1;
356     }
357 
358     ///
359     int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope if (is (Tulong == ulong))
360     {
361         if (data.length > maxBigDigits!Tulong)
362             return 1;
363 
364         foreach_reverse (i; 0 .. maxBigDigits!Tulong)
365         {
366             BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits));
367             if (tmp == 0)
368                 if (data.length >= i+1)
369                 {
370                     // Since ZERO is [0], so we cannot simply return 1 here, as
371                     // data[i] would be 0 for i == 0 in that case.
372                     return (data[i] > 0) ? 1 : 0;
373                 }
374                 else
375                     continue;
376             else
377                 if (i+1 > data.length)
378                     return -1;
379                 else if (tmp != data[i])
380                     return data[i] > tmp ? 1 : -1;
381         }
382         return 0;
383     }
384 
385     bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope
386     {
387            return y.data[] == data[];
388     }
389 
390     bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope
391     {
392         if (data.length > 2)
393             return false;
394         uint ylo = cast(uint)(y & 0xFFFF_FFFF);
395         uint yhi = cast(uint)(y >> 32);
396         if (data.length == 2 && data[1]!=yhi)
397             return false;
398         if (data.length == 1 && yhi != 0)
399             return false;
400         return (data[0] == ylo);
401     }
402 
403     bool isZero() pure const nothrow @safe @nogc scope
404     {
405         return data.length == 1 && data[0] == 0;
406     }
407 
408     size_t numBytes() pure nothrow const @safe @nogc scope
409     {
410         return data.length * BigDigit.sizeof;
411     }
412 
413     // the extra bytes are added to the start of the string
414     char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope
415     {
416         immutable predictlength = 20+20*(data.length/2); // just over 19
417         char [] buff = new char[frontExtraBytes + predictlength];
418         ptrdiff_t sofar = biguintToDecimal(buff, data.dup);
419         return buff[sofar-frontExtraBytes..$];
420     }
421 
422     /** Convert to a hex string, printing a minimum number of digits 'minPadding',
423      *  allocating an additional 'frontExtraBytes' at the start of the string.
424      *  Padding is done with padChar, which may be '0' or ' '.
425      *  'separator' is a digit separation character. If non-zero, it is inserted
426      *  between every 8 digits.
427      *  Separator characters do not contribute to the minPadding.
428      */
429     char [] toHexString(int frontExtraBytes, char separator = 0,
430             int minPadding=0, char padChar = '0',
431             LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope
432     {
433         // Calculate number of extra padding bytes
434         size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof)
435             ? minPadding - data.length * 2 * BigDigit.sizeof : 0;
436 
437         // Length not including separator bytes
438         size_t lenBytes = data.length * 2 * BigDigit.sizeof;
439 
440         // Calculate number of separator bytes
441         size_t mainSeparatorBytes = separator ? (lenBytes  / 8) - 1 : 0;
442         immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0;
443 
444         char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes];
445         biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase);
446         if (extraPad > 0)
447         {
448             if (separator)
449             {
450                 size_t start = frontExtraBytes; // first index to pad
451                 if (extraPad &7)
452                 {
453                     // Do 1 to 7 extra zeros.
454                     buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar;
455                     buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator);
456                     start += (extraPad & 7) + 1;
457                 }
458                 for (int i=0; i< (extraPad >> 3); ++i)
459                 {
460                     buff[start .. start + 8] = padChar;
461                     buff[start + 8] = (padChar == ' ' ? ' ' : separator);
462                     start += 9;
463                 }
464             }
465             else
466             {
467                 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar;
468             }
469         }
470         int z = frontExtraBytes;
471         if (lenBytes > minPadding)
472         {
473             // Strip leading zeros.
474             ptrdiff_t maxStrip = lenBytes - minPadding;
475             while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0)
476             {
477                 ++z;
478                 --maxStrip;
479             }
480         }
481         if (padChar!='0')
482         {
483             // Convert leading zeros into padChars.
484             for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k)
485             {
486                 if (buff[k]=='0') buff[k]=padChar;
487             }
488         }
489         return buff[z-frontExtraBytes..$];
490     }
491 
492     /**
493      * Convert to an octal string.
494      */
495     char[] toOctalString() pure nothrow @safe const scope
496     {
497         auto predictLength = 1 + data.length*BigDigitBits / 3;
498         char[] buff = new char[predictLength];
499         size_t firstNonZero = biguintToOctal(buff, data);
500         return buff[firstNonZero .. $];
501     }
502 
503     // return false if invalid character found
504     bool fromHexString(Range)(Range s) scope if (
505         isBidirectionalRange!Range && isSomeChar!(ElementType!Range))
506     {
507         import std.range : walkLength;
508 
509         //Strip leading zeros
510         while (!s.empty && s.front == '0')
511             s.popFront;
512 
513         if (s.empty)
514         {
515             data = ZERO;
516             return true;
517         }
518 
519         immutable len = (s.save.walkLength + 15) / 4;
520         auto tmp = new BigDigit[len + 1];
521         uint part, sofar, partcount;
522 
523         foreach_reverse (character; s)
524         {
525             if (character == '_')
526                 continue;
527 
528             uint x;
529             if (character >= '0' && character <= '9')
530             {
531                 x = character - '0';
532             }
533             else if (character >= 'A' && character <= 'F')
534             {
535                 x = character - 'A' + 10;
536             }
537             else if (character >= 'a' && character <= 'f')
538             {
539                 x = character - 'a' + 10;
540             }
541             else
542             {
543                 return false;
544             }
545 
546             part >>= 4;
547             part |= (x << (32 - 4));
548             ++partcount;
549 
550             if (partcount == 8)
551             {
552                 tmp[sofar] = part;
553                 ++sofar;
554                 partcount = 0;
555                 part = 0;
556             }
557         }
558         if (part)
559         {
560             for ( ; partcount != 8; ++partcount) part >>= 4;
561             tmp[sofar] = part;
562             ++sofar;
563         }
564         if (sofar == 0)
565             data = ZERO;
566         else
567             data = trustedAssumeUnique(tmp[0 .. sofar]);
568 
569         return true;
570     }
571 
572     // return true if OK; false if erroneous characters found
573     bool fromDecimalString(Range)(Range s) scope if (
574         isForwardRange!Range && isSomeChar!(ElementType!Range))
575     {
576         import std.range : walkLength;
577 
578         while (!s.empty && s.front == '0')
579         {
580             s.popFront;
581         }
582 
583         if (s.empty)
584         {
585             data = ZERO;
586             return true;
587         }
588 
589         auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19;
590         auto tmp = new BigDigit[predict_length];
591 
592         tmp.length = biguintFromDecimal(tmp, s);
593 
594         data = trustedAssumeUnique(tmp);
595         return true;
596     }
597 
598     void fromMagnitude(Range)(Range magnitude) scope
599         if (isInputRange!Range
600             && (isForwardRange!Range || hasLength!Range)
601             && isUnsigned!(ElementType!Range))
602     {
603         while (!magnitude.empty && magnitude.front == 0)
604             magnitude.popFront;
605         static if (hasLength!Range)
606             immutable inputLen = magnitude.length;
607         else
608             immutable inputLen = magnitude.save.walkLength;
609         if (!inputLen)
610         {
611             this.data = ZERO;
612             return;
613         }
614         // `magnitude` has its most significant element first but BigUint.data
615         // stores the most significant last.
616         BigDigit[] newDigits;
617         alias E = ElementType!Range;
618         static if (E.sizeof == BigDigit.sizeof)
619         {
620             newDigits = new BigDigit[inputLen];
621             foreach_reverse (ref digit; newDigits)
622             {
623                 digit = magnitude.front;
624                 magnitude.popFront();
625             }
626         }
627         else static if (E.sizeof < BigDigit.sizeof)
628         {
629             enum elementsPerDigit = BigDigit.sizeof / E.sizeof;
630             newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit];
631             immutable remainder = inputLen % elementsPerDigit;
632             // If there is a remainder specially assemble the most significant digit.
633             if (remainder)
634             {
635                 BigDigit tmp = magnitude.front;
636                 magnitude.popFront();
637                 foreach (_; 1 .. remainder)
638                 {
639                     tmp = (tmp << (E.sizeof * 8)) | magnitude.front;
640                     magnitude.popFront();
641                 }
642                 newDigits[$-1] = tmp;
643             }
644             // Assemble full digits from most to least significant.
645             foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)])
646             {
647                 BigDigit tmp;
648                 static foreach (n; 0 .. elementsPerDigit)
649                 {
650                     tmp |= cast(BigDigit) magnitude.front <<
651                         ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8);
652                     magnitude.popFront();
653                 }
654                 digit = tmp;
655             }
656         }
657         else static if (E.sizeof > BigDigit.sizeof)
658         {
659             enum digitsPerElement = E.sizeof / BigDigit.sizeof;
660             newDigits = new BigDigit[inputLen * digitsPerElement];
661             size_t i = newDigits.length - 1;
662             foreach (element; magnitude)
663             {
664                 static foreach (n; 0 .. digitsPerElement)
665                     newDigits[i - n] =
666                         cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8));
667                 i -= digitsPerElement;
668             }
669             while (newDigits[$-1] == 0)
670                 newDigits = newDigits[0 .. $-1];
671         }
672         else
673             static assert(0);
674         this.data = trustedAssumeUnique(newDigits);
675         return;
676     }
677 
678     nothrow pure @safe unittest
679     {
680         immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD];
681         // Internal representation is most-significant-last but `fromMagnitude`
682         // argument is most-significant-first.
683         immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005];
684         BigUint b;
685         // Test with reference magnitude.
686         b.fromMagnitude(referenceMagnitude);
687         assert(b.data == referenceData);
688         // Test ubyte array.
689         import std.bitmanip : nativeToBigEndian;
690         ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~
691             nativeToBigEndian(referenceMagnitude[1]) ~
692             nativeToBigEndian(referenceMagnitude[2]);
693         b.data = ZERO;
694         b.fromMagnitude(ubyteMagnitude);
695         assert(b.data == referenceData);
696         // Test ulong array.
697         static if (BigDigit.sizeof == uint.sizeof)
698             immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]),
699                 ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2],
700             ];
701         else static if (BigDigit.sizeof == ulong.sizeof)
702             alias ulongMagnitude = referenceMagnitude;
703         b.data = ZERO;
704         b.fromMagnitude(ulongMagnitude);
705         assert(b.data == referenceData);
706     }
707 
708     ////////////////////////
709     //
710     // All of these member functions create a new BigUint.
711 
712     // return x >> y
713     BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope
714         if (op == ">>" && is (Tulong == ulong))
715     {
716         assert(y > 0, "Can not right shift BigUint by 0");
717         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
718         if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO);
719         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
720         if (bits == 0)
721         {
722             return BigUint(data[words..$]);
723         }
724         else
725         {
726             uint [] result = new BigDigit[data.length - words];
727             multibyteShr(result, data[words..$], bits);
728 
729             if (result.length > 1 && result[$-1] == 0)
730                 return BigUint(trustedAssumeUnique(result[0 .. $-1]));
731             else
732                 return BigUint(trustedAssumeUnique(result));
733         }
734     }
735 
736     // return x << y
737     BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope
738         if (op == "<<" && is (Tulong == ulong))
739     {
740         assert(y > 0, "Can not left shift BigUint by 0");
741         if (isZero()) return this;
742         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
743         assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max),
744                 "Shift result exceeds temporary store");
745         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
746         BigDigit [] result = new BigDigit[data.length + words+1];
747         result[0 .. words] = 0;
748         if (bits == 0)
749         {
750             result[words .. words+data.length] = data[];
751             return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
752         }
753         else
754         {
755             immutable c = multibyteShl(result[words .. words+data.length], data, bits);
756             if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
757             result[$-1] = c;
758             return BigUint(trustedAssumeUnique(result));
759         }
760     }
761 
762     // If wantSub is false, return x + y, leaving sign unchanged
763     // If wantSub is true, return abs(x - y), negating sign if x < y
764     static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y,
765             bool wantSub, ref bool sign) pure nothrow @safe if (is(Tulong == ulong))
766     {
767         BigUint r;
768         if (wantSub)
769         {   // perform a subtraction
770             if (x.data.length > 2)
771             {
772                 // subInt returns GC allocated array, can be safely cast to immutable
773                 r.data = (() @trusted => cast(immutable) subInt(x.data, y))();
774             }
775             else
776             {   // could change sign!
777                 ulong xx = x.data[0];
778                 if (x.data.length > 1)
779                     xx += (cast(ulong) x.data[1]) << 32;
780                 ulong d;
781                 if (xx <= y)
782                 {
783                     d = y - xx;
784                     sign = !sign;
785                 }
786                 else
787                 {
788                     d = xx - y;
789                 }
790                 if (d == 0)
791                 {
792                     r = 0UL;
793                     sign = false;
794                     return r;
795                 }
796                 if (d > uint.max)
797                 {
798                     r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)];
799                 }
800                 else
801                 {
802                     r.data = [cast(uint)(d & 0xFFFF_FFFF)];
803                 }
804             }
805         }
806         else
807         {
808             // addInt returns GC allocated array, can be safely cast to immutable
809             r.data = (() @trusted => cast(immutable) addInt(x.data, y))();
810         }
811         return r;
812     }
813 
814     // If wantSub is false, return x + y, leaving sign unchanged.
815     // If wantSub is true, return abs(x - y), negating sign if x<y
816     static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign)
817         pure nothrow @safe
818     {
819         BigUint r;
820         if (wantSub)
821         {   // perform a subtraction
822             bool negative;
823             // sub returns GC allocated array, can be safely cast to immutable
824             r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))();
825             sign ^= negative;
826             if (r.isZero())
827             {
828                 sign = false;
829             }
830         }
831         else
832         {
833             // add returns GC allocated array, can be safely cast to immutable
834             r.data = (() @trusted => cast(immutable) add(x.data, y.data))();
835         }
836         return r;
837     }
838 
839 
840     //  return x*y.
841     static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe
842     {
843         if (y == 0 || x == 0) return BigUint(ZERO);
844         static if (T.sizeof * 8 <= 32)
845             uint hi = 0;
846         else
847             uint hi = cast(uint) (y >>> 32);
848         uint lo = cast(uint) (y & 0xFFFF_FFFF);
849         uint [] result = new BigDigit[x.data.length+1+(hi != 0)];
850         result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0);
851         if (hi != 0)
852         {
853             result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1],
854                 x.data, hi, 0);
855         }
856         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
857     }
858 
859     /*  return x * y.
860      */
861     static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe
862     {
863         if (y == 0 || x == 0)
864             return BigUint(ZERO);
865         auto len = x.data.length + y.data.length;
866         BigDigit [] result = new BigDigit[len];
867         if (y.data.length > x.data.length)
868         {
869             mulInternal(result, y.data, x.data);
870         }
871         else
872         {
873             if (x.data[]==y.data[]) squareInternal(result, x.data);
874             else mulInternal(result, x.data, y.data);
875         }
876         // the highest element could be zero,
877         // in which case we need to reduce the length
878         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
879     }
880 
881     // return x / y
882     static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe
883     if ( is(immutable T == immutable uint) )
884     {
885         uint y = y_;
886         if (y == 1)
887             return x;
888         uint [] result = new BigDigit[x.data.length];
889         if ((y&(-y))==y)
890         {
891             assert(y != 0, "BigUint division by zero");
892             // perfect power of 2
893             uint b = 0;
894             for (;y != 1; y>>=1)
895             {
896                 ++b;
897             }
898             multibyteShr(result, x.data, b);
899         }
900         else
901         {
902             result[] = x.data[];
903             cast(void) multibyteDivAssign(result, y, 0);
904         }
905         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
906     }
907 
908     static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe
909     if ( is(immutable T == immutable ulong) )
910     {
911         if (y <= uint.max)
912             return divInt!uint(x, cast(uint) y);
913         if (x.data.length < 2)
914             return BigUint(ZERO);
915         uint hi = cast(uint)(y >>> 32);
916         uint lo = cast(uint)(y & 0xFFFF_FFFF);
917         immutable uint[2] z = [lo, hi];
918         BigDigit[] result = new BigDigit[x.data.length - z.length + 1];
919         divModInternal(result, null, x.data, z[]);
920         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
921     }
922 
923     // return x % y
924     static uint modInt(T)(scope BigUint x, T y_) pure if ( is(immutable T == immutable uint) )
925     {
926         import core.memory : GC;
927         uint y = y_;
928         assert(y != 0, "% 0 not allowed");
929         if ((y&(-y)) == y)
930         {   // perfect power of 2
931             return x.data[0] & (y-1);
932         }
933         else
934         {
935             // horribly inefficient - malloc, copy, & store are unnecessary.
936             uint [] wasteful = new BigDigit[x.data.length];
937             wasteful[] = x.data[];
938             immutable rem = multibyteDivAssign(wasteful, y, 0);
939             () @trusted { GC.free(wasteful.ptr); } ();
940             return rem;
941         }
942     }
943 
944     // return x / y
945     static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe
946     {
947         if (y.data.length > x.data.length)
948             return BigUint(ZERO);
949         if (y.data.length == 1)
950             return divInt(x, y.data[0]);
951         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
952            divModInternal(result, null, x.data, y.data);
953         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
954     }
955 
956     // return x % y
957     static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe
958     {
959         if (y.data.length > x.data.length) return x;
960         if (y.data.length == 1)
961         {
962             return BigUint([modInt(x, y.data[0])]);
963         }
964         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
965         BigDigit [] rem = new BigDigit[y.data.length];
966         divModInternal(result, rem, x.data, y.data);
967         return BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
968     }
969 
970     // Return x / y in quotient, x % y in remainder
971     static void divMod(BigUint x, scope BigUint y,
972                        out BigUint quotient, out BigUint remainder) pure nothrow @safe
973     {
974         /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */
975         if (y.data.length > x.data.length)
976         {
977             quotient = 0uL;
978             remainder = x;
979         }
980         else if (y.data.length == 1)
981         {
982             quotient = divInt(x, y.data[0]);
983             remainder = BigUint([modInt(x, y.data[0])]);
984         }
985         else
986         {
987             BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1];
988             BigDigit[] rem = new BigDigit[y.data.length];
989             divModInternal(quot, rem, x.data, y.data);
990             quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot)));
991             remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
992         }
993     }
994 
995     // return x op y
996     static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign)
997     pure nothrow @safe if (op == "|" || op == "^" || op == "&")
998     {
999         auto d1 = includeSign(x.data, y.uintLength, xSign);
1000         auto d2 = includeSign(y.data, x.uintLength, ySign);
1001 
1002         foreach (i; 0 .. d1.length)
1003         {
1004             mixin("d1[i] " ~ op ~ "= d2[i];");
1005         }
1006 
1007         mixin("resultSign = xSign " ~ op ~ " ySign;");
1008 
1009         if (resultSign)
1010         {
1011             twosComplement(d1, d1);
1012         }
1013 
1014         return BigUint(removeLeadingZeros(trustedAssumeUnique(d1)));
1015     }
1016 
1017     /**
1018      * Return a BigUint which is x raised to the power of y.
1019      * Method: Powers of 2 are removed from x, then left-to-right binary
1020      * exponentiation is used.
1021      * Memory allocation is minimized: at most one temporary BigUint is used.
1022      */
1023     static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe
1024     {
1025         // Deal with the degenerate cases first.
1026         if (y == 0) return BigUint(ONE);
1027         if (y == 1) return x;
1028         if (x == 0 || x == 1) return x;
1029 
1030         BigUint result;
1031 
1032         // Simplify, step 1: Remove all powers of 2.
1033         uint firstnonzero = firstNonZeroDigit(x.data);
1034         // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits))
1035         // where BigDigitBits = BigDigit.sizeof * 8
1036 
1037         // See if x[firstnonzero..$] can now fit into a single digit.
1038         bool singledigit = ((x.data.length - firstnonzero) == 1);
1039         // If true, then x0 is that digit
1040         // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits))
1041         BigDigit x0 = x.data[firstnonzero];
1042         assert(x0 != 0, "pow(0, y) not allowed");
1043         // Length of the non-zero portion
1044         size_t nonzerolength = x.data.length - firstnonzero;
1045         ulong y0;
1046         uint evenbits = 0; // number of even bits in the bottom of x
1047         while (!(x0 & 1))
1048         {
1049             x0 >>= 1;
1050             ++evenbits;
1051         }
1052 
1053         if (x.data.length- firstnonzero == 2)
1054         {
1055             // Check for a single digit straddling a digit boundary
1056             const BigDigit x1 = x.data[firstnonzero+1];
1057             if ((x1 >> evenbits) == 0)
1058             {
1059                 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits));
1060                 singledigit = true;
1061             }
1062         }
1063         // Now if (singledigit), x^^y  = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1064 
1065         uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end
1066 
1067         // Simplify, step 2: For singledigits, see if we can trivially reduce y
1068 
1069         BigDigit finalMultiplier = 1UL;
1070 
1071         if (singledigit)
1072         {
1073             // x fits into a single digit. Raise it to the highest power we can
1074             // that still fits into a single digit, then reduce the exponent accordingly.
1075             // We're quite likely to have a residual multiply at the end.
1076             // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100.
1077             // and 5^^13 still fits into a uint.
1078             evenshiftbits  = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK);
1079             if (x0 == 1)
1080             {   // Perfect power of 2
1081                 result = 1UL;
1082                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1083             }
1084             immutable p = highestPowerBelowUintMax(x0);
1085             if (y <= p)
1086             {   // Just do it with pow
1087                 result = cast(ulong) intpow(x0, y);
1088                 if (evenbits + firstnonzero == 0)
1089                     return result;
1090                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1091             }
1092             y0 = y / p;
1093             finalMultiplier = intpow(x0, y - y0*p);
1094             x0 = intpow(x0, p);
1095             // Result is x0
1096             nonzerolength = 1;
1097         }
1098         // Now if (singledigit), x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1099 
1100         // Perform a crude check for overflow and allocate result buffer.
1101         // The length required is y * lg2(x) bits.
1102         // which will always fit into y*x.length digits. But this is
1103         // a gross overestimate if x is small (length 1 or 2) and the highest
1104         // digit is nearly empty.
1105         // A better estimate is:
1106         //   y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits,
1107         //  and the first term is always between
1108         //  y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and
1109         //  y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS
1110         // For single digit payloads, we already have
1111         //   x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1112         // and x0 is almost a full digit, so it's a tight estimate.
1113         // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y
1114         // Note that the divisions must be rounded up.
1115 
1116         // Estimated length in BigDigits
1117         immutable estimatelength = singledigit
1118             ? 1 + y0 + ((evenbits*y  + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y
1119             :  x.data.length * y;
1120         // Imprecise check for overflow. Makes the extreme cases easier to debug
1121         // (less extreme overflow will result in an out of memory error).
1122         if (estimatelength > uint.max/(4*BigDigit.sizeof))
1123             assert(0, "Overflow in BigInt.pow");
1124 
1125         // The result buffer includes space for all the trailing zeros
1126         BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength];
1127 
1128         // Do all the powers of 2!
1129         size_t result_start = cast(size_t)( firstnonzero * y
1130             + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0));
1131 
1132         resultBuffer[0 .. result_start] = 0;
1133         BigDigit [] t1 = resultBuffer[result_start..$];
1134         BigDigit [] r1;
1135 
1136         if (singledigit)
1137         {
1138             r1 = t1[0 .. 1];
1139             r1[0] = x0;
1140             y = y0;
1141         }
1142         else
1143         {
1144             // It's not worth right shifting by evenbits unless we also shrink the length after each
1145             // multiply or squaring operation. That might still be worthwhile for large y.
1146             r1 = t1[0 .. x.data.length - firstnonzero];
1147             r1[0..$] = x.data[firstnonzero..$];
1148         }
1149 
1150         if (y>1)
1151         {   // Set r1 = r1 ^^ y.
1152             // The secondary buffer only needs space for the multiplication results
1153             BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start];
1154             BigDigit [] r2;
1155 
1156             int shifts = 63; // num bits in a long
1157             while (!(y & 0x8000_0000_0000_0000L))
1158             {
1159                 y <<= 1;
1160                 --shifts;
1161             }
1162             y <<=1;
1163 
1164             while (y != 0)
1165             {
1166                 // For each bit of y: Set r1 =  r1 * r1
1167                 // If the bit is 1, set r1 = r1 * x
1168                 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5.
1169                 // Optimization opportunity: if more than 2 bits in y are set,
1170                 // it's usually possible to reduce the number of multiplies
1171                 // by caching odd powers of x. eg for y = 54,
1172                 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2
1173                 r2 = t2[0 .. r1.length*2];
1174                 squareInternal(r2, r1);
1175                 if (y & 0x8000_0000_0000_0000L)
1176                 {
1177                     r1 = t1[0 .. r2.length + nonzerolength];
1178                     if (singledigit)
1179                     {
1180                         r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0);
1181                     }
1182                     else
1183                     {
1184                         mulInternal(r1, r2, x.data[firstnonzero..$]);
1185                     }
1186                 }
1187                 else
1188                 {
1189                     r1 = t1[0 .. r2.length];
1190                     r1[] = r2[];
1191                 }
1192                 y <<=1;
1193                 shifts--;
1194             }
1195             while (shifts>0)
1196             {
1197                 r2 = t2[0 .. r1.length * 2];
1198                 squareInternal(r2, r1);
1199                 r1 = t1[0 .. r2.length];
1200                 r1[] = r2[];
1201                 --shifts;
1202             }
1203         }
1204 
1205         if (finalMultiplier != 1)
1206         {
1207             const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0);
1208             if (carry)
1209             {
1210                 r1 = t1[0 .. r1.length + 1];
1211                 r1[$-1] = carry;
1212             }
1213         }
1214         if (evenshiftbits)
1215         {
1216             const BigDigit carry = multibyteShl(r1, r1, evenshiftbits);
1217             if (carry != 0)
1218             {
1219                 r1 = t1[0 .. r1.length + 1];
1220                 r1[$ - 1] = carry;
1221             }
1222         }
1223         while (r1[$ - 1]==0)
1224         {
1225             r1=r1[0 .. $ - 1];
1226         }
1227         return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length]));
1228     }
1229 
1230     // Implement toHash so that BigUint works properly as an AA key.
1231     size_t toHash() const @nogc nothrow pure @safe scope
1232     {
1233         return .hashOf(data);
1234     }
1235 
1236 } // end BigUint
1237 
1238 @safe pure nothrow unittest
1239 {
1240     // ulong comparison test
1241     BigUint a = [1];
1242     assert(a == 1);
1243     // https://issues.dlang.org/show_bug.cgi?id=9548
1244     assert(a < 0x8000_0000_0000_0000UL);
1245 
1246     // https://issues.dlang.org/show_bug.cgi?id=12234
1247     BigUint z = [0];
1248     assert(z == 0UL);
1249     assert(!(z > 0UL));
1250     assert(!(z < 0UL));
1251 }
1252 
1253 // https://issues.dlang.org/show_bug.cgi?id=16223
1254 @system pure nothrow unittest
1255 {
1256     BigUint a = [3];
1257     int b = 5;
1258     assert(BigUint.mulInt(a,b) == 15);
1259 }
1260 
1261 // Remove leading zeros from x, to restore the BigUint invariant
1262 inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe
1263 {
1264     size_t k = x.length;
1265     while (k>1 && x[k - 1]==0) --k;
1266     return x[0 .. k];
1267 }
1268 
1269 pure @safe unittest
1270 {
1271    BigUint r = BigUint([5]);
1272    BigUint t = BigUint([7]);
1273    BigUint s = BigUint.mod(r, t);
1274    assert(s == 5);
1275 }
1276 
1277 
1278 @safe pure unittest
1279 {
1280     BigUint r;
1281     r = 5UL;
1282     assert(r.peekUlong(0) == 5UL);
1283     assert(r.peekUint(0) == 5U);
1284     r = 0x1234_5678_9ABC_DEF0UL;
1285     assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL);
1286     assert(r.peekUint(0) == 0x9ABC_DEF0U);
1287 }
1288 
1289 
1290 // Pow tests
1291 pure @safe unittest
1292 {
1293     BigUint r, s;
1294     r.fromHexString("80000000_00000001");
1295     s = BigUint.pow(r, 5);
1296     r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000"
1297       ~ "_00000002_80000000_00000001");
1298     assert(s == r);
1299     s = 10UL;
1300     s = BigUint.pow(s, 39);
1301     r.fromDecimalString("1000000000000000000000000000000000000000");
1302     assert(s == r);
1303     r.fromHexString("1_E1178E81_00000000");
1304     s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds
1305 
1306     r.fromDecimalString("000_000_00");
1307     assert(r == 0);
1308     r.fromDecimalString("0007");
1309     assert(r == 7);
1310     r.fromDecimalString("0");
1311     assert(r == 0);
1312 }
1313 
1314 // Radix conversion tests
1315 @safe pure unittest
1316 {
1317     BigUint r;
1318     r.fromHexString("1_E1178E81_00000000");
1319     assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000");
1320     assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000");
1321     assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000");
1322     assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000");
1323     assert(r.toHexString(0, '_', 16+8+8) ==   "00000000_00000001_E1178E81_00000000");
1324     assert(r.toHexString(0, '_', 16+8+8+1) ==      "0_00000000_00000001_E1178E81_00000000");
1325     assert(r.toHexString(0, '_', 16+8+8+1, ' ') == "                  1_E1178E81_00000000");
1326     assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000");
1327     r = 0UL;
1328     assert(r.toHexString(0, '_', 0) == "0");
1329     assert(r.toHexString(0, '_', 7) == "0000000");
1330     assert(r.toHexString(0, '_', 7, ' ') == "      0");
1331     assert(r.toHexString(0, '#', 9) == "0#00000000");
1332     assert(r.toHexString(0, 0, 9) == "000000000");
1333 }
1334 
1335 //
1336 @safe pure unittest
1337 {
1338     BigUint r;
1339     r.fromHexString("1_E1178E81_00000000");
1340     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000");
1341     assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000");
1342     assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000");
1343     assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000");
1344     assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) ==   "00000000_00000001_e1178e81_00000000");
1345     assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000");
1346     assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == "                  1_e1178e81_00000000");
1347     assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000");
1348     r = 0UL;
1349     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0");
1350     assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000");
1351     assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == "      0");
1352     assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000");
1353     assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000");
1354     assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000");
1355 }
1356 
1357 
1358 private:
1359 void twosComplement(const(BigDigit) [] x, BigDigit[] result)
1360 pure nothrow @safe
1361 {
1362     foreach (i; 0 .. x.length)
1363     {
1364         result[i] = ~x[i];
1365     }
1366     result[x.length..$] = BigDigit.max;
1367 
1368     foreach (i; 0 .. result.length)
1369     {
1370         if (result[i] == BigDigit.max)
1371         {
1372             result[i] = 0;
1373         }
1374         else
1375         {
1376             result[i] += 1;
1377             break;
1378         }
1379     }
1380 }
1381 
1382 // Encode BigInt as BigDigit array (sign and 2's complement)
1383 BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign)
1384 pure nothrow @safe
1385 {
1386     size_t length = (x.length > minSize) ? x.length : minSize;
1387     BigDigit [] result = new BigDigit[length];
1388     if (sign)
1389     {
1390         twosComplement(x, result);
1391     }
1392     else
1393     {
1394         result[0 .. x.length] = x;
1395     }
1396     return result;
1397 }
1398 
1399 // works for any type
1400 T intpow(T)(T x, ulong n) pure nothrow @safe
1401 {
1402     T p;
1403 
1404     switch (n)
1405     {
1406     case 0:
1407         p = 1;
1408         break;
1409 
1410     case 1:
1411         p = x;
1412         break;
1413 
1414     case 2:
1415         p = x * x;
1416         break;
1417 
1418     default:
1419         p = 1;
1420         while (1)
1421         {
1422             if (n & 1)
1423                 p *= x;
1424             n >>= 1;
1425             if (!n)
1426                 break;
1427             x *= x;
1428         }
1429         break;
1430     }
1431     return p;
1432 }
1433 
1434 
1435 //  returns the maximum power of x that will fit in a uint.
1436 int highestPowerBelowUintMax(uint x) pure nothrow @safe
1437 {
1438      assert(x > 1, "x must be greater than 1");
1439      static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9,
1440                                           8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7];
1441      if (x<24) return maxpwr[x-2];
1442      if (x<41) return 6;
1443      if (x<85) return 5;
1444      if (x<256) return 4;
1445      if (x<1626) return 3;
1446      if (x<65_536) return 2;
1447      return 1;
1448 }
1449 
1450 //  returns the maximum power of x that will fit in a ulong.
1451 int highestPowerBelowUlongMax(uint x) pure nothrow @safe
1452 {
1453      assert(x > 1, "x must be greater than 1");
1454      static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18,
1455                                          17, 17, 16, 16, 15, 15, 15, 15, 14, 14,
1456                                          14, 14, 13, 13, 13, 13, 13, 13, 13, 12,
1457                                          12, 12, 12, 12, 12, 12, 12, 12, 12];
1458      if (x<41) return maxpwr[x-2];
1459      if (x<57) return 11;
1460      if (x<85) return 10;
1461      if (x<139) return 9;
1462      if (x<256) return 8;
1463      if (x<566) return 7;
1464      if (x<1626) return 6;
1465      if (x<7132) return 5;
1466      if (x<65_536) return 4;
1467      if (x<2_642_246) return 3;
1468      return 2;
1469 }
1470 
1471 version (StdUnittest)
1472 {
1473 
1474 private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe
1475 {
1476      int pwr = 1;
1477      for (ulong q = x;x*q < cast(ulong) uint.max; )
1478      {
1479          q*=x; ++pwr;
1480      }
1481      return pwr;
1482 }
1483 
1484 @safe pure unittest
1485 {
1486     assert(highestPowerBelowUintMax(10)==9);
1487     for (int k=82; k<88; ++k)
1488     {
1489         assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k));
1490     }
1491 }
1492 
1493 }
1494 
1495 
1496 /*  General unsigned subtraction routine for bigints.
1497  *  Sets result = x - y. If the result is negative, negative will be true.
1498  * Returns:
1499  *    unique memory
1500  */
1501 BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative)
1502 pure nothrow @safe
1503 {
1504     if (x.length == y.length)
1505     {
1506         // There's a possibility of cancellation, if x and y are almost equal.
1507         ptrdiff_t last = highestDifferentDigit(x, y);
1508         BigDigit [] result = new BigDigit[last+1];
1509         if (x[last] < y[last])
1510         {   // we know result is negative
1511             multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0);
1512             *negative = true;
1513         }
1514         else
1515         {   // positive or zero result
1516             multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0);
1517             *negative = false;
1518         }
1519         while (result.length > 1 && result[$-1] == 0)
1520         {
1521             result = result[0..$-1];
1522         }
1523 //        if (result.length >1 && result[$-1]==0) return result[0..$-1];
1524         return result;
1525     }
1526     // Lengths are different
1527     const(BigDigit) [] large, small;
1528     if (x.length < y.length)
1529     {
1530         *negative = true;
1531         large = y; small = x;
1532     }
1533     else
1534     {
1535         *negative = false;
1536         large = x; small = y;
1537     }
1538     // result.length will be equal to larger length, or could decrease by 1.
1539 
1540 
1541     BigDigit [] result = new BigDigit[large.length];
1542     BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0);
1543     result[small.length..$] = large[small.length..$];
1544     if (carry)
1545     {
1546         multibyteIncrementAssign!('-')(result[small.length..$], carry);
1547     }
1548     while (result.length > 1 && result[$-1] == 0)
1549     {
1550         result = result[0..$-1];
1551     }
1552     return result;
1553 }
1554 
1555 
1556 /*
1557  * return a + b
1558  * Returns:
1559  *    unique memory
1560  */
1561 BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe
1562 {
1563     const(BigDigit) [] x, y;
1564     if (a.length < b.length)
1565     {
1566         x = b; y = a;
1567     }
1568     else
1569     {
1570         x = a; y = b;
1571     }
1572     // now we know x.length > y.length
1573     // create result. add 1 in case it overflows
1574     BigDigit [] result = new BigDigit[x.length + 1];
1575 
1576     BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0);
1577     if (x.length != y.length)
1578     {
1579         result[y.length..$-1]= x[y.length..$];
1580         carry  = multibyteIncrementAssign!('+')(result[y.length..$-1], carry);
1581     }
1582     if (carry)
1583     {
1584         result[$-1] = carry;
1585         return result;
1586     }
1587     else
1588         return result[0..$-1];
1589 }
1590 
1591 /**  return x + y
1592  */
1593 BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow
1594 {
1595     uint hi = cast(uint)(y >>> 32);
1596     uint lo = cast(uint)(y& 0xFFFF_FFFF);
1597     auto len = x.length;
1598     if (x.length < 2 && hi != 0) ++len;
1599     BigDigit [] result = new BigDigit[len+1];
1600     result[0 .. x.length] = x[];
1601     if (x.length < 2 && hi != 0)
1602     {
1603         result[1]=hi;
1604         hi=0;
1605     }
1606     uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo);
1607     if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi);
1608     if (carry)
1609     {
1610         result[$-1] = carry;
1611         return result;
1612     }
1613     else
1614         return result[0..$-1];
1615 }
1616 
1617 /** Return x - y.
1618  *  x must be greater than y.
1619  */
1620 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe
1621 {
1622     uint hi = cast(uint)(y >>> 32);
1623     uint lo = cast(uint)(y & 0xFFFF_FFFF);
1624     BigDigit [] result = new BigDigit[x.length];
1625     result[] = x[];
1626     multibyteIncrementAssign!('-')(result[], lo);
1627     if (hi)
1628         multibyteIncrementAssign!('-')(result[1..$], hi);
1629     if (result[$-1] == 0)
1630         return result[0..$-1];
1631     else
1632         return result;
1633 }
1634 
1635 /**  General unsigned multiply routine for bigints.
1636  *  Sets result = x * y.
1637  *
1638  *  The length of y must not be larger than the length of x.
1639  *  Different algorithms are used, depending on the lengths of x and y.
1640  *  TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the
1641  *  unbalanced case. (But I doubt it would be faster in practice).
1642  *
1643  */
1644 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1645     pure nothrow @safe
1646 {
1647     import core.memory : GC;
1648     assert( result.length == x.length + y.length,
1649             "result array must have enough space to store computed result");
1650     assert( y.length > 0, "y must not be empty");
1651     assert( x.length >= y.length, "x must be greater or equal than y");
1652     if (y.length <= KARATSUBALIMIT)
1653     {
1654         // Small multiplier, we'll just use the asm classic multiply.
1655         if (y.length == 1)
1656         {   // Trivial case, no cache effects to worry about
1657             result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0);
1658             return;
1659         }
1660 
1661         immutable CACHELIMIT = getCacheLimit;
1662         if (x.length + y.length < CACHELIMIT)
1663             return mulSimple(result, x, y);
1664 
1665         // If x is so big that it won't fit into the cache, we divide it into chunks
1666         // Every chunk must be greater than y.length.
1667         // We make the first chunk shorter, if necessary, to ensure this.
1668 
1669         auto chunksize = CACHELIMIT / y.length;
1670         immutable residual  =  x.length % chunksize;
1671         if (residual < y.length)
1672         {
1673             chunksize -= y.length;
1674         }
1675 
1676         // Use schoolbook multiply.
1677         mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y);
1678         auto done = chunksize;
1679 
1680         while (done < x.length)
1681         {
1682             // result[done .. done+ylength] already has a value.
1683             chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) :  x.length - done;
1684             BigDigit [KARATSUBALIMIT] partial;
1685             partial[0 .. y.length] = result[done .. done+y.length];
1686             mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y);
1687             addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]);
1688             done += chunksize;
1689         }
1690         return;
1691     }
1692 
1693     immutable half = (x.length >> 1) + (x.length & 1);
1694     if (2*y.length*y.length <= x.length*x.length)
1695     {
1696         // UNBALANCED MULTIPLY
1697         // Use school multiply to cut into quasi-squares of Karatsuba-size
1698         // or larger. The ratio of the two sides of the 'square' must be
1699         // between 1.414:1 and 1:1. Use Karatsuba on each chunk.
1700         //
1701         // For maximum performance, we want the ratio to be as close to
1702         // 1:1 as possible. To achieve this, we can either pad x or y.
1703         // The best choice depends on the modulus x%y.
1704         auto numchunks = x.length / y.length;
1705         auto chunksize = y.length;
1706         auto extra =  x.length % y.length;
1707         auto maxchunk = chunksize + extra;
1708         bool paddingY; // true = we're padding Y, false = we're padding X.
1709         bool isExtraSmall = extra * extra * 2 < y.length * y.length;
1710         if (numchunks == 1 && isExtraSmall)
1711         {
1712             // We divide (x_first_half * y) and (x_last_half * y)
1713             // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)).
1714             // (1.414 ~ 1.707)/2:1 is balanced.
1715             BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length];
1716             BigDigit [] partial = scratchbuff[$ - y.length .. $];
1717             scratchbuff = scratchbuff[0 .. $ - y.length];
1718             mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff);
1719             partial[] = result[half .. half + y.length];
1720             mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff);
1721             BigDigit c = addAssignSimple(result[half .. half + y.length], partial);
1722             if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c);
1723             () @trusted { GC.free(scratchbuff.ptr); } ();
1724         }
1725         else
1726         {
1727             if (isExtraSmall)
1728             {
1729                 // The leftover bit is small enough that it should be incorporated
1730                 // in the existing chunks.
1731                 // Make all the chunks a tiny bit bigger
1732                 // (We're padding y with zeros)
1733                 chunksize += extra / numchunks;
1734                 extra = x.length - chunksize*numchunks;
1735                 // there will probably be a few left over.
1736                 // Every chunk will either have size chunksize, or chunksize+1.
1737                 maxchunk = chunksize + 1;
1738                 paddingY = true;
1739                 assert(chunksize + extra + chunksize *(numchunks-1) == x.length,
1740                     "Unexpected size");
1741             }
1742             else
1743             {
1744                 // the extra bit is large enough that it's worth making a new chunk.
1745                 // (This means we're padding x with zeros, when doing the first one).
1746                 maxchunk = chunksize;
1747                 ++numchunks;
1748                 paddingY = false;
1749                 assert(extra + chunksize *(numchunks-1) == x.length,
1750                     "Unexpected size");
1751             }
1752             // We make the buffer a bit bigger so we have space for the partial sums.
1753             BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1754             BigDigit [] partial = scratchbuff[$ - y.length .. $];
1755             scratchbuff = scratchbuff[0 .. $ - y.length];
1756             size_t done; // how much of X have we done so far?
1757             if (paddingY)
1758             {
1759                 // If the first chunk is bigger, do it first. We're padding y.
1760                 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1761                     x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff);
1762                 done = chunksize + (extra > 0 ? 1 : 0);
1763                 if (extra) --extra;
1764             }
1765             else
1766             {   // We're padding X. Begin with the extra bit.
1767                 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1768                 done = extra;
1769                 extra = 0;
1770             }
1771             immutable basechunksize = chunksize;
1772             while (done < x.length)
1773             {
1774                 chunksize = basechunksize + (extra > 0 ? 1 : 0);
1775                 if (extra) --extra;
1776                 partial[] = result[done .. done+y.length];
1777                 mulKaratsuba(result[done .. done + y.length + chunksize],
1778                         x[done .. done+chunksize], y, scratchbuff);
1779                 addAssignSimple(result[done .. done + y.length + chunksize], partial);
1780                 done += chunksize;
1781             }
1782             () @trusted { GC.free(scratchbuff.ptr); } ();
1783         }
1784     }
1785     else
1786     {
1787         // Balanced. Use Karatsuba directly.
1788         BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1789         mulKaratsuba(result, x, y, scratchbuff);
1790         () @trusted { GC.free(scratchbuff.ptr); } ();
1791     }
1792 }
1793 
1794 // https://issues.dlang.org/show_bug.cgi?id=20493
1795 @safe unittest
1796 {
1797     // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300)
1798     // the number itself isn't important, only the amount of digits, so we do a simpler
1799     // multiplication of the same size, analogous to:
1800     // 11111111 * 11111111 = 0123456787654321
1801     // but instead of base 10, it's in base `BigDigit`
1802 
1803     BigDigit[398] x = 1;
1804     BigDigit[236] y = 1;
1805     BigDigit[x.length + y.length] result;
1806     mulInternal(result[], x[], y[]);
1807 
1808     // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0]
1809     BigDigit[x.length + y.length] expected = y.length;
1810     foreach (BigDigit i; 0 .. y.length)
1811     {
1812         expected[i] = i+1;
1813         expected[$-1-i] = i;
1814     }
1815 
1816     assert(result == expected);
1817 }
1818 
1819 /**  General unsigned squaring routine for BigInts.
1820  *   Sets result = x*x.
1821  *   NOTE: If the highest half-digit of x is zero, the highest digit of result will
1822  *   also be zero.
1823  */
1824 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe
1825 {
1826   import core.memory : GC;
1827   // Squaring is potentially half a multiply, plus add the squares of
1828   // the diagonal elements.
1829   assert(result.length == 2*x.length,
1830      "result needs to have twice the capacity of x");
1831   if (x.length <= KARATSUBASQUARELIMIT)
1832   {
1833       if (x.length == 1)
1834       {
1835          result[1] = multibyteMul(result[0 .. 1], x, x[0], 0);
1836          return;
1837       }
1838       return squareSimple(result, x);
1839   }
1840   // The nice thing about squaring is that it always stays balanced
1841   BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1842   squareKaratsuba(result, x, scratchbuff);
1843   () @trusted { GC.free(scratchbuff.ptr); } ();
1844 }
1845 
1846 
1847 import core.bitop : bsr;
1848 
1849 /// if remainder is null, only calculate quotient.
1850 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u,
1851         const BigDigit [] v) pure nothrow @safe
1852 {
1853     import core.memory : GC;
1854     assert(quotient.length == u.length - v.length + 1,
1855         "Invalid quotient length");
1856     assert(remainder == null || remainder.length == v.length,
1857         "Invalid remainder");
1858     assert(v.length > 1, "v must have more than 1 element");
1859     assert(u.length >= v.length, "u must be as longer or longer than v");
1860 
1861     // Normalize by shifting v left just enough so that
1862     // its high-order bit is on, and shift u left the
1863     // same amount. The highest bit of u will never be set.
1864 
1865     BigDigit [] vn = new BigDigit[v.length];
1866     BigDigit [] un = new BigDigit[u.length + 1];
1867     // How much to left shift v, so that its MSB is set.
1868     uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]);
1869     if (s != 0)
1870     {
1871         multibyteShl(vn, v, s);
1872         un[$-1] = multibyteShl(un[0..$-1], u, s);
1873     }
1874     else
1875     {
1876         vn[] = v[];
1877         un[0..$-1] = u[];
1878         un[$-1] = 0;
1879     }
1880     if (quotient.length<FASTDIVLIMIT)
1881     {
1882         schoolbookDivMod(quotient, un, vn);
1883     }
1884     else
1885     {
1886         blockDivMod(quotient, un, vn);
1887     }
1888 
1889     // Unnormalize remainder, if required.
1890     if (remainder != null)
1891     {
1892         if (s == 0) remainder[] = un[0 .. vn.length];
1893         else multibyteShr(remainder, un[0 .. vn.length+1], s);
1894     }
1895     () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } ();
1896 }
1897 
1898 pure @safe unittest
1899 {
1900     immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000];
1901     immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000];
1902     uint [] q = new uint[u.length - v.length + 1];
1903     uint [] r = new uint[2];
1904     divModInternal(q, r, u, v);
1905     assert(q[]==[0xFFFF_FFFFu, 0]);
1906     assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]);
1907     u = [0, 0xFFFF_FFFE, 0x8000_0001];
1908     v = [0xFFFF_FFFF, 0x8000_0000];
1909     divModInternal(q, r, u, v);
1910 }
1911 
1912 
1913 // Converts a big uint to a hexadecimal string.
1914 //
1915 // Optionally, a separator character (eg, an underscore) may be added between
1916 // every 8 digits.
1917 // buff.length must be data.length*8 if separator is zero,
1918 // or data.length*9 if separator is non-zero. It will be completely filled.
1919 char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0,
1920         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
1921 {
1922     int x=0;
1923     for (ptrdiff_t i=data.length - 1; i >= 0; --i)
1924     {
1925         toHexZeroPadded(buff[x .. x+8], data[i], letterCase);
1926         x+=8;
1927         if (separator)
1928         {
1929             if (i>0) buff[x] = separator;
1930             ++x;
1931         }
1932     }
1933     return buff;
1934 }
1935 
1936 /**
1937  * Convert a big uint into an octal string.
1938  *
1939  * Params:
1940  *  buff = The destination buffer for the octal string. Must be large enough to
1941  *      store the result, including leading zeroes, which is
1942  *      ceil(data.length * BigDigitBits / 3) characters.
1943  *      The buffer is filled from back to front, starting from `buff[$-1]`.
1944  *  data = The biguint to be converted.
1945  *
1946  * Returns: The index of the leading non-zero digit in `buff`. Will be
1947  * `buff.length - 1` if the entire big uint is zero.
1948  */
1949 size_t biguintToOctal(char[] buff, const(BigDigit)[] data)
1950     pure nothrow @safe @nogc
1951 {
1952     ubyte carry = 0;
1953     int shift = 0;
1954     size_t penPos = buff.length - 1;
1955     size_t lastNonZero = buff.length - 1;
1956 
1957     pragma(inline) void output(uint digit) @nogc nothrow
1958     {
1959         if (digit != 0)
1960             lastNonZero = penPos;
1961         buff[penPos--] = cast(char)('0' + digit);
1962     }
1963 
1964     foreach (bigdigit; data)
1965     {
1966         if (shift < 0)
1967         {
1968             // Some bits were carried over from previous word.
1969             assert(shift > -3, "shift must be greater than -3");
1970             output(((bigdigit << -shift) | carry) & 0b111);
1971             shift += 3;
1972             assert(shift > 0, "shift must be 1 or greater");
1973         }
1974 
1975         while (shift <= BigDigitBits - 3)
1976         {
1977             output((bigdigit >>> shift) & 0b111);
1978             shift += 3;
1979         }
1980 
1981         if (shift < BigDigitBits)
1982         {
1983             // Some bits need to be carried forward.
1984             carry = (bigdigit >>> shift) & 0b11;
1985         }
1986         shift -= BigDigitBits;
1987         assert(shift >= -2 && shift <= 0, "shift must in [-2,0]");
1988     }
1989 
1990     if (shift < 0)
1991     {
1992         // Last word had bits that haven't been output yet.
1993         assert(shift > -3, "Shift must be greater than -3");
1994         output(carry);
1995     }
1996 
1997     return lastNonZero;
1998 }
1999 
2000 /** Convert a big uint into a decimal string.
2001  *
2002  * Params:
2003  *  data    The biguint to be converted. Will be destroyed.
2004  *  buff    The destination buffer for the decimal string. Must be
2005  *          large enough to store the result, including leading zeros.
2006  *          Will be filled backwards, starting from buff[$-1].
2007  *
2008  * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length.
2009  * Returns:
2010  *    the lowest index of buff which was used.
2011  */
2012 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe
2013 {
2014     ptrdiff_t sofar = buff.length;
2015     // Might be better to divide by (10^38/2^32) since that gives 38 digits for
2016     // the price of 3 divisions and a shr; this version only gives 27 digits
2017     // for 3 divisions.
2018     while (data.length>1)
2019     {
2020         uint rem = multibyteDivAssign(data, 10_0000_0000, 0);
2021         itoaZeroPadded(buff[sofar-9 .. sofar], rem);
2022         sofar -= 9;
2023         if (data[$-1] == 0 && data.length > 1)
2024         {
2025             data.length = data.length - 1;
2026         }
2027     }
2028     itoaZeroPadded(buff[sofar-10 .. sofar], data[0]);
2029     sofar -= 10;
2030     // and strip off the leading zeros
2031     while (sofar != buff.length-1 && buff[sofar] == '0')
2032         sofar++;
2033     return sofar;
2034 }
2035 
2036 /** Convert a decimal string into a big uint.
2037  *
2038  * Params:
2039  *  data    The biguint to be receive the result. Must be large enough to
2040  *          store the result.
2041  *  s       The decimal string. May contain _ or 0 .. 9
2042  *
2043  * The required length for the destination buffer is slightly less than
2044  *  1 + s.length/log2(10) = 1 + s.length/3.3219.
2045  *
2046  * Returns:
2047  *    the highest index of data which was used.
2048  */
2049 int biguintFromDecimal(Range)(BigDigit[] data, Range s)
2050 if (
2051     isInputRange!Range &&
2052     isSomeChar!(ElementType!Range) &&
2053     !isInfinite!Range)
2054 in
2055 {
2056     static if (hasLength!Range)
2057         assert((data.length >= 2) || (data.length == 1 && s.length == 1),
2058             "data has a invalid length");
2059 }
2060 do
2061 {
2062     import std.conv : ConvException;
2063 
2064     // Convert to base 1e19 = 10_000_000_000_000_000_000.
2065     // (this is the largest power of 10 that will fit into a long).
2066     // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219.
2067     // 485 bits will only just fit into 146 decimal digits.
2068     // As we convert the string, we record the number of digits we've seen in base 19:
2069     // hi is the number of digits/19, lo is the extra digits (0 to 18).
2070     // TODO: This is inefficient for very large strings (it is O(n^^2)).
2071     // We should take advantage of fast multiplication once the numbers exceed
2072     // Karatsuba size.
2073     uint lo = 0; // number of powers of digits, 0 .. 18
2074     uint x = 0;
2075     ulong y = 0;
2076     uint hi = 0; // number of base 1e19 digits
2077     data[0] = 0; // initially number is 0.
2078     if (data.length > 1)
2079         data[1] = 0;
2080 
2081     foreach (character; s)
2082     {
2083         if (character == '_')
2084             continue;
2085 
2086         if (character < '0' || character > '9')
2087             throw new ConvException("invalid digit");
2088         x *= 10;
2089         x += character - '0';
2090         ++lo;
2091         if (lo == 9)
2092         {
2093             y = x;
2094             x = 0;
2095         }
2096         if (lo == 18)
2097         {
2098             y *= 10_0000_0000;
2099             y += x;
2100             x = 0;
2101         }
2102         if (lo == 19)
2103         {
2104             y *= 10;
2105             y += x;
2106             x = 0;
2107             // Multiply existing number by 10^19, then add y1.
2108             if (hi>0)
2109             {
2110                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A
2111                 ++hi;
2112                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000
2113                 ++hi;
2114             }
2115             else
2116                 hi = 2;
2117             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2118             c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2119             if (c != 0)
2120             {
2121                 data[hi]=c;
2122                 ++hi;
2123             }
2124             y = 0;
2125             lo = 0;
2126         }
2127     }
2128     // Now set y = all remaining digits.
2129     if (lo >= 18)
2130     {
2131     }
2132     else if (lo >= 9)
2133     {
2134         for (int k=9; k<lo; ++k) y*=10;
2135         y+=x;
2136     }
2137     else
2138     {
2139         for (int k=0; k<lo; ++k) y*=10;
2140         y+=x;
2141     }
2142     if (lo != 0)
2143     {
2144         if (hi == 0)
2145         {
2146             data[0] = cast(uint) y;
2147             if (data.length == 1)
2148             {
2149                 hi = 1;
2150             }
2151             else
2152             {
2153                 data[1] = cast(uint)(y >>> 32);
2154                 hi=2;
2155             }
2156         }
2157         else
2158         {
2159             while (lo>0)
2160             {
2161                 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0);
2162                 if (c != 0)
2163                 {
2164                     data[hi]=c;
2165                     ++hi;
2166                 }
2167                 --lo;
2168             }
2169             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2170             if (y > 0xFFFF_FFFFL)
2171             {
2172                 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2173             }
2174             if (c != 0)
2175             {
2176                 data[hi]=c;
2177                 ++hi;
2178             }
2179         }
2180     }
2181     while (hi>1 && data[hi-1]==0)
2182         --hi;
2183     return hi;
2184 }
2185 
2186 
2187 // ------------------------
2188 // These in-place functions are only for internal use; they are incompatible
2189 // with COW.
2190 
2191 // Classic 'schoolbook' multiplication.
2192 void mulSimple(BigDigit[] result, const(BigDigit) [] left,
2193         const(BigDigit)[] right) pure nothrow @safe
2194 in
2195 {
2196     assert(result.length == left.length + right.length,
2197         "Result must be able to store left + right");
2198     assert(right.length>1, "right must not be empty");
2199 }
2200 do
2201 {
2202     result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0);
2203     multibyteMultiplyAccumulate(result[1..$], left, right[1..$]);
2204 }
2205 
2206 // Classic 'schoolbook' squaring
2207 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe
2208 in
2209 {
2210     assert(result.length == 2*x.length, "result must be twice as long as x");
2211     assert(x.length>1, "x must not be empty");
2212 }
2213 do
2214 {
2215     multibyteSquare(result, x);
2216 }
2217 
2218 
2219 // add two uints of possibly different lengths. Result must be as long
2220 // as the larger length.
2221 // Returns carry (0 or 1).
2222 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right)
2223 pure nothrow @safe
2224 in
2225 {
2226     assert(result.length == left.length,
2227         "result and left must be of the same length");
2228     assert(left.length >= right.length,
2229         "left must be longer or of equal length to right");
2230     assert(right.length > 0, "right must not be empty");
2231 }
2232 do
2233 {
2234     uint carry = multibyteAdd(result[0 .. right.length],
2235             left[0 .. right.length], right, 0);
2236     if (right.length < left.length)
2237     {
2238         result[right.length .. left.length] = left[right.length .. $];
2239         carry = multibyteIncrementAssign!('+')(result[right.length..$], carry);
2240     }
2241     return carry;
2242 }
2243 
2244 //  result = left - right
2245 // returns carry (0 or 1)
2246 BigDigit subSimple(BigDigit [] result,const(BigDigit) [] left,
2247         const(BigDigit) [] right) pure nothrow
2248 in
2249 {
2250     assert(result.length == left.length,
2251         "result and left must be of the same length");
2252     assert(left.length >= right.length,
2253         "left must be longer or of equal length to right");
2254     assert(right.length > 0, "right must not be empty");
2255 }
2256 do
2257 {
2258     BigDigit carry = multibyteSub(result[0 .. right.length],
2259             left[0 .. right.length], right, 0);
2260     if (right.length < left.length)
2261     {
2262         result[right.length .. left.length] = left[right.length .. $];
2263         carry = multibyteIncrementAssign!('-')(result[right.length..$], carry);
2264     } //else if (result.length == left.length+1) { result[$-1] = carry; carry=0; }
2265     return carry;
2266 }
2267 
2268 
2269 /* result = result - right
2270  * Returns carry = 1 if result was less than right.
2271 */
2272 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2273 pure nothrow @safe
2274 {
2275     assert(result.length >= right.length,
2276        "result must be longer or of equal length to right");
2277     uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0);
2278     if (c && result.length > right.length)
2279         c = multibyteIncrementAssign!('-')(result[right.length .. $], c);
2280     return c;
2281 }
2282 
2283 /* result = result + right
2284 */
2285 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2286 pure nothrow @safe
2287 {
2288     assert(result.length >= right.length,
2289        "result must be longer or of equal length to right");
2290     uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0);
2291     if (c && result.length > right.length)
2292        c = multibyteIncrementAssign!('+')(result[right.length .. $], c);
2293     return c;
2294 }
2295 
2296 /* performs result += wantSub? - right : right;
2297 */
2298 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right,
2299         bool wantSub) pure nothrow @safe
2300 {
2301     if (wantSub)
2302         return subAssignSimple(result, right);
2303     else
2304         return addAssignSimple(result, right);
2305 }
2306 
2307 
2308 // return true if x<y, considering leading zeros
2309 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe
2310 {
2311     assert(x.length >= y.length,
2312        "x must be longer or of equal length to y");
2313     auto k = x.length-1;
2314     while (x[k]==0 && k >= y.length)
2315         --k;
2316     if (k >= y.length)
2317         return false;
2318     while (k>0 && x[k]==y[k])
2319         --k;
2320     return x[k] < y[k];
2321 }
2322 
2323 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y.
2324 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
2325     pure nothrow @safe
2326 {
2327     assert(result.length == ((x.length >= y.length) ? x.length : y.length),
2328         "result must capable to store the maximum of x and y");
2329 
2330     size_t minlen;
2331     bool negative;
2332     if (x.length >= y.length)
2333     {
2334         minlen = y.length;
2335         negative = less(x, y);
2336     }
2337     else
2338     {
2339        minlen = x.length;
2340        negative = !less(y, x);
2341     }
2342     const (BigDigit)[] large, small;
2343     if (negative)
2344     {
2345         large = y; small = x;
2346     }
2347     else
2348     {
2349         large = x; small = y;
2350     }
2351 
2352     BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0);
2353     if (x.length != y.length)
2354     {
2355         result[minlen .. large.length]= large[minlen..$];
2356         result[large.length..$] = 0;
2357         if (carry)
2358             multibyteIncrementAssign!('-')(result[minlen..$], carry);
2359     }
2360     return negative;
2361 }
2362 
2363 /* Determine how much space is required for the temporaries
2364  * when performing a Karatsuba multiplication.
2365  * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see:
2366  * https://issues.dlang.org/show_bug.cgi?id=20493
2367  */
2368 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe
2369 {
2370     return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4;
2371 }
2372 
2373 /* Sets result = x*y, using Karatsuba multiplication.
2374 * x must be longer or equal to y.
2375 * Valid only for balanced multiplies, where x is not shorter than y.
2376 * It is superior to schoolbook multiplication if and only if
2377 *    sqrt(2)*y.length > x.length > y.length.
2378 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2)
2379 * The maximum allowable length of x and y is uint.max; but better algorithms
2380 * should be used far before that length is reached.
2381 * Params:
2382 * scratchbuff      An array long enough to store all the temporaries. Will be destroyed.
2383 */
2384 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x,
2385         const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe
2386 {
2387     assert(x.length >= y.length, "x must be greater or equal to y");
2388     assert(result.length < uint.max, "Operands too large");
2389     assert(result.length == x.length + y.length,
2390         "result must be as large as x + y");
2391     if (x.length <= KARATSUBALIMIT)
2392     {
2393         return mulSimple(result, x, y);
2394     }
2395     // Must be almost square (otherwise, a schoolbook iteration is better)
2396     assert(2L * y.length * y.length > (x.length-1) * (x.length-1),
2397         "Bigint Internal Error: Asymmetric Karatsuba");
2398 
2399     // The subtractive version of Karatsuba multiply uses the following result:
2400     // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid)
2401     // where mid = (x0-x1)*(y0-y1)
2402     // requiring 3 multiplies of length N, instead of 4.
2403     // The advantage of the subtractive over the additive version is that
2404     // the mid multiply cannot exceed length N. But there are subtleties:
2405     // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we
2406     // retain all of the leading zeros in the subtractions
2407 
2408     // half length, round up.
2409     auto half = (x.length >> 1) + (x.length & 1);
2410 
2411     const(BigDigit) [] x0 = x[0 .. half];
2412     const(BigDigit) [] x1 = x[half .. $];
2413     const(BigDigit) [] y0 = y[0 .. half];
2414     const(BigDigit) [] y1 = y[half .. $];
2415     BigDigit [] mid = scratchbuff[0 .. half*2];
2416     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2417     BigDigit [] resultLow = result[0 .. 2*half];
2418     BigDigit [] resultHigh = result[2*half .. $];
2419      // initially use result to store temporaries
2420     BigDigit [] xdiff= result[0 .. half];
2421     BigDigit [] ydiff = result[half .. half*2];
2422 
2423     // First, we calculate mid, and sign of mid
2424     immutable bool midNegative = inplaceSub(xdiff, x0, x1)
2425                       ^ inplaceSub(ydiff, y0, y1);
2426     mulKaratsuba(mid, xdiff, ydiff, newscratchbuff);
2427 
2428     // Low half of result gets x0 * y0. High half gets x1 * y1
2429 
2430     mulKaratsuba(resultLow, x0, y0, newscratchbuff);
2431 
2432     if (2L * y1.length * y1.length < x1.length * x1.length)
2433     {
2434         // an asymmetric situation has been created.
2435         // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1.
2436         // Applying one schoolbook multiply gives us two pieces each 1.2:1
2437         if (y1.length <= KARATSUBALIMIT)
2438             mulSimple(resultHigh, x1, y1);
2439         else
2440         {
2441             // divide x1 in two, then use schoolbook multiply on the two pieces.
2442             auto quarter = (x1.length >> 1) + (x1.length & 1);
2443             immutable ysmaller = (quarter >= y1.length);
2444             mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1,
2445                 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff);
2446             // Save the part which will be overwritten.
2447             immutable ysmaller2 = ((x1.length - quarter) >= y1.length);
2448             newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length];
2449             mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1,
2450                 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]);
2451 
2452             resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]);
2453         }
2454     }
2455     else
2456         mulKaratsuba(resultHigh, x1, y1, newscratchbuff);
2457 
2458     /* We now have result = x0y0 + (N*N)*x1y1
2459        Before adding or subtracting mid, we must calculate
2460        result += N * (x0y0 + x1y1)
2461        We can do this with three half-length additions. With a = x0y0, b = x1y1:
2462                       aHI aLO
2463         +       aHI   aLO
2464         +       bHI   bLO
2465         +  bHI  bLO
2466         =  R3   R2    R1   R0
2467         R1 = aHI + bLO + aLO
2468         R2 = aHI + bLO + aHI + carry_from_R1
2469         R3 = bHi + carry_from_R2
2470 
2471      It might actually be quicker to do it in two full-length additions:
2472      newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]);
2473      addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]);
2474    */
2475     BigDigit[] R1 = result[half .. half*2];
2476     BigDigit[] R2 = result[half*2 .. half*3];
2477     BigDigit[] R3 = result[half*3..$];
2478     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2479     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2480     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2481     if (c1+c2)
2482         multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2483     if (c1+c3)
2484         multibyteIncrementAssign!('+')(R3, c1+c3);
2485 
2486     // And finally we subtract mid
2487     addOrSubAssignSimple(result[half..$], mid, !midNegative);
2488 }
2489 
2490 void squareKaratsuba(BigDigit [] result, const BigDigit [] x,
2491         BigDigit [] scratchbuff) pure nothrow @safe
2492 {
2493     // See mulKaratsuba for implementation comments.
2494     // Squaring is simpler, since it never gets asymmetric.
2495     assert(result.length < uint.max, "Operands too large");
2496     assert(result.length == 2*x.length,
2497         "result must be twice the length of x");
2498     if (x.length <= KARATSUBASQUARELIMIT)
2499     {
2500         return squareSimple(result, x);
2501     }
2502     // half length, round up.
2503     auto half = (x.length >> 1) + (x.length & 1);
2504 
2505     const(BigDigit)[] x0 = x[0 .. half];
2506     const(BigDigit)[] x1 = x[half .. $];
2507     BigDigit [] mid = scratchbuff[0 .. half*2];
2508     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2509      // initially use result to store temporaries
2510     BigDigit [] xdiff= result[0 .. half];
2511     const BigDigit [] ydiff = result[half .. half*2];
2512 
2513     // First, we calculate mid. We don't need its sign
2514     inplaceSub(xdiff, x0, x1);
2515     squareKaratsuba(mid, xdiff, newscratchbuff);
2516 
2517     // Set result = x0x0 + (N*N)*x1x1
2518     squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff);
2519     squareKaratsuba(result[2*half .. $], x1, newscratchbuff);
2520 
2521     /* result += N * (x0x0 + x1x1)
2522        Do this with three half-length additions. With a = x0x0, b = x1x1:
2523         R1 = aHI + bLO + aLO
2524         R2 = aHI + bLO + aHI + carry_from_R1
2525         R3 = bHi + carry_from_R2
2526     */
2527     BigDigit[] R1 = result[half .. half*2];
2528     BigDigit[] R2 = result[half*2 .. half*3];
2529     BigDigit[] R3 = result[half*3..$];
2530     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2531     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2532     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2533     if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2534     if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3);
2535 
2536     // And finally we subtract mid, which is always positive
2537     subAssignSimple(result[half..$], mid);
2538 }
2539 
2540 /* Knuth's Algorithm D, as presented in
2541  * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002).
2542  * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18.
2543  * Given u and v, calculates  quotient  = u / v, u = u % v.
2544  * v must be normalized (ie, the MSB of v must be 1).
2545  * The most significant words of quotient and u may be zero.
2546  * u[0 .. v.length] holds the remainder.
2547  */
2548 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2549     pure nothrow @safe
2550 {
2551     assert(quotient.length == u.length - v.length,
2552         "quotient has wrong length");
2553     assert(v.length > 1, "v must not be empty");
2554     assert(u.length >= v.length, "u must be larger or equal to v");
2555     assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2556     assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]");
2557     // BUG: This code only works if BigDigit is uint.
2558     uint vhi = v[$-1];
2559     uint vlo = v[$-2];
2560 
2561     for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--)
2562     {
2563         // Compute estimate of quotient[j],
2564         // qhat = (three most significant words of u)/(two most sig words of v).
2565         uint qhat;
2566         if (u[j + v.length] == vhi)
2567         {
2568             // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001)
2569             qhat = uint.max;
2570         }
2571         else
2572         {
2573             uint ulo = u[j + v.length - 2];
2574             version (D_InlineAsm_X86)
2575             {
2576                 // Note: On DMD, this is only ~10% faster than the non-asm code.
2577                 uint *p = &u[j + v.length - 1];
2578                 asm pure nothrow @trusted
2579                 {
2580                     mov EAX, p;
2581                     mov EDX, [EAX+4];
2582                     mov EAX, [EAX];
2583                     div dword ptr [vhi];
2584                     mov qhat, EAX;
2585                     mov ECX, EDX;
2586 div3by2correction:
2587                     mul dword ptr [vlo]; // EDX:EAX = qhat * vlo
2588                     sub EAX, ulo;
2589                     sbb EDX, ECX;
2590                     jbe div3by2done;
2591                     mov EAX, qhat;
2592                     dec EAX;
2593                     mov qhat, EAX;
2594                     add ECX, dword ptr [vhi];
2595                     jnc div3by2correction;
2596 div3by2done:    ;
2597                 }
2598             }
2599             else
2600             { // version (InlineAsm)
2601                 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1];
2602                 immutable bigqhat = uu / vhi;
2603                 ulong rhat =  uu - bigqhat * vhi;
2604                 qhat = cast(uint) bigqhat;
2605 again:
2606                 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo))
2607                 {
2608                     --qhat;
2609                     rhat += vhi;
2610                     if (!(rhat & 0xFFFF_FFFF_0000_0000L))
2611                         goto again;
2612                 }
2613             } // version (InlineAsm)
2614         }
2615         // Multiply and subtract.
2616         uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0);
2617 
2618         if (u[j+v.length] < carry)
2619         {
2620             // If we subtracted too much, add back
2621             --qhat;
2622             carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0);
2623         }
2624         quotient[j] = qhat;
2625         u[j + v.length] = u[j + v.length] - carry;
2626     }
2627 }
2628 
2629 // TODO: Replace with a library call
2630 void itoaZeroPadded(char[] output, uint value)
2631     pure nothrow @safe @nogc
2632 {
2633     for (auto i = output.length; i--;)
2634     {
2635         if (value < 10)
2636         {
2637             output[i] = cast(char)(value + '0');
2638             value = 0;
2639         }
2640         else
2641         {
2642             output[i] = cast(char)(value % 10 + '0');
2643             value /= 10;
2644         }
2645     }
2646 }
2647 
2648 void toHexZeroPadded(char[] output, uint value,
2649         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
2650 {
2651     ptrdiff_t x = output.length - 1;
2652     static immutable string upperHexDigits = "0123456789ABCDEF";
2653     static immutable string lowerHexDigits = "0123456789abcdef";
2654     for ( ; x >= 0; --x)
2655     {
2656         if (letterCase == LetterCase.upper)
2657         {
2658             output[x] = upperHexDigits[value & 0xF];
2659         }
2660         else
2661         {
2662             output[x] = lowerHexDigits[value & 0xF];
2663         }
2664         value >>= 4;
2665     }
2666 }
2667 
2668 // Returns the highest value of i for which left[i]!=right[i],
2669 // or 0 if left[] == right[]
2670 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right)
2671 pure nothrow @nogc @safe
2672 {
2673     assert(left.length == right.length,
2674         "left have a length equal to that of right");
2675     for (ptrdiff_t i = left.length - 1; i>0; --i)
2676     {
2677         if (left[i] != right[i])
2678             return i;
2679     }
2680     return 0;
2681 }
2682 
2683 // Returns the lowest value of i for which x[i]!=0.
2684 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe
2685 {
2686     int k = 0;
2687     while (x[k]==0)
2688     {
2689         ++k;
2690         assert(k < x.length, "k must be less than x.length");
2691     }
2692     return k;
2693 }
2694 
2695 /*
2696     Calculate quotient and remainder of u / v using fast recursive division.
2697     v must be normalised, and must be at least half as long as u.
2698     Given u and v, v normalised, calculates  quotient  = u/v, u = u%v.
2699     scratch is temporary storage space, length must be >= quotient + 1.
2700 
2701 Returns:
2702     u[0 .. v.length] is the remainder. u[v.length..$] is corrupted.
2703 
2704     Implements algorithm 1.8 from MCA.
2705     This algorithm has an annoying special case. After the first recursion, the
2706     highest bit of the quotient may be set. This means that in the second
2707     recursive call, the 'in' contract would be violated. (This happens only
2708     when the top quarter of u is equal to the top half of v. A base 10
2709     equivalent example of this situation is 5517/56; the first step gives
2710     55/5 = 11). To maintain the in contract, we pad a zero to the top of both
2711     u and the quotient. 'mayOverflow' indicates that that the special case
2712     has occurred.
2713     (In MCA, a different strategy is used: the in contract is weakened, and
2714     schoolbookDivMod is more general: it allows the high bit of u to be set).
2715     See also:
2716     - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
2717       Max-Planck Institute fuer Informatik, (Oct 1998).
2718 */
2719 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v,
2720                      BigDigit[] scratch, bool mayOverflow = false)
2721                      pure nothrow @safe
2722 in
2723 {
2724     // v must be normalized
2725     assert(v.length > 1, "v must not be empty");
2726     assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2727     assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]");
2728     assert(quotient.length == u.length - v.length,
2729         "quotient must be of equal length of u - v");
2730     if (mayOverflow)
2731     {
2732         assert(u[$-1] == 0, "Invalid value at u[$ - 1]");
2733         assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]");
2734     }
2735 
2736     // Must be symmetric. Use block schoolbook division if not.
2737     assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length,
2738         "Invalid length of u");
2739     assert((mayOverflow ? u.length-1 : u.length) >= v.length,
2740         "Invalid length of u");
2741     assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1),
2742         "Invalid quotient length");
2743 }
2744 do
2745 {
2746     if (quotient.length < FASTDIVLIMIT)
2747     {
2748         return schoolbookDivMod(quotient, u, v);
2749     }
2750 
2751     // Split quotient into two halves, but keep padding in the top half
2752     auto k = (mayOverflow ?  quotient.length - 1 : quotient.length) >> 1;
2753 
2754     // RECURSION 1: Calculate the high half of the quotient
2755 
2756     // Note that if u and quotient were padded, they remain padded during
2757     // this call, so in contract is satisfied.
2758     recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $],
2759         scratch, mayOverflow);
2760 
2761     // quotient[k..$] is our guess at the high quotient.
2762     // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the
2763     // first remainder. u[0 .. 2*k] is the low part.
2764 
2765     // Calculate the full first remainder to be
2766     //    remainder - highQuotient * lowDivisor
2767     // reducing highQuotient until the remainder is positive.
2768     // The low part of the remainder, u[0 .. k], cannot be altered by this.
2769 
2770     adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k,
2771             scratch[0 .. quotient.length], mayOverflow);
2772 
2773     // RECURSION 2: Calculate the low half of the quotient
2774     // The full first remainder is now in u[0 .. k + v.length].
2775 
2776     if (u[k + v.length - 1] & 0x8000_0000)
2777     {
2778         // Special case. The high quotient is 0x1_00...000 or 0x1_00...001.
2779         // This means we need an extra quotient word for the next recursion.
2780         // We need to restore the invariant for the recursive calls.
2781         // We do this by padding both u and quotient. Extending u is trivial,
2782         // because the higher words will not be used again. But for the
2783         // quotient, we're clobbering the low word of the high quotient,
2784         // so we need save it, and add it back in after the recursive call.
2785 
2786         auto clobberedQuotient = quotient[k];
2787         u[k+v.length] = 0;
2788 
2789         recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1],
2790             v[k .. $], scratch, true);
2791         adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k,
2792             scratch[0 .. 2 * k+1], true);
2793 
2794         // Now add the quotient word that got clobbered earlier.
2795         multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient);
2796     }
2797     else
2798     {
2799         // The special case has NOT happened.
2800         recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $],
2801             scratch, false);
2802 
2803         // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length]
2804 
2805         adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k,
2806             scratch[0 .. 2 * k]);
2807     }
2808 }
2809 
2810 // rem -= quot * v[0 .. k].
2811 // If would make rem negative, decrease quot until rem is >= 0.
2812 // Needs (quot.length * k) scratch space to store the result of the multiply.
2813 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v,
2814         ptrdiff_t k,
2815         BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe
2816 {
2817     assert(rem.length == v.length, "rem must be as long as v");
2818     mulInternal(scratch, quot, v[0 .. k]);
2819     uint carry = 0;
2820     if (mayOverflow)
2821         carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]);
2822     else
2823         carry = subAssignSimple(rem, scratch);
2824     while (carry)
2825     {
2826         multibyteIncrementAssign!('-')(quot, 1); // quot--
2827         carry -= multibyteAdd(rem, rem, v, 0);
2828     }
2829 }
2830 
2831 // Cope with unbalanced division by performing block schoolbook division.
2832 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2833 pure nothrow @safe
2834 {
2835     import core.memory : GC;
2836     assert(quotient.length == u.length - v.length,
2837         "quotient must be of equal length of u - v");
2838     assert(v.length > 1, "v must not be empty");
2839     assert(u.length >= v.length, "u must be longer or of equal length as v");
2840     assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]");
2841     assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]");
2842     BigDigit [] scratch = new BigDigit[v.length + 1];
2843 
2844     // Perform block schoolbook division, with 'v.length' blocks.
2845     auto m = u.length - v.length;
2846     while (m > v.length)
2847     {
2848         immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0;
2849         BigDigit saveq;
2850         if (mayOverflow)
2851         {
2852             u[m + v.length] = 0;
2853             saveq = quotient[m];
2854         }
2855         recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)],
2856             u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow);
2857         if (mayOverflow)
2858         {
2859             assert(quotient[m] == 0, "quotient must not be 0");
2860             quotient[m] = saveq;
2861         }
2862         m -= v.length;
2863     }
2864     recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch);
2865     () @trusted { GC.free(scratch.ptr); } ();
2866 }
2867 
2868 @system unittest
2869 {
2870     version (none)
2871     {
2872         import core.stdc.stdio;
2873 
2874         void printBiguint(const uint [] data)
2875         {
2876             char [] buff = biguintToHex(new char[data.length*9], data, '_');
2877             printf("%.*s\n", cast(int) buff.length, buff.ptr);
2878         }
2879 
2880         void printDecimalBigUint(BigUint data)
2881         {
2882             auto str = data.toDecimalString(0);
2883             printf("%.*s\n", cast(int) str.length, str.ptr);
2884         }
2885     }
2886 
2887     uint [] a, b;
2888     a = new uint[43];
2889     b = new uint[179];
2890     for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i;
2891     for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546;
2892 
2893     a[$-1] |= 0x8000_0000;
2894     uint [] r = new uint[a.length];
2895     uint [] q = new uint[b.length-a.length+1];
2896 
2897     divModInternal(q, r, b, a);
2898     q = q[0..$-1];
2899     uint [] r1 = r.dup;
2900     uint [] q1 = q.dup;
2901     blockDivMod(q, b, a);
2902     r = b[0 .. a.length];
2903     assert(r[] == r1[]);
2904     assert(q[] == q1[]);
2905 }
2906 
2907 // biguintToOctal
2908 @safe unittest
2909 {
2910     enum bufSize = 5 * BigDigitBits / 3 + 1;
2911     auto buf = new char[bufSize];
2912     size_t i;
2913     BigDigit[] data = [ 342391 ];
2914 
2915     // Basic functionality with single word
2916     i = biguintToOctal(buf, data);
2917     assert(i == bufSize - 7 && buf[i .. $] == "1234567");
2918 
2919     // Test carrying bits between words
2920     data = [ 0x77053977, 0x39770539, 0x00000005 ];
2921     i = biguintToOctal(buf, data);
2922     assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567");
2923 
2924     // Test carried bits in the last word
2925     data = [ 0x80000000 ];
2926     i = biguintToOctal(buf, data);
2927     assert(buf[i .. $] == "20000000000");
2928 
2929     // Test boundary between 3rd and 4th word where the number of bits is
2930     // divisible by 3 and no bits should be carried.
2931     //
2932     // The 0xC0000000's are for "poisoning" the carry to be non-zero when the
2933     // rollover happens, so that if any bugs happen in wrongly adding the carry
2934     // to the next word, non-zero bits will show up in the output.
2935     data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ];
2936     i = biguintToOctal(buf, data);
2937     assert(buf[i .. $] == "2060000000001400000000030000000000");
2938 
2939     // Boundary case: 0
2940     data = [ 0 ];
2941     i = biguintToOctal(buf, data);
2942     assert(buf[i .. $] == "0");
2943 }