1 /** Fundamental operations for arbitrary-precision arithmetic
2  *
3  * These functions are for internal use only.
4  */
5 /*          Copyright Don Clugston 2008 - 2010.
6  * Distributed under the Boost Software License, Version 1.0.
7  *    (See accompanying file LICENSE_1_0.txt or copy at
8  *          http://www.boost.org/LICENSE_1_0.txt)
9  */
10 /* References:
11    "Modern Computer Arithmetic" (MCA) is the primary reference for all
12     algorithms used in this library.
13   - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic",
14     Version 0.5.9, (Oct 2010).
15   - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
16     Max-Planck Institute fuer Informatik, (Oct 1998).
17   - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.",
18     INRIA 4664, (Dec 2002).
19   - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?",
20     http://bodrato.it/papers (2006).
21   - A. Fog, "Optimizing subroutines in assembly language",
22     www.agner.org/optimize (2008).
23   - A. Fog, "The microarchitecture of Intel and AMD CPU's",
24     www.agner.org/optimize (2008).
25   - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs
26     and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008).
27 
28 Idioms:
29   Many functions in this module use
30   'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)'
31   in order to disable implicit conversion.
32 
33 */
34 module std.internal.math.biguintcore;
35 
36 version (D_InlineAsm_X86)
37 {
38     static import std.internal.math.biguintx86;
39 }
40 static import std.internal.math.biguintnoasm;
41 
42 import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT,
43     KARATSUBASQUARELIMIT;
44 
45 alias multibyteAdd = multibyteAddSub!('+');
46 alias multibyteSub = multibyteAddSub!('-');
47 
48 private import std.traits;
49 private import std.range.primitives;
50 public import std.ascii : LetterCase;
51 import std.range.primitives;
52 import std.traits;
53 
54 private:
55 
56 // dipatchers to the right low-level primitives. Added to allow BigInt CTFE for
57 // 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's
58 // used by the other architectures too.
59 // See comments below in case it has to be refactored.
60 version (X86)
61 uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry)
62 {
63     // must be checked before, otherwise D_InlineAsm_X86 is true.
64     if (__ctfe)
65         return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
66     // Runtime.
67     else version (D_InlineAsm_X86)
68         return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry);
69     // Runtime if no asm available.
70     else
71         return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
72 }
73 // Any other architecture
74 else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub;
75 
76 version (X86)
77 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry)
78 {
79     if (__ctfe)
80         return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
81     else version (D_InlineAsm_X86)
82         return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry);
83     else
84         return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
85 }
86 else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign;
87 
88 version (X86)
89 uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits)
90 {
91     if (__ctfe)
92         return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
93     else version (D_InlineAsm_X86)
94         return std.internal.math.biguintx86.multibyteShl(dest, src, numbits);
95     else
96         return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
97 }
98 else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl;
99 
100 version (X86)
101 void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits)
102 {
103     if (__ctfe)
104         std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
105     else version (D_InlineAsm_X86)
106         std.internal.math.biguintx86.multibyteShr(dest, src, numbits);
107     else
108         std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
109 }
110 else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr;
111 
112 version (X86)
113 uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
114 {
115     if (__ctfe)
116         return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
117     else version (D_InlineAsm_X86)
118         return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry);
119     else
120         return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
121 }
122 else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul;
123 
124 version (X86)
125 uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
126 {
127     if (__ctfe)
128         return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
129     else version (D_InlineAsm_X86)
130         return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry);
131     else
132         return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
133 }
134 else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd;
135 
136 version (X86)
137 void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right)
138 {
139     if (__ctfe)
140         std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
141     else version (D_InlineAsm_X86)
142         std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right);
143     else
144         std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
145 }
146 else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate;
147 
148 version (X86)
149 uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow)
150 {
151     if (__ctfe)
152         return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
153     else version (D_InlineAsm_X86)
154         return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow);
155     else
156         return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
157 }
158 else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign;
159 
160 version (X86)
161 void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src)
162 {
163     if (__ctfe)
164         std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
165     else version (D_InlineAsm_X86)
166         std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src);
167     else
168         std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
169 }
170 else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares;
171 
172 version (X86)
173 void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x)
174 {
175     if (__ctfe)
176         std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
177     else version (D_InlineAsm_X86)
178         std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x);
179     else
180         std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
181 }
182 else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate;
183 
184 version (X86)
185 void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x)
186 {
187     if (__ctfe)
188         std.internal.math.biguintnoasm.multibyteSquare(result, x);
189     else version (D_InlineAsm_X86)
190         std.internal.math.biguintx86.multibyteSquare(result, x);
191     else
192         std.internal.math.biguintnoasm.multibyteSquare(result, x);
193 }
194 else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare;
195 
196 // Limits for when to switch between algorithms.
197 // Half the size of the data cache.
198 @nogc nothrow pure @safe size_t getCacheLimit()
199 {
200     import core.cpuid : dataCaches;
201     return dataCaches[0].size * 1024 / 2;
202 }
203 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division
204 
205 
206 // These constants are used by shift operations
207 static if (BigDigit.sizeof == int.sizeof)
208 {
209     enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 }
210     alias BIGHALFDIGIT = ushort;
211 }
212 else static if (BigDigit.sizeof == long.sizeof)
213 {
214     alias BIGHALFDIGIT = uint;
215     enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 }
216 }
217 else static assert(0, "Unsupported BigDigit size");
218 
219 import std.exception : assumeUnique;
220 import std.traits : isIntegral;
221 enum BigDigitBits = BigDigit.sizeof*8;
222 template maxBigDigits(T)
223 if (isIntegral!T)
224 {
225     enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof;
226 }
227 
228 static immutable BigDigit[] ZERO = [0];
229 static immutable BigDigit[] ONE = [1];
230 static immutable BigDigit[] TWO = [2];
231 static immutable BigDigit[] TEN = [10];
232 
233 
234 public:
235 
236 /// BigUint performs memory management and wraps the low-level calls.
237 struct BigUint
238 {
239 private:
240     pure invariant()
241     {
242         assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ),
243                 "Invariant requires data to not empty or zero");
244     }
245 
246     immutable(BigDigit) [] data = ZERO;
247 
248     this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe
249     {
250        data = x;
251     }
252   package(std)  // used from: std.bigint
253     this(T)(T x) pure nothrow @safe scope if (isIntegral!T)
254     {
255         opAssign(x);
256     }
257 
258     enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc {
259         return assumeUnique(input);
260     };
261 public:
262     // Length in uints
263     @property size_t uintLength() pure nothrow const @safe @nogc scope
264     {
265         static if (BigDigit.sizeof == uint.sizeof)
266         {
267             return data.length;
268         }
269         else static if (BigDigit.sizeof == ulong.sizeof)
270         {
271             return data.length * 2 -
272             ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0);
273         }
274     }
275     @property size_t ulongLength() pure nothrow const @safe @nogc scope
276     {
277         static if (BigDigit.sizeof == uint.sizeof)
278         {
279             return (data.length + 1) >> 1;
280         }
281         else static if (BigDigit.sizeof == ulong.sizeof)
282         {
283             return data.length;
284         }
285     }
286 
287     // The value at (cast(ulong[]) data)[n]
288     ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope
289     {
290         static if (BigDigit.sizeof == int.sizeof)
291         {
292             if (data.length == n*2 + 1) return data[n*2];
293             return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 );
294         }
295         else static if (BigDigit.sizeof == long.sizeof)
296         {
297             return data[n];
298         }
299     }
300 
301     uint peekUint(size_t n) pure nothrow const @safe @nogc scope
302     {
303         static if (BigDigit.sizeof == int.sizeof)
304         {
305             return data[n];
306         }
307         else
308         {
309             immutable x = data[n >> 1];
310             return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x;
311         }
312     }
313 
314     ///
315     void opAssign(Tulong)(Tulong u) pure nothrow @safe scope if (is (Tulong == ulong))
316     {
317         if (u == 0) data = ZERO;
318         else if (u == 1) data = ONE;
319         else if (u == 2) data = TWO;
320         else if (u == 10) data = TEN;
321         else
322         {
323             static if (BigDigit.sizeof == int.sizeof)
324             {
325                 uint ulo = cast(uint)(u & 0xFFFF_FFFF);
326                 uint uhi = cast(uint)(u >> 32);
327                 if (uhi == 0)
328                 {
329                     data = [ulo];
330                 }
331                 else
332                 {
333                     data = [ulo, uhi];
334                 }
335             }
336             else static if (BigDigit.sizeof == long.sizeof)
337             {
338                 data = [u];
339             }
340         }
341     }
342     void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope
343     {
344         this.data = y.data;
345     }
346 
347     ///
348     int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope
349     {
350         if (data.length != y.data.length)
351             return (data.length > y.data.length) ?  1 : -1;
352         size_t k = highestDifferentDigit(data, y.data);
353         if (data[k] == y.data[k])
354             return 0;
355         return data[k] > y.data[k] ? 1 : -1;
356     }
357 
358     ///
359     int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope if (is (Tulong == ulong))
360     {
361         if (data.length > maxBigDigits!Tulong)
362             return 1;
363 
364         foreach_reverse (i; 0 .. maxBigDigits!Tulong)
365         {
366             BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits));
367             if (tmp == 0)
368                 if (data.length >= i+1)
369                 {
370                     // Since ZERO is [0], so we cannot simply return 1 here, as
371                     // data[i] would be 0 for i == 0 in that case.
372                     return (data[i] > 0) ? 1 : 0;
373                 }
374                 else
375                     continue;
376             else
377                 if (i+1 > data.length)
378                     return -1;
379                 else if (tmp != data[i])
380                     return data[i] > tmp ? 1 : -1;
381         }
382         return 0;
383     }
384 
385     bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope
386     {
387            return y.data[] == data[];
388     }
389 
390     bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope
391     {
392         if (data.length > 2)
393             return false;
394         uint ylo = cast(uint)(y & 0xFFFF_FFFF);
395         uint yhi = cast(uint)(y >> 32);
396         if (data.length == 2 && data[1]!=yhi)
397             return false;
398         if (data.length == 1 && yhi != 0)
399             return false;
400         return (data[0] == ylo);
401     }
402 
403     bool isZero() pure const nothrow @safe @nogc scope
404     {
405         return data.length == 1 && data[0] == 0;
406     }
407 
408     size_t numBytes() pure nothrow const @safe @nogc scope
409     {
410         return data.length * BigDigit.sizeof;
411     }
412 
413     // the extra bytes are added to the start of the string
414     char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope
415     {
416         immutable predictlength = 20+20*(data.length/2); // just over 19
417         char [] buff = new char[frontExtraBytes + predictlength];
418         ptrdiff_t sofar = biguintToDecimal(buff, data.dup);
419         return buff[sofar-frontExtraBytes..$];
420     }
421 
422     /** Convert to a hex string, printing a minimum number of digits 'minPadding',
423      *  allocating an additional 'frontExtraBytes' at the start of the string.
424      *  Padding is done with padChar, which may be '0' or ' '.
425      *  'separator' is a digit separation character. If non-zero, it is inserted
426      *  between every 8 digits.
427      *  Separator characters do not contribute to the minPadding.
428      */
429     char [] toHexString(int frontExtraBytes, char separator = 0,
430             int minPadding=0, char padChar = '0',
431             LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope
432     {
433         // Calculate number of extra padding bytes
434         size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof)
435             ? minPadding - data.length * 2 * BigDigit.sizeof : 0;
436 
437         // Length not including separator bytes
438         size_t lenBytes = data.length * 2 * BigDigit.sizeof;
439 
440         // Calculate number of separator bytes
441         size_t mainSeparatorBytes = separator ? (lenBytes  / 8) - 1 : 0;
442         immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0;
443 
444         char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes];
445         biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase);
446         if (extraPad > 0)
447         {
448             if (separator)
449             {
450                 size_t start = frontExtraBytes; // first index to pad
451                 if (extraPad &7)
452                 {
453                     // Do 1 to 7 extra zeros.
454                     buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar;
455                     buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator);
456                     start += (extraPad & 7) + 1;
457                 }
458                 for (int i=0; i< (extraPad >> 3); ++i)
459                 {
460                     buff[start .. start + 8] = padChar;
461                     buff[start + 8] = (padChar == ' ' ? ' ' : separator);
462                     start += 9;
463                 }
464             }
465             else
466             {
467                 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar;
468             }
469         }
470         int z = frontExtraBytes;
471         if (lenBytes > minPadding)
472         {
473             // Strip leading zeros.
474             ptrdiff_t maxStrip = lenBytes - minPadding;
475             while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0)
476             {
477                 ++z;
478                 --maxStrip;
479             }
480         }
481         if (padChar!='0')
482         {
483             // Convert leading zeros into padChars.
484             for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k)
485             {
486                 if (buff[k]=='0') buff[k]=padChar;
487             }
488         }
489         return buff[z-frontExtraBytes..$];
490     }
491 
492     /**
493      * Convert to an octal string.
494      */
495     char[] toOctalString() pure nothrow @safe const scope
496     {
497         auto predictLength = 1 + data.length*BigDigitBits / 3;
498         char[] buff = new char[predictLength];
499         size_t firstNonZero = biguintToOctal(buff, data);
500         return buff[firstNonZero .. $];
501     }
502 
503     // return false if invalid character found
504     bool fromHexString(Range)(Range s) scope if (
505         isBidirectionalRange!Range && isSomeChar!(ElementType!Range))
506     {
507         import std.range : walkLength;
508 
509         //Strip leading zeros
510         while (!s.empty && s.front == '0')
511             s.popFront;
512 
513         if (s.empty)
514         {
515             data = ZERO;
516             return true;
517         }
518 
519         immutable len = (s.save.walkLength + 15) / 4;
520         auto tmp = new BigDigit[len + 1];
521         uint part, sofar, partcount;
522 
523         foreach_reverse (character; s)
524         {
525             if (character == '_')
526                 continue;
527 
528             uint x;
529             if (character >= '0' && character <= '9')
530             {
531                 x = character - '0';
532             }
533             else if (character >= 'A' && character <= 'F')
534             {
535                 x = character - 'A' + 10;
536             }
537             else if (character >= 'a' && character <= 'f')
538             {
539                 x = character - 'a' + 10;
540             }
541             else
542             {
543                 return false;
544             }
545 
546             part >>= 4;
547             part |= (x << (32 - 4));
548             ++partcount;
549 
550             if (partcount == 8)
551             {
552                 tmp[sofar] = part;
553                 ++sofar;
554                 partcount = 0;
555                 part = 0;
556             }
557         }
558         if (part)
559         {
560             for ( ; partcount != 8; ++partcount) part >>= 4;
561             tmp[sofar] = part;
562             ++sofar;
563         }
564         if (sofar == 0)
565             data = ZERO;
566         else
567             data = trustedAssumeUnique(tmp[0 .. sofar]);
568 
569         return true;
570     }
571 
572     // return true if OK; false if erroneous characters found
573     bool fromDecimalString(Range)(Range s) scope if (
574         isForwardRange!Range && isSomeChar!(ElementType!Range))
575     {
576         import std.range : walkLength;
577 
578         while (!s.empty && s.front == '0')
579         {
580             s.popFront;
581         }
582 
583         if (s.empty)
584         {
585             data = ZERO;
586             return true;
587         }
588 
589         auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19;
590         auto tmp = new BigDigit[predict_length];
591 
592         tmp.length = biguintFromDecimal(tmp, s);
593 
594         data = trustedAssumeUnique(tmp);
595         return true;
596     }
597 
598     void fromMagnitude(Range)(Range magnitude) scope
599         if (isInputRange!Range
600             && (isForwardRange!Range || hasLength!Range)
601             && isUnsigned!(ElementType!Range))
602     {
603         while (!magnitude.empty && magnitude.front == 0)
604             magnitude.popFront;
605         static if (hasLength!Range)
606             immutable inputLen = magnitude.length;
607         else
608             immutable inputLen = magnitude.save.walkLength;
609         if (!inputLen)
610         {
611             this.data = ZERO;
612             return;
613         }
614         // `magnitude` has its most significant element first but BigUint.data
615         // stores the most significant last.
616         BigDigit[] newDigits;
617         alias E = ElementType!Range;
618         static if (E.sizeof == BigDigit.sizeof)
619         {
620             newDigits = new BigDigit[inputLen];
621             foreach_reverse (ref digit; newDigits)
622             {
623                 digit = magnitude.front;
624                 magnitude.popFront();
625             }
626         }
627         else static if (E.sizeof < BigDigit.sizeof)
628         {
629             enum elementsPerDigit = BigDigit.sizeof / E.sizeof;
630             newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit];
631             immutable remainder = inputLen % elementsPerDigit;
632             // If there is a remainder specially assemble the most significant digit.
633             if (remainder)
634             {
635                 BigDigit tmp = magnitude.front;
636                 magnitude.popFront();
637                 foreach (_; 1 .. remainder)
638                 {
639                     tmp = (tmp << (E.sizeof * 8)) | magnitude.front;
640                     magnitude.popFront();
641                 }
642                 newDigits[$-1] = tmp;
643             }
644             // Assemble full digits from most to least significant.
645             foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)])
646             {
647                 BigDigit tmp;
648                 static foreach (n; 0 .. elementsPerDigit)
649                 {
650                     tmp |= cast(BigDigit) magnitude.front <<
651                         ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8);
652                     magnitude.popFront();
653                 }
654                 digit = tmp;
655             }
656         }
657         else static if (E.sizeof > BigDigit.sizeof)
658         {
659             enum digitsPerElement = E.sizeof / BigDigit.sizeof;
660             newDigits = new BigDigit[inputLen * digitsPerElement];
661             size_t i = newDigits.length - 1;
662             foreach (element; magnitude)
663             {
664                 static foreach (n; 0 .. digitsPerElement)
665                     newDigits[i - n] =
666                         cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8));
667                 i -= digitsPerElement;
668             }
669             while (newDigits[$-1] == 0)
670                 newDigits = newDigits[0 .. $-1];
671         }
672         else
673             static assert(0);
674         this.data = trustedAssumeUnique(newDigits);
675         return;
676     }
677 
678     nothrow pure @safe unittest
679     {
680         immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD];
681         // Internal representation is most-significant-last but `fromMagnitude`
682         // argument is most-significant-first.
683         immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005];
684         BigUint b;
685         // Test with reference magnitude.
686         b.fromMagnitude(referenceMagnitude);
687         assert(b.data == referenceData);
688         // Test ubyte array.
689         import std.bitmanip : nativeToBigEndian;
690         ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~
691             nativeToBigEndian(referenceMagnitude[1]) ~
692             nativeToBigEndian(referenceMagnitude[2]);
693         b.data = ZERO;
694         b.fromMagnitude(ubyteMagnitude);
695         assert(b.data == referenceData);
696         // Test ulong array.
697         static if (BigDigit.sizeof == uint.sizeof)
698             immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]),
699                 ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2],
700             ];
701         else static if (BigDigit.sizeof == ulong.sizeof)
702             alias ulongMagnitude = referenceMagnitude;
703         b.data = ZERO;
704         b.fromMagnitude(ulongMagnitude);
705         assert(b.data == referenceData);
706     }
707 
708     ////////////////////////
709     //
710     // All of these member functions create a new BigUint.
711 
712     // return x >> y
713     BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope
714         if (op == ">>" && is (Tulong == ulong))
715     {
716         assert(y > 0, "Can not right shift BigUint by 0");
717         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
718         if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO);
719         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
720         if (bits == 0)
721         {
722             return BigUint(data[words..$]);
723         }
724         else
725         {
726             uint [] result = new BigDigit[data.length - words];
727             multibyteShr(result, data[words..$], bits);
728 
729             if (result.length > 1 && result[$-1] == 0)
730                 return BigUint(trustedAssumeUnique(result[0 .. $-1]));
731             else
732                 return BigUint(trustedAssumeUnique(result));
733         }
734     }
735 
736     // return x << y
737     BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope
738         if (op == "<<" && is (Tulong == ulong))
739     {
740         assert(y > 0, "Can not left shift BigUint by 0");
741         if (isZero()) return this;
742         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
743         assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max),
744                 "Shift result exceeds temporary store");
745         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
746         BigDigit [] result = new BigDigit[data.length + words+1];
747         result[0 .. words] = 0;
748         if (bits == 0)
749         {
750             result[words .. words+data.length] = data[];
751             return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
752         }
753         else
754         {
755             immutable c = multibyteShl(result[words .. words+data.length], data, bits);
756             if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
757             result[$-1] = c;
758             return BigUint(trustedAssumeUnique(result));
759         }
760     }
761 
762     // If wantSub is false, return x + y, leaving sign unchanged
763     // If wantSub is true, return abs(x - y), negating sign if x < y
764     static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y,
765             bool wantSub, ref bool sign) pure nothrow @safe if (is(Tulong == ulong))
766     {
767         BigUint r;
768         if (wantSub)
769         {   // perform a subtraction
770             if (x.data.length > 2)
771             {
772                 // subInt returns GC allocated array, can be safely cast to immutable
773                 r.data = (() @trusted => cast(immutable) subInt(x.data, y))();
774             }
775             else
776             {   // could change sign!
777                 ulong xx = x.data[0];
778                 if (x.data.length > 1)
779                     xx += (cast(ulong) x.data[1]) << 32;
780                 ulong d;
781                 if (xx <= y)
782                 {
783                     d = y - xx;
784                     sign = !sign;
785                 }
786                 else
787                 {
788                     d = xx - y;
789                 }
790                 if (d == 0)
791                 {
792                     r = 0UL;
793                     sign = false;
794                     return r;
795                 }
796                 if (d > uint.max)
797                 {
798                     r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)];
799                 }
800                 else
801                 {
802                     r.data = [cast(uint)(d & 0xFFFF_FFFF)];
803                 }
804             }
805         }
806         else
807         {
808             // addInt returns GC allocated array, can be safely cast to immutable
809             r.data = (() @trusted => cast(immutable) addInt(x.data, y))();
810         }
811         return r;
812     }
813 
814     // If wantSub is false, return x + y, leaving sign unchanged.
815     // If wantSub is true, return abs(x - y), negating sign if x<y
816     static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign)
817         pure nothrow @safe
818     {
819         BigUint r;
820         if (wantSub)
821         {   // perform a subtraction
822             bool negative;
823             // sub returns GC allocated array, can be safely cast to immutable
824             r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))();
825             sign ^= negative;
826             if (r.isZero())
827             {
828                 sign = false;
829             }
830         }
831         else
832         {
833             // add returns GC allocated array, can be safely cast to immutable
834             r.data = (() @trusted => cast(immutable) add(x.data, y.data))();
835         }
836         return r;
837     }
838 
839 
840     //  return x*y.
841     static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe
842     {
843         if (y == 0 || x == 0) return BigUint(ZERO);
844         static if (T.sizeof * 8 <= 32)
845             uint hi = 0;
846         else
847             uint hi = cast(uint) (y >>> 32);
848         uint lo = cast(uint) (y & 0xFFFF_FFFF);
849         uint [] result = new BigDigit[x.data.length+1+(hi != 0)];
850         result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0);
851         if (hi != 0)
852         {
853             result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1],
854                 x.data, hi, 0);
855         }
856         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
857     }
858 
859     /*  return x * y.
860      */
861     static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe
862     {
863         if (y == 0 || x == 0)
864             return BigUint(ZERO);
865         auto len = x.data.length + y.data.length;
866         BigDigit [] result = new BigDigit[len];
867         if (y.data.length > x.data.length)
868         {
869             mulInternal(result, y.data, x.data);
870         }
871         else
872         {
873             if (x.data[]==y.data[]) squareInternal(result, x.data);
874             else mulInternal(result, x.data, y.data);
875         }
876         // the highest element could be zero,
877         // in which case we need to reduce the length
878         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
879     }
880 
881     // return x / y
882     static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe
883     if ( is(immutable T == immutable uint) )
884     {
885         uint y = y_;
886         if (y == 1)
887             return x;
888         uint [] result = new BigDigit[x.data.length];
889         if ((y&(-y))==y)
890         {
891             assert(y != 0, "BigUint division by zero");
892             // perfect power of 2
893             uint b = 0;
894             for (;y != 1; y>>=1)
895             {
896                 ++b;
897             }
898             multibyteShr(result, x.data, b);
899         }
900         else
901         {
902             result[] = x.data[];
903             cast(void) multibyteDivAssign(result, y, 0);
904         }
905         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
906     }
907 
908     static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe
909     if ( is(immutable T == immutable ulong) )
910     {
911         if (y <= uint.max)
912             return divInt!uint(x, cast(uint) y);
913         if (x.data.length < 2)
914             return BigUint(ZERO);
915         uint hi = cast(uint)(y >>> 32);
916         uint lo = cast(uint)(y & 0xFFFF_FFFF);
917         immutable uint[2] z = [lo, hi];
918         BigDigit[] result = new BigDigit[x.data.length - z.length + 1];
919         divModInternal(result, null, x.data, z[]);
920         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
921     }
922 
923     // return x % y
924     static uint modInt(T)(scope BigUint x, T y_) pure if ( is(immutable T == immutable uint) )
925     {
926         import core.memory : GC;
927         uint y = y_;
928         assert(y != 0, "% 0 not allowed");
929         if ((y&(-y)) == y)
930         {   // perfect power of 2
931             return x.data[0] & (y-1);
932         }
933         else
934         {
935             // horribly inefficient - malloc, copy, & store are unnecessary.
936             uint [] wasteful = new BigDigit[x.data.length];
937             wasteful[] = x.data[];
938             immutable rem = multibyteDivAssign(wasteful, y, 0);
939             () @trusted { GC.free(wasteful.ptr); } ();
940             return rem;
941         }
942     }
943 
944     // return x / y
945     static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe
946     {
947         if (y.data.length > x.data.length)
948             return BigUint(ZERO);
949         if (y.data.length == 1)
950             return divInt(x, y.data[0]);
951         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
952            divModInternal(result, null, x.data, y.data);
953         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
954     }
955 
956     // return x % y
957     static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe
958     {
959         if (y.data.length > x.data.length) return x;
960         if (y.data.length == 1)
961         {
962             return BigUint([modInt(x, y.data[0])]);
963         }
964         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
965         BigDigit [] rem = new BigDigit[y.data.length];
966         divModInternal(result, rem, x.data, y.data);
967         return BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
968     }
969 
970     // Return x / y in quotient, x % y in remainder
971     static void divMod(BigUint x, scope BigUint y,
972                        out BigUint quotient, out BigUint remainder) pure nothrow @safe
973     {
974         /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */
975         if (y.data.length > x.data.length)
976         {
977             quotient = 0uL;
978             remainder = x;
979         }
980         else if (y.data.length == 1)
981         {
982             quotient = divInt(x, y.data[0]);
983             remainder = BigUint([modInt(x, y.data[0])]);
984         }
985         else
986         {
987             BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1];
988             BigDigit[] rem = new BigDigit[y.data.length];
989             divModInternal(quot, rem, x.data, y.data);
990             quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot)));
991             remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
992         }
993     }
994 
995     // return x op y
996     static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign)
997     pure nothrow @safe if (op == "|" || op == "^" || op == "&")
998     {
999         auto d1 = includeSign(x.data, y.uintLength, xSign);
1000         auto d2 = includeSign(y.data, x.uintLength, ySign);
1001 
1002         foreach (i; 0 .. d1.length)
1003         {
1004             mixin("d1[i] " ~ op ~ "= d2[i];");
1005         }
1006 
1007         mixin("resultSign = xSign " ~ op ~ " ySign;");
1008 
1009         if (resultSign)
1010         {
1011             twosComplement(d1, d1);
1012         }
1013 
1014         return BigUint(removeLeadingZeros(trustedAssumeUnique(d1)));
1015     }
1016 
1017     /**
1018      * Return a BigUint which is x raised to the power of y.
1019      * Method: Powers of 2 are removed from x, then left-to-right binary
1020      * exponentiation is used.
1021      * Memory allocation is minimized: at most one temporary BigUint is used.
1022      */
1023     static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe
1024     {
1025         // Deal with the degenerate cases first.
1026         if (y == 0) return BigUint(ONE);
1027         if (y == 1) return x;
1028         if (x == 0 || x == 1) return x;
1029 
1030         BigUint result;
1031 
1032         // Simplify, step 1: Remove all powers of 2.
1033         uint firstnonzero = firstNonZeroDigit(x.data);
1034         // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits))
1035         // where BigDigitBits = BigDigit.sizeof * 8
1036 
1037         // See if x[firstnonzero..$] can now fit into a single digit.
1038         bool singledigit = ((x.data.length - firstnonzero) == 1);
1039         // If true, then x0 is that digit
1040         // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits))
1041         BigDigit x0 = x.data[firstnonzero];
1042         assert(x0 != 0, "pow(0, y) not allowed");
1043         // Length of the non-zero portion
1044         size_t nonzerolength = x.data.length - firstnonzero;
1045         ulong y0;
1046         uint evenbits = 0; // number of even bits in the bottom of x
1047         while (!(x0 & 1))
1048         {
1049             x0 >>= 1;
1050             ++evenbits;
1051         }
1052 
1053         if (x.data.length- firstnonzero == 2)
1054         {
1055             // Check for a single digit straddling a digit boundary
1056             const BigDigit x1 = x.data[firstnonzero+1];
1057             if ((x1 >> evenbits) == 0)
1058             {
1059                 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits));
1060                 singledigit = true;
1061             }
1062         }
1063         // Now if (singledigit), x^^y  = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1064 
1065         uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end
1066 
1067         // Simplify, step 2: For singledigits, see if we can trivially reduce y
1068 
1069         BigDigit finalMultiplier = 1UL;
1070 
1071         if (singledigit)
1072         {
1073             // x fits into a single digit. Raise it to the highest power we can
1074             // that still fits into a single digit, then reduce the exponent accordingly.
1075             // We're quite likely to have a residual multiply at the end.
1076             // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100.
1077             // and 5^^13 still fits into a uint.
1078             evenshiftbits  = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK);
1079             if (x0 == 1)
1080             {   // Perfect power of 2
1081                 result = 1UL;
1082                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1083             }
1084             immutable p = highestPowerBelowUintMax(x0);
1085             if (y <= p)
1086             {   // Just do it with pow
1087                 result = cast(ulong) intpow(x0, y);
1088                 if (evenbits + firstnonzero == 0)
1089                     return result;
1090                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1091             }
1092             y0 = y / p;
1093             finalMultiplier = intpow(x0, y - y0*p);
1094             x0 = intpow(x0, p);
1095             // Result is x0
1096             nonzerolength = 1;
1097         }
1098         // Now if (singledigit), x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1099 
1100         // Perform a crude check for overflow and allocate result buffer.
1101         // The length required is y * lg2(x) bits.
1102         // which will always fit into y*x.length digits. But this is
1103         // a gross overestimate if x is small (length 1 or 2) and the highest
1104         // digit is nearly empty.
1105         // A better estimate is:
1106         //   y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits,
1107         //  and the first term is always between
1108         //  y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and
1109         //  y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS
1110         // For single digit payloads, we already have
1111         //   x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1112         // and x0 is almost a full digit, so it's a tight estimate.
1113         // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y
1114         // Note that the divisions must be rounded up.
1115 
1116         // Estimated length in BigDigits
1117         immutable estimatelength = singledigit
1118             ? 1 + y0 + ((evenbits*y  + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y
1119             :  x.data.length * y;
1120         // Imprecise check for overflow. Makes the extreme cases easier to debug
1121         // (less extreme overflow will result in an out of memory error).
1122         if (estimatelength > uint.max/(4*BigDigit.sizeof))
1123             assert(0, "Overflow in BigInt.pow");
1124 
1125         // The result buffer includes space for all the trailing zeros
1126         BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength];
1127 
1128         // Do all the powers of 2!
1129         size_t result_start = cast(size_t)( firstnonzero * y
1130             + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0));
1131 
1132         resultBuffer[0 .. result_start] = 0;
1133         BigDigit [] t1 = resultBuffer[result_start..$];
1134         BigDigit [] r1;
1135 
1136         if (singledigit)
1137         {
1138             r1 = t1[0 .. 1];
1139             r1[0] = x0;
1140             y = y0;
1141         }
1142         else
1143         {
1144             // It's not worth right shifting by evenbits unless we also shrink the length after each
1145             // multiply or squaring operation. That might still be worthwhile for large y.
1146             r1 = t1[0 .. x.data.length - firstnonzero];
1147             r1[0..$] = x.data[firstnonzero..$];
1148         }
1149 
1150         if (y>1)
1151         {   // Set r1 = r1 ^^ y.
1152             // The secondary buffer only needs space for the multiplication results
1153             BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start];
1154             BigDigit [] r2;
1155 
1156             int shifts = 63; // num bits in a long
1157             while (!(y & 0x8000_0000_0000_0000L))
1158             {
1159                 y <<= 1;
1160                 --shifts;
1161             }
1162             y <<=1;
1163 
1164             while (y != 0)
1165             {
1166                 // For each bit of y: Set r1 =  r1 * r1
1167                 // If the bit is 1, set r1 = r1 * x
1168                 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5.
1169                 // Optimization opportunity: if more than 2 bits in y are set,
1170                 // it's usually possible to reduce the number of multiplies
1171                 // by caching odd powers of x. eg for y = 54,
1172                 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2
1173                 r2 = t2[0 .. r1.length*2];
1174                 squareInternal(r2, r1);
1175                 if (y & 0x8000_0000_0000_0000L)
1176                 {
1177                     r1 = t1[0 .. r2.length + nonzerolength];
1178                     if (singledigit)
1179                     {
1180                         r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0);
1181                     }
1182                     else
1183                     {
1184                         mulInternal(r1, r2, x.data[firstnonzero..$]);
1185                     }
1186                 }
1187                 else
1188                 {
1189                     r1 = t1[0 .. r2.length];
1190                     r1[] = r2[];
1191                 }
1192                 y <<=1;
1193                 shifts--;
1194             }
1195             while (shifts>0)
1196             {
1197                 r2 = t2[0 .. r1.length * 2];
1198                 squareInternal(r2, r1);
1199                 r1 = t1[0 .. r2.length];
1200                 r1[] = r2[];
1201                 --shifts;
1202             }
1203         }
1204 
1205         if (finalMultiplier != 1)
1206         {
1207             const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0);
1208             if (carry)
1209             {
1210                 r1 = t1[0 .. r1.length + 1];
1211                 r1[$-1] = carry;
1212             }
1213         }
1214         if (evenshiftbits)
1215         {
1216             const BigDigit carry = multibyteShl(r1, r1, evenshiftbits);
1217             if (carry != 0)
1218             {
1219                 r1 = t1[0 .. r1.length + 1];
1220                 r1[$ - 1] = carry;
1221             }
1222         }
1223         while (r1[$ - 1]==0)
1224         {
1225             r1=r1[0 .. $ - 1];
1226         }
1227         return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length]));
1228     }
1229 
1230     // Implement toHash so that BigUint works properly as an AA key.
1231     size_t toHash() const @nogc nothrow pure @safe scope
1232     {
1233         return .hashOf(data);
1234     }
1235 
1236 } // end BigUint
1237 
1238 @safe pure nothrow unittest
1239 {
1240     // ulong comparison test
1241     BigUint a = [1];
1242     assert(a == 1);
1243     // https://issues.dlang.org/show_bug.cgi?id=9548
1244     assert(a < 0x8000_0000_0000_0000UL);
1245 
1246     // https://issues.dlang.org/show_bug.cgi?id=12234
1247     BigUint z = [0];
1248     assert(z == 0UL);
1249     assert(!(z > 0UL));
1250     assert(!(z < 0UL));
1251 }
1252 
1253 // https://issues.dlang.org/show_bug.cgi?id=16223
1254 @system pure nothrow unittest
1255 {
1256     BigUint a = [3];
1257     int b = 5;
1258     assert(BigUint.mulInt(a,b) == 15);
1259 }
1260 
1261 // Remove leading zeros from x, to restore the BigUint invariant
1262 inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe
1263 {
1264     size_t k = x.length;
1265     while (k>1 && x[k - 1]==0) --k;
1266     return x[0 .. k];
1267 }
1268 
1269 pure @safe unittest
1270 {
1271    BigUint r = BigUint([5]);
1272    BigUint t = BigUint([7]);
1273    BigUint s = BigUint.mod(r, t);
1274    assert(s == 5);
1275 }
1276 
1277 
1278 @safe pure unittest
1279 {
1280     BigUint r;
1281     r = 5UL;
1282     assert(r.peekUlong(0) == 5UL);
1283     assert(r.peekUint(0) == 5U);
1284     r = 0x1234_5678_9ABC_DEF0UL;
1285     assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL);
1286     assert(r.peekUint(0) == 0x9ABC_DEF0U);
1287 }
1288 
1289 
1290 // Pow tests
1291 pure @safe unittest
1292 {
1293     BigUint r, s;
1294     r.fromHexString("80000000_00000001");
1295     s = BigUint.pow(r, 5);
1296     r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000"
1297       ~ "_00000002_80000000_00000001");
1298     assert(s == r);
1299     s = 10UL;
1300     s = BigUint.pow(s, 39);
1301     r.fromDecimalString("1000000000000000000000000000000000000000");
1302     assert(s == r);
1303     r.fromHexString("1_E1178E81_00000000");
1304     s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds
1305 
1306     r.fromDecimalString("000_000_00");
1307     assert(r == 0);
1308     r.fromDecimalString("0007");
1309     assert(r == 7);
1310     r.fromDecimalString("0");
1311     assert(r == 0);
1312 }
1313 
1314 // Radix conversion tests
1315 @safe pure unittest
1316 {
1317     BigUint r;
1318     r.fromHexString("1_E1178E81_00000000");
1319     assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000");
1320     assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000");
1321     assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000");
1322     assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000");
1323     assert(r.toHexString(0, '_', 16+8+8) ==   "00000000_00000001_E1178E81_00000000");
1324     assert(r.toHexString(0, '_', 16+8+8+1) ==      "0_00000000_00000001_E1178E81_00000000");
1325     assert(r.toHexString(0, '_', 16+8+8+1, ' ') == "                  1_E1178E81_00000000");
1326     assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000");
1327     r = 0UL;
1328     assert(r.toHexString(0, '_', 0) == "0");
1329     assert(r.toHexString(0, '_', 7) == "0000000");
1330     assert(r.toHexString(0, '_', 7, ' ') == "      0");
1331     assert(r.toHexString(0, '#', 9) == "0#00000000");
1332     assert(r.toHexString(0, 0, 9) == "000000000");
1333 }
1334 
1335 //
1336 @safe pure unittest
1337 {
1338     BigUint r;
1339     r.fromHexString("1_E1178E81_00000000");
1340     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000");
1341     assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000");
1342     assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000");
1343     assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000");
1344     assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) ==   "00000000_00000001_e1178e81_00000000");
1345     assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000");
1346     assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == "                  1_e1178e81_00000000");
1347     assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000");
1348     r = 0UL;
1349     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0");
1350     assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000");
1351     assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == "      0");
1352     assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000");
1353     assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000");
1354     assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000");
1355 }
1356 
1357 
1358 private:
1359 void twosComplement(const(BigDigit) [] x, BigDigit[] result)
1360 pure nothrow @safe
1361 {
1362     foreach (i; 0 .. x.length)
1363     {
1364         result[i] = ~x[i];
1365     }
1366     result[x.length..$] = BigDigit.max;
1367 
1368     foreach (i; 0 .. result.length)
1369     {
1370         if (result[i] == BigDigit.max)
1371         {
1372             result[i] = 0;
1373         }
1374         else
1375         {
1376             result[i] += 1;
1377             break;
1378         }
1379     }
1380 }
1381 
1382 // Encode BigInt as BigDigit array (sign and 2's complement)
1383 BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign)
1384 pure nothrow @safe
1385 {
1386     size_t length = (x.length > minSize) ? x.length : minSize;
1387     BigDigit [] result = new BigDigit[length];
1388     if (sign)
1389     {
1390         twosComplement(x, result);
1391     }
1392     else
1393     {
1394         result[0 .. x.length] = x;
1395     }
1396     return result;
1397 }
1398 
1399 // works for any type
1400 T intpow(T)(T x, ulong n) pure nothrow @safe
1401 {
1402     T p;
1403 
1404     switch (n)
1405     {
1406     case 0:
1407         p = 1;
1408         break;
1409 
1410     case 1:
1411         p = x;
1412         break;
1413 
1414     case 2:
1415         p = x * x;
1416         break;
1417 
1418     default:
1419         p = 1;
1420         while (1)
1421         {
1422             if (n & 1)
1423                 p *= x;
1424             n >>= 1;
1425             if (!n)
1426                 break;
1427             x *= x;
1428         }
1429         break;
1430     }
1431     return p;
1432 }
1433 
1434 
1435 //  returns the maximum power of x that will fit in a uint.
1436 int highestPowerBelowUintMax(uint x) pure nothrow @safe
1437 {
1438      assert(x > 1, "x must be greater than 1");
1439      static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9,
1440                                           8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7];
1441      if (x<24) return maxpwr[x-2];
1442      if (x<41) return 6;
1443      if (x<85) return 5;
1444      if (x<256) return 4;
1445      if (x<1626) return 3;
1446      if (x<65_536) return 2;
1447      return 1;
1448 }
1449 
1450 //  returns the maximum power of x that will fit in a ulong.
1451 int highestPowerBelowUlongMax(uint x) pure nothrow @safe
1452 {
1453      assert(x > 1, "x must be greater than 1");
1454      static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18,
1455                                          17, 17, 16, 16, 15, 15, 15, 15, 14, 14,
1456                                          14, 14, 13, 13, 13, 13, 13, 13, 13, 12,
1457                                          12, 12, 12, 12, 12, 12, 12, 12, 12];
1458      if (x<41) return maxpwr[x-2];
1459      if (x<57) return 11;
1460      if (x<85) return 10;
1461      if (x<139) return 9;
1462      if (x<256) return 8;
1463      if (x<566) return 7;
1464      if (x<1626) return 6;
1465      if (x<7132) return 5;
1466      if (x<65_536) return 4;
1467      if (x<2_642_246) return 3;
1468      return 2;
1469 }
1470 
1471 version (StdUnittest)
1472 {
1473 
1474 private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe
1475 {
1476      int pwr = 1;
1477      for (ulong q = x;x*q < cast(ulong) uint.max; )
1478      {
1479          q*=x; ++pwr;
1480      }
1481      return pwr;
1482 }
1483 
1484 @safe pure unittest
1485 {
1486     assert(highestPowerBelowUintMax(10)==9);
1487     for (int k=82; k<88; ++k)
1488     {
1489         assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k));
1490     }
1491 }
1492 
1493 }
1494 
1495 
1496 /*  General unsigned subtraction routine for bigints.
1497  *  Sets result = x - y. If the result is negative, negative will be true.
1498  * Returns:
1499  *    unique memory
1500  */
1501 BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative)
1502 pure nothrow @safe
1503 {
1504     if (x.length == y.length)
1505     {
1506         // There's a possibility of cancellation, if x and y are almost equal.
1507         ptrdiff_t last = highestDifferentDigit(x, y);
1508         BigDigit [] result = new BigDigit[last+1];
1509         if (x[last] < y[last])
1510         {   // we know result is negative
1511             multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0);
1512             *negative = true;
1513         }
1514         else
1515         {   // positive or zero result
1516             multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0);
1517             *negative = false;
1518         }
1519         while (result.length > 1 && result[$-1] == 0)
1520         {
1521             result = result[0..$-1];
1522         }
1523 //        if (result.length >1 && result[$-1]==0) return result[0..$-1];
1524         return result;
1525     }
1526     // Lengths are different
1527     const(BigDigit) [] large, small;
1528     if (x.length < y.length)
1529     {
1530         *negative = true;
1531         large = y; small = x;
1532     }
1533     else
1534     {
1535         *negative = false;
1536         large = x; small = y;
1537     }
1538     // result.length will be equal to larger length, or could decrease by 1.
1539 
1540 
1541     BigDigit [] result = new BigDigit[large.length];
1542     BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0);
1543     result[small.length..$] = large[small.length..$];
1544     if (carry)
1545     {
1546         multibyteIncrementAssign!('-')(result[small.length..$], carry);
1547     }
1548     while (result.length > 1 && result[$-1] == 0)
1549     {
1550         result = result[0..$-1];
1551     }
1552     return result;
1553 }
1554 
1555 
1556 /*
1557  * return a + b
1558  * Returns:
1559  *    unique memory
1560  */
1561 BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe
1562 {
1563     const(BigDigit) [] x, y;
1564     if (a.length < b.length)
1565     {
1566         x = b; y = a;
1567     }
1568     else
1569     {
1570         x = a; y = b;
1571     }
1572     // now we know x.length > y.length
1573     // create result. add 1 in case it overflows
1574     BigDigit [] result = new BigDigit[x.length + 1];
1575 
1576     BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0);
1577     if (x.length != y.length)
1578     {
1579         result[y.length..$-1]= x[y.length..$];
1580         carry  = multibyteIncrementAssign!('+')(result[y.length..$-1], carry);
1581     }
1582     if (carry)
1583     {
1584         result[$-1] = carry;
1585         return result;
1586     }
1587     else
1588         return result[0..$-1];
1589 }
1590 
1591 /**  return x + y
1592  */
1593 BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow
1594 {
1595     uint hi = cast(uint)(y >>> 32);
1596     uint lo = cast(uint)(y& 0xFFFF_FFFF);
1597     auto len = x.length;
1598     if (x.length < 2 && hi != 0) ++len;
1599     BigDigit [] result = new BigDigit[len+1];
1600     result[0 .. x.length] = x[];
1601     if (x.length < 2 && hi != 0)
1602     {
1603         result[1]=hi;
1604         hi=0;
1605     }
1606     uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo);
1607     if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi);
1608     if (carry)
1609     {
1610         result[$-1] = carry;
1611         return result;
1612     }
1613     else
1614         return result[0..$-1];
1615 }
1616 
1617 /** Return x - y.
1618  *  x must be greater than y.
1619  */
1620 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe
1621 {
1622     uint hi = cast(uint)(y >>> 32);
1623     uint lo = cast(uint)(y & 0xFFFF_FFFF);
1624     BigDigit [] result = new BigDigit[x.length];
1625     result[] = x[];
1626     multibyteIncrementAssign!('-')(result[], lo);
1627     if (hi)
1628         multibyteIncrementAssign!('-')(result[1..$], hi);
1629     if (result[$-1] == 0)
1630         return result[0..$-1];
1631     else
1632         return result;
1633 }
1634 
1635 /**  General unsigned multiply routine for bigints.
1636  *  Sets result = x * y.
1637  *
1638  *  The length of y must not be larger than the length of x.
1639  *  Different algorithms are used, depending on the lengths of x and y.
1640  *  TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the
1641  *  unbalanced case. (But I doubt it would be faster in practice).
1642  *
1643  */
1644 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1645     pure nothrow @safe
1646 {
1647     import core.memory : GC;
1648     assert( result.length == x.length + y.length,
1649             "result array must have enough space to store computed result");
1650     assert( y.length > 0, "y must not be empty");
1651     assert( x.length >= y.length, "x must be greater or equal than y");
1652     if (y.length <= KARATSUBALIMIT)
1653     {
1654         // Small multiplier, we'll just use the asm classic multiply.
1655         if (y.length == 1)
1656         {   // Trivial case, no cache effects to worry about
1657             result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0);
1658             return;
1659         }
1660 
1661         immutable CACHELIMIT = getCacheLimit;
1662         if (x.length + y.length < CACHELIMIT)
1663             return mulSimple(result, x, y);
1664 
1665         // If x is so big that it won't fit into the cache, we divide it into chunks
1666         // Every chunk must be greater than y.length.
1667         // We make the first chunk shorter, if necessary, to ensure this.
1668 
1669         auto chunksize = CACHELIMIT / y.length;
1670         immutable residual  =  x.length % chunksize;
1671         if (residual < y.length)
1672         {
1673             chunksize -= y.length;
1674         }
1675 
1676         // Use schoolbook multiply.
1677         mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y);
1678         auto done = chunksize;
1679 
1680         while (done < x.length)
1681         {
1682             // result[done .. done+ylength] already has a value.
1683             chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) :  x.length - done;
1684             BigDigit [KARATSUBALIMIT] partial;
1685             partial[0 .. y.length] = result[done .. done+y.length];
1686             mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y);
1687             addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]);
1688             done += chunksize;
1689         }
1690         return;
1691     }
1692 
1693     immutable half = (x.length >> 1) + (x.length & 1);
1694     if (2*y.length*y.length <= x.length*x.length)
1695     {
1696         // UNBALANCED MULTIPLY
1697         // Use school multiply to cut into quasi-squares of Karatsuba-size
1698         // or larger. The ratio of the two sides of the 'square' must be
1699         // between 1.414:1 and 1:1. Use Karatsuba on each chunk.
1700         //
1701         // For maximum performance, we want the ratio to be as close to
1702         // 1:1 as possible. To achieve this, we can either pad x or y.
1703         // The best choice depends on the modulus x%y.
1704         auto numchunks = x.length / y.length;
1705         auto chunksize = y.length;
1706         auto extra =  x.length % y.length;
1707         auto maxchunk = chunksize + extra;
1708         bool paddingY; // true = we're padding Y, false = we're padding X.
1709         bool isExtraSmall = extra * extra * 2 < y.length * y.length;
1710         if (numchunks == 1 && isExtraSmall)
1711         {
1712             // We divide (x_first_half * y) and (x_last_half * y)
1713             // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)).
1714             // (1.414 ~ 1.707)/2:1 is balanced.
1715             BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length];
1716             BigDigit [] partial = scratchbuff[$ - y.length .. $];
1717             scratchbuff = scratchbuff[0 .. $ - y.length];
1718             mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff);
1719             partial[] = result[half .. half + y.length];
1720             mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff);
1721             BigDigit c = addAssignSimple(result[half .. half + y.length], partial);
1722             if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c);
1723             () @trusted { GC.free(scratchbuff.ptr); } ();
1724         }
1725         else
1726         {
1727             if (isExtraSmall)
1728             {
1729                 // The leftover bit is small enough that it should be incorporated
1730                 // in the existing chunks.
1731                 // Make all the chunks a tiny bit bigger
1732                 // (We're padding y with zeros)
1733                 chunksize += extra / numchunks;
1734                 extra = x.length - chunksize*numchunks;
1735                 // there will probably be a few left over.
1736                 // Every chunk will either have size chunksize, or chunksize+1.
1737                 maxchunk = chunksize + 1;
1738                 paddingY = true;
1739                 assert(chunksize + extra + chunksize *(numchunks-1) == x.length,
1740                     "Unexpected size");
1741             }
1742             else
1743             {
1744                 // the extra bit is large enough that it's worth making a new chunk.
1745                 // (This means we're padding x with zeros, when doing the first one).
1746                 maxchunk = chunksize;
1747                 ++numchunks;
1748                 paddingY = false;
1749                 assert(extra + chunksize *(numchunks-1) == x.length,
1750                     "Unexpected size");
1751             }
1752             // We make the buffer a bit bigger so we have space for the partial sums.
1753             BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1754             BigDigit [] partial = scratchbuff[$ - y.length .. $];
1755             scratchbuff = scratchbuff[0 .. $ - y.length];
1756             size_t done; // how much of X have we done so far?
1757             if (paddingY)
1758             {
1759                 // If the first chunk is bigger, do it first. We're padding y.
1760                 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1761                     x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff);
1762                 done = chunksize + (extra > 0 ? 1 : 0);
1763                 if (extra) --extra;
1764             }
1765             else
1766             {   // We're padding X. Begin with the extra bit.
1767                 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1768                 done = extra;
1769                 extra = 0;
1770             }
1771             immutable basechunksize = chunksize;
1772             while (done < x.length)
1773             {
1774                 chunksize = basechunksize + (extra > 0 ? 1 : 0);
1775                 if (extra) --extra;
1776                 partial[] = result[done .. done+y.length];
1777                 mulKaratsuba(result[done .. done + y.length + chunksize],
1778                         x[done .. done+chunksize], y, scratchbuff);
1779                 addAssignSimple(result[done .. done + y.length + chunksize], partial);
1780                 done += chunksize;
1781             }
1782             () @trusted { GC.free(scratchbuff.ptr); } ();
1783         }
1784     }
1785     else
1786     {
1787         // Balanced. Use Karatsuba directly.
1788         BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1789         mulKaratsuba(result, x, y, scratchbuff);
1790         () @trusted { GC.free(scratchbuff.ptr); } ();
1791     }
1792 }
1793 
1794 // https://issues.dlang.org/show_bug.cgi?id=20493
1795 @safe unittest
1796 {
1797     // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300)
1798     // the number itself isn't important, only the amount of digits, so we do a simpler
1799     // multiplication of the same size, analogous to:
1800     // 11111111 * 11111111 = 0123456787654321
1801     // but instead of base 10, it's in base `BigDigit`
1802 
1803     BigDigit[398] x = 1;
1804     BigDigit[236] y = 1;
1805     BigDigit[x.length + y.length] result;
1806     mulInternal(result[], x[], y[]);
1807 
1808     // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0]
1809     BigDigit[x.length + y.length] expected = y.length;
1810     foreach (BigDigit i; 0 .. y.length)
1811     {
1812         expected[i] = i+1;
1813         expected[$-1-i] = i;
1814     }
1815 
1816     assert(result == expected);
1817 }
1818 
1819 /**  General unsigned squaring routine for BigInts.
1820  *   Sets result = x*x.
1821  *   NOTE: If the highest half-digit of x is zero, the highest digit of result will
1822  *   also be zero.
1823  */
1824 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe
1825 {
1826   import core.memory : GC;
1827   // Squaring is potentially half a multiply, plus add the squares of
1828   // the diagonal elements.
1829   assert(result.length == 2*x.length,
1830      "result needs to have twice the capacity of x");
1831   if (x.length <= KARATSUBASQUARELIMIT)
1832   {
1833       if (x.length == 1)
1834       {
1835          result[1] = multibyteMul(result[0 .. 1], x, x[0], 0);
1836          return;
1837       }
1838       return squareSimple(result, x);
1839   }
1840   // The nice thing about squaring is that it always stays balanced
1841   BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1842   squareKaratsuba(result, x, scratchbuff);
1843   () @trusted { GC.free(scratchbuff.ptr); } ();
1844 }
1845 
1846 
1847 import core.bitop : bsr;
1848 
1849 /// if remainder is null, only calculate quotient.
1850 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u,
1851         const BigDigit [] v) pure nothrow @safe
1852 {
1853     import core.memory : GC;
1854     assert(quotient.length == u.length - v.length + 1,
1855         "Invalid quotient length");
1856     assert(remainder == null || remainder.length == v.length,
1857         "Invalid remainder");
1858     assert(v.length > 1, "v must have more than 1 element");
1859     assert(u.length >= v.length, "u must be as longer or longer than v");
1860 
1861     // Normalize by shifting v left just enough so that
1862     // its high-order bit is on, and shift u left the
1863     // same amount. The highest bit of u will never be set.
1864 
1865     BigDigit [] vn = new BigDigit[v.length];
1866     BigDigit [] un = new BigDigit[u.length + 1];
1867     // How much to left shift v, so that its MSB is set.
1868     uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]);
1869     if (s != 0)
1870     {
1871         multibyteShl(vn, v, s);
1872         un[$-1] = multibyteShl(un[0..$-1], u, s);
1873     }
1874     else
1875     {
1876         vn[] = v[];
1877         un[0..$-1] = u[];
1878         un[$-1] = 0;
1879     }
1880     if (quotient.length<FASTDIVLIMIT)
1881     {
1882         schoolbookDivMod(quotient, un, vn);
1883     }
1884     else
1885     {
1886         blockDivMod(quotient, un, vn);
1887     }
1888 
1889     // Unnormalize remainder, if required.
1890     if (remainder != null)
1891     {
1892         if (s == 0) remainder[] = un[0 .. vn.length];
1893         else multibyteShr(remainder, un[0 .. vn.length+1], s);
1894     }
1895     () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } ();
1896 }
1897 
1898 pure @safe unittest
1899 {
1900     immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000];
1901     immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000];
1902     uint [] q = new uint[u.length - v.length + 1];
1903     uint [] r = new uint[2];
1904     divModInternal(q, r, u, v);
1905     assert(q[]==[0xFFFF_FFFFu, 0]);
1906     assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]);
1907     u = [0, 0xFFFF_FFFE, 0x8000_0001];
1908     v = [0xFFFF_FFFF, 0x8000_0000];
1909     divModInternal(q, r, u, v);
1910 }
1911 
1912 
1913 // Converts a big uint to a hexadecimal string.
1914 //
1915 // Optionally, a separator character (eg, an underscore) may be added between
1916 // every 8 digits.
1917 // buff.length must be data.length*8 if separator is zero,
1918 // or data.length*9 if separator is non-zero. It will be completely filled.
1919 char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0,
1920         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
1921 {
1922     int x=0;
1923     for (ptrdiff_t i=data.length - 1; i >= 0; --i)
1924     {
1925         toHexZeroPadded(buff[x .. x+8], data[i], letterCase);
1926         x+=8;
1927         if (separator)
1928         {
1929             if (i>0) buff[x] = separator;
1930             ++x;
1931         }
1932     }
1933     return buff;
1934 }
1935 
1936 /**
1937  * Convert a big uint into an octal string.
1938  *
1939  * Params:
1940  *  buff = The destination buffer for the octal string. Must be large enough to
1941  *      store the result, including leading zeroes, which is
1942  *      ceil(data.length * BigDigitBits / 3) characters.
1943  *      The buffer is filled from back to front, starting from `buff[$-1]`.
1944  *  data = The biguint to be converted.
1945  *
1946  * Returns: The index of the leading non-zero digit in `buff`. Will be
1947  * `buff.length - 1` if the entire big uint is zero.
1948  */
1949 size_t biguintToOctal(char[] buff, const(BigDigit)[] data)
1950     pure nothrow @safe @nogc
1951 {
1952     ubyte carry = 0;
1953     int shift = 0;
1954     size_t penPos = buff.length - 1;
1955     size_t lastNonZero = buff.length - 1;
1956 
1957     pragma(inline) void output(uint digit) @nogc nothrow
1958     {
1959         if (digit != 0)
1960             lastNonZero = penPos;
1961         buff[penPos--] = cast(char)('0' + digit);
1962     }
1963 
1964     foreach (bigdigit; data)
1965     {
1966         if (shift < 0)
1967         {
1968             // Some bits were carried over from previous word.
1969             assert(shift > -3, "shift must be greater than -3");
1970             output(((bigdigit << -shift) | carry) & 0b111);
1971             shift += 3;
1972             assert(shift > 0, "shift must be 1 or greater");
1973         }
1974 
1975         while (shift <= BigDigitBits - 3)
1976         {
1977             output((bigdigit >>> shift) & 0b111);
1978             shift += 3;
1979         }
1980 
1981         if (shift < BigDigitBits)
1982         {
1983             // Some bits need to be carried forward.
1984             carry = (bigdigit >>> shift) & 0b11;
1985         }
1986         shift -= BigDigitBits;
1987         assert(shift >= -2 && shift <= 0, "shift must in [-2,0]");
1988     }
1989 
1990     if (shift < 0)
1991     {
1992         // Last word had bits that haven't been output yet.
1993         assert(shift > -3, "Shift must be greater than -3");
1994         output(carry);
1995     }
1996 
1997     return lastNonZero;
1998 }
1999 
2000 /** Convert a big uint into a decimal string.
2001  *
2002  * Params:
2003  *  data    The biguint to be converted. Will be destroyed.
2004  *  buff    The destination buffer for the decimal string. Must be
2005  *          large enough to store the result, including leading zeros.
2006  *          Will be filled backwards, starting from buff[$-1].
2007  *
2008  * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length.
2009  * Returns:
2010  *    the lowest index of buff which was used.
2011  */
2012 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe
2013 {
2014     ptrdiff_t sofar = buff.length;
2015     // Might be better to divide by (10^38/2^32) since that gives 38 digits for
2016     // the price of 3 divisions and a shr; this version only gives 27 digits
2017     // for 3 divisions.
2018     while (data.length>1)
2019     {
2020         uint rem = multibyteDivAssign(data, 10_0000_0000, 0);
2021         itoaZeroPadded(buff[sofar-9 .. sofar], rem);
2022         sofar -= 9;
2023         if (data[$-1] == 0 && data.length > 1)
2024         {
2025             data.length = data.length - 1;
2026         }
2027     }
2028     itoaZeroPadded(buff[sofar-10 .. sofar], data[0]);
2029     sofar -= 10;
2030     // and strip off the leading zeros
2031     while (sofar != buff.length-1 && buff[sofar] == '0')
2032         sofar++;
2033     return sofar;
2034 }
2035 
2036 /** Convert a decimal string into a big uint.
2037  *
2038  * Params:
2039  *  data    The biguint to be receive the result. Must be large enough to
2040  *          store the result.
2041  *  s       The decimal string. May contain _ or 0 .. 9
2042  *
2043  * The required length for the destination buffer is slightly less than
2044  *  1 + s.length/log2(10) = 1 + s.length/3.3219.
2045  *
2046  * Returns:
2047  *    the highest index of data which was used.
2048  */
2049 int biguintFromDecimal(Range)(BigDigit[] data, Range s)
2050 if (
2051     isInputRange!Range &&
2052     isSomeChar!(ElementType!Range) &&
2053     !isInfinite!Range)
2054 in
2055 {
2056     static if (hasLength!Range)
2057         assert((data.length >= 2) || (data.length == 1 && s.length == 1),
2058             "data has a invalid length");
2059 }
2060 do
2061 {
2062     import std.conv : ConvException;
2063 
2064     // Convert to base 1e19 = 10_000_000_000_000_000_000.
2065     // (this is the largest power of 10 that will fit into a long).
2066     // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219.
2067     // 485 bits will only just fit into 146 decimal digits.
2068     // As we convert the string, we record the number of digits we've seen in base 19:
2069     // hi is the number of digits/19, lo is the extra digits (0 to 18).
2070     // TODO: This is inefficient for very large strings (it is O(n^^2)).
2071     // We should take advantage of fast multiplication once the numbers exceed
2072     // Karatsuba size.
2073     uint lo = 0; // number of powers of digits, 0 .. 18
2074     uint x = 0;
2075     ulong y = 0;
2076     uint hi = 0; // number of base 1e19 digits
2077     data[0] = 0; // initially number is 0.
2078     if (data.length > 1)
2079         data[1] = 0;
2080 
2081     foreach (character; s)
2082     {
2083         if (character == '_')
2084             continue;
2085 
2086         if (character < '0' || character > '9')
2087             throw new ConvException("invalid digit");
2088         x *= 10;
2089         x += character - '0';
2090         ++lo;
2091         if (lo == 9)
2092         {
2093             y = x;
2094             x = 0;
2095         }
2096         if (lo == 18)
2097         {
2098             y *= 10_0000_0000;
2099             y += x;
2100             x = 0;
2101         }
2102         if (lo == 19)
2103         {
2104             y *= 10;
2105             y += x;
2106             x = 0;
2107             // Multiply existing number by 10^19, then add y1.
2108             if (hi>0)
2109             {
2110                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A
2111                 ++hi;
2112                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000
2113                 ++hi;
2114             }
2115             else
2116                 hi = 2;
2117             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2118             c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2119             if (c != 0)
2120             {
2121                 data[hi]=c;
2122                 ++hi;
2123             }
2124             y = 0;
2125             lo = 0;
2126         }
2127     }
2128     // Now set y = all remaining digits.
2129     if (lo >= 18)
2130     {
2131     }
2132     else if (lo >= 9)
2133     {
2134         for (int k=9; k<lo; ++k) y*=10;
2135         y+=x;
2136     }
2137     else
2138     {
2139         for (int k=0; k<lo; ++k) y*=10;
2140         y+=x;
2141     }
2142     if (lo != 0)
2143     {
2144         if (hi == 0)
2145         {
2146             data[0] = cast(uint) y;
2147             if (data.length == 1)
2148             {
2149                 hi = 1;
2150             }
2151             else
2152             {
2153                 data[1] = cast(uint)(y >>> 32);
2154                 hi=2;
2155             }
2156         }
2157         else
2158         {
2159             while (lo>0)
2160             {
2161                 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0);
2162                 if (c != 0)
2163                 {
2164                     data[hi]=c;
2165                     ++hi;
2166                 }
2167                 --lo;
2168             }
2169             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2170             if (y > 0xFFFF_FFFFL)
2171             {
2172                 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2173             }
2174             if (c != 0)
2175             {
2176                 data[hi]=c;
2177                 ++hi;
2178             }
2179         }
2180     }
2181     while (hi>1 && data[hi-1]==0)
2182         --hi;
2183     return hi;
2184 }
2185 
2186 
2187 // ------------------------
2188 // These in-place functions are only for internal use; they are incompatible
2189 // with COW.
2190 
2191 // Classic 'schoolbook' multiplication.
2192 void mulSimple(BigDigit[] result, const(BigDigit) [] left,
2193         const(BigDigit)[] right) pure nothrow @safe
2194 in
2195 {
2196     assert(result.length == left.length + right.length,
2197         "Result must be able to store left + right");
2198     assert(right.length>1, "right must not be empty");
2199 }
2200 do
2201 {
2202     result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0);
2203     multibyteMultiplyAccumulate(result[1..$], left, right[1..$]);
2204 }
2205 
2206 // Classic 'schoolbook' squaring
2207 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe
2208 in
2209 {
2210     assert(result.length == 2*x.length, "result must be twice as long as x");
2211     assert(x.length>1, "x must not be empty");
2212 }
2213 do
2214 {
2215     multibyteSquare(result, x);
2216 }
2217 
2218 
2219 // add two uints of possibly different lengths. Result must be as long
2220 // as the larger length.
2221 // Returns carry (0 or 1).
2222 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right)
2223 pure nothrow @safe
2224 in
2225 {
2226     assert(result.length == left.length,
2227         "result and left must be of the same length");
2228     assert(left.length >= right.length,
2229         "left must be longer or of equal length to right");
2230     assert(right.length > 0, "right must not be empty");
2231 }
2232 do
2233 {
2234     uint carry = multibyteAdd(result[0 .. right.length],
2235             left[0 .. right.length], right, 0);
2236     if (right.length < left.length)
2237     {
2238         result[right.length .. left.length] = left[right.length .. $];
2239         carry = multibyteIncrementAssign!('+')(result[right.length..$], carry);
2240     }
2241     return carry;
2242 }
2243 
2244 /* result = result - right
2245  * Returns carry = 1 if result was less than right.
2246 */
2247 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2248 pure nothrow @safe
2249 {
2250     assert(result.length >= right.length,
2251        "result must be longer or of equal length to right");
2252     uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0);
2253     if (c && result.length > right.length)
2254         c = multibyteIncrementAssign!('-')(result[right.length .. $], c);
2255     return c;
2256 }
2257 
2258 /* result = result + right
2259 */
2260 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2261 pure nothrow @safe
2262 {
2263     assert(result.length >= right.length,
2264        "result must be longer or of equal length to right");
2265     uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0);
2266     if (c && result.length > right.length)
2267        c = multibyteIncrementAssign!('+')(result[right.length .. $], c);
2268     return c;
2269 }
2270 
2271 /* performs result += wantSub? - right : right;
2272 */
2273 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right,
2274         bool wantSub) pure nothrow @safe
2275 {
2276     if (wantSub)
2277         return subAssignSimple(result, right);
2278     else
2279         return addAssignSimple(result, right);
2280 }
2281 
2282 
2283 // return true if x<y, considering leading zeros
2284 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe
2285 {
2286     assert(x.length >= y.length,
2287        "x must be longer or of equal length to y");
2288     auto k = x.length-1;
2289     while (x[k]==0 && k >= y.length)
2290         --k;
2291     if (k >= y.length)
2292         return false;
2293     while (k>0 && x[k]==y[k])
2294         --k;
2295     return x[k] < y[k];
2296 }
2297 
2298 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y.
2299 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
2300     pure nothrow @safe
2301 {
2302     assert(result.length == ((x.length >= y.length) ? x.length : y.length),
2303         "result must capable to store the maximum of x and y");
2304 
2305     size_t minlen;
2306     bool negative;
2307     if (x.length >= y.length)
2308     {
2309         minlen = y.length;
2310         negative = less(x, y);
2311     }
2312     else
2313     {
2314        minlen = x.length;
2315        negative = !less(y, x);
2316     }
2317     const (BigDigit)[] large, small;
2318     if (negative)
2319     {
2320         large = y; small = x;
2321     }
2322     else
2323     {
2324         large = x; small = y;
2325     }
2326 
2327     BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0);
2328     if (x.length != y.length)
2329     {
2330         result[minlen .. large.length]= large[minlen..$];
2331         result[large.length..$] = 0;
2332         if (carry)
2333             multibyteIncrementAssign!('-')(result[minlen..$], carry);
2334     }
2335     return negative;
2336 }
2337 
2338 /* Determine how much space is required for the temporaries
2339  * when performing a Karatsuba multiplication.
2340  * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see:
2341  * https://issues.dlang.org/show_bug.cgi?id=20493
2342  */
2343 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe
2344 {
2345     return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4;
2346 }
2347 
2348 /* Sets result = x*y, using Karatsuba multiplication.
2349 * x must be longer or equal to y.
2350 * Valid only for balanced multiplies, where x is not shorter than y.
2351 * It is superior to schoolbook multiplication if and only if
2352 *    sqrt(2)*y.length > x.length > y.length.
2353 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2)
2354 * The maximum allowable length of x and y is uint.max; but better algorithms
2355 * should be used far before that length is reached.
2356 * Params:
2357 * scratchbuff      An array long enough to store all the temporaries. Will be destroyed.
2358 */
2359 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x,
2360         const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe
2361 {
2362     assert(x.length >= y.length, "x must be greater or equal to y");
2363     assert(result.length < uint.max, "Operands too large");
2364     assert(result.length == x.length + y.length,
2365         "result must be as large as x + y");
2366     if (x.length <= KARATSUBALIMIT)
2367     {
2368         return mulSimple(result, x, y);
2369     }
2370     // Must be almost square (otherwise, a schoolbook iteration is better)
2371     assert(2L * y.length * y.length > (x.length-1) * (x.length-1),
2372         "Bigint Internal Error: Asymmetric Karatsuba");
2373 
2374     // The subtractive version of Karatsuba multiply uses the following result:
2375     // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid)
2376     // where mid = (x0-x1)*(y0-y1)
2377     // requiring 3 multiplies of length N, instead of 4.
2378     // The advantage of the subtractive over the additive version is that
2379     // the mid multiply cannot exceed length N. But there are subtleties:
2380     // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we
2381     // retain all of the leading zeros in the subtractions
2382 
2383     // half length, round up.
2384     auto half = (x.length >> 1) + (x.length & 1);
2385 
2386     const(BigDigit) [] x0 = x[0 .. half];
2387     const(BigDigit) [] x1 = x[half .. $];
2388     const(BigDigit) [] y0 = y[0 .. half];
2389     const(BigDigit) [] y1 = y[half .. $];
2390     BigDigit [] mid = scratchbuff[0 .. half*2];
2391     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2392     BigDigit [] resultLow = result[0 .. 2*half];
2393     BigDigit [] resultHigh = result[2*half .. $];
2394      // initially use result to store temporaries
2395     BigDigit [] xdiff= result[0 .. half];
2396     BigDigit [] ydiff = result[half .. half*2];
2397 
2398     // First, we calculate mid, and sign of mid
2399     immutable bool midNegative = inplaceSub(xdiff, x0, x1)
2400                       ^ inplaceSub(ydiff, y0, y1);
2401     mulKaratsuba(mid, xdiff, ydiff, newscratchbuff);
2402 
2403     // Low half of result gets x0 * y0. High half gets x1 * y1
2404 
2405     mulKaratsuba(resultLow, x0, y0, newscratchbuff);
2406 
2407     if (2L * y1.length * y1.length < x1.length * x1.length)
2408     {
2409         // an asymmetric situation has been created.
2410         // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1.
2411         // Applying one schoolbook multiply gives us two pieces each 1.2:1
2412         if (y1.length <= KARATSUBALIMIT)
2413             mulSimple(resultHigh, x1, y1);
2414         else
2415         {
2416             // divide x1 in two, then use schoolbook multiply on the two pieces.
2417             auto quarter = (x1.length >> 1) + (x1.length & 1);
2418             immutable ysmaller = (quarter >= y1.length);
2419             mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1,
2420                 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff);
2421             // Save the part which will be overwritten.
2422             immutable ysmaller2 = ((x1.length - quarter) >= y1.length);
2423             newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length];
2424             mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1,
2425                 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]);
2426 
2427             resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]);
2428         }
2429     }
2430     else
2431         mulKaratsuba(resultHigh, x1, y1, newscratchbuff);
2432 
2433     /* We now have result = x0y0 + (N*N)*x1y1
2434        Before adding or subtracting mid, we must calculate
2435        result += N * (x0y0 + x1y1)
2436        We can do this with three half-length additions. With a = x0y0, b = x1y1:
2437                       aHI aLO
2438         +       aHI   aLO
2439         +       bHI   bLO
2440         +  bHI  bLO
2441         =  R3   R2    R1   R0
2442         R1 = aHI + bLO + aLO
2443         R2 = aHI + bLO + aHI + carry_from_R1
2444         R3 = bHi + carry_from_R2
2445 
2446      It might actually be quicker to do it in two full-length additions:
2447      newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]);
2448      addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]);
2449    */
2450     BigDigit[] R1 = result[half .. half*2];
2451     BigDigit[] R2 = result[half*2 .. half*3];
2452     BigDigit[] R3 = result[half*3..$];
2453     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2454     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2455     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2456     if (c1+c2)
2457         multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2458     if (c1+c3)
2459         multibyteIncrementAssign!('+')(R3, c1+c3);
2460 
2461     // And finally we subtract mid
2462     addOrSubAssignSimple(result[half..$], mid, !midNegative);
2463 }
2464 
2465 void squareKaratsuba(BigDigit [] result, const BigDigit [] x,
2466         BigDigit [] scratchbuff) pure nothrow @safe
2467 {
2468     // See mulKaratsuba for implementation comments.
2469     // Squaring is simpler, since it never gets asymmetric.
2470     assert(result.length < uint.max, "Operands too large");
2471     assert(result.length == 2*x.length,
2472         "result must be twice the length of x");
2473     if (x.length <= KARATSUBASQUARELIMIT)
2474     {
2475         return squareSimple(result, x);
2476     }
2477     // half length, round up.
2478     auto half = (x.length >> 1) + (x.length & 1);
2479 
2480     const(BigDigit)[] x0 = x[0 .. half];
2481     const(BigDigit)[] x1 = x[half .. $];
2482     BigDigit [] mid = scratchbuff[0 .. half*2];
2483     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2484      // initially use result to store temporaries
2485     BigDigit [] xdiff= result[0 .. half];
2486     const BigDigit [] ydiff = result[half .. half*2];
2487 
2488     // First, we calculate mid. We don't need its sign
2489     inplaceSub(xdiff, x0, x1);
2490     squareKaratsuba(mid, xdiff, newscratchbuff);
2491 
2492     // Set result = x0x0 + (N*N)*x1x1
2493     squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff);
2494     squareKaratsuba(result[2*half .. $], x1, newscratchbuff);
2495 
2496     /* result += N * (x0x0 + x1x1)
2497        Do this with three half-length additions. With a = x0x0, b = x1x1:
2498         R1 = aHI + bLO + aLO
2499         R2 = aHI + bLO + aHI + carry_from_R1
2500         R3 = bHi + carry_from_R2
2501     */
2502     BigDigit[] R1 = result[half .. half*2];
2503     BigDigit[] R2 = result[half*2 .. half*3];
2504     BigDigit[] R3 = result[half*3..$];
2505     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2506     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2507     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2508     if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2509     if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3);
2510 
2511     // And finally we subtract mid, which is always positive
2512     subAssignSimple(result[half..$], mid);
2513 }
2514 
2515 /* Knuth's Algorithm D, as presented in
2516  * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002).
2517  * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18.
2518  * Given u and v, calculates  quotient  = u / v, u = u % v.
2519  * v must be normalized (ie, the MSB of v must be 1).
2520  * The most significant words of quotient and u may be zero.
2521  * u[0 .. v.length] holds the remainder.
2522  */
2523 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2524     pure nothrow @safe
2525 {
2526     assert(quotient.length == u.length - v.length,
2527         "quotient has wrong length");
2528     assert(v.length > 1, "v must not be empty");
2529     assert(u.length >= v.length, "u must be larger or equal to v");
2530     assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2531     assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]");
2532     // BUG: This code only works if BigDigit is uint.
2533     uint vhi = v[$-1];
2534     uint vlo = v[$-2];
2535 
2536     for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--)
2537     {
2538         // Compute estimate of quotient[j],
2539         // qhat = (three most significant words of u)/(two most sig words of v).
2540         uint qhat;
2541         if (u[j + v.length] == vhi)
2542         {
2543             // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001)
2544             qhat = uint.max;
2545         }
2546         else
2547         {
2548             uint ulo = u[j + v.length - 2];
2549             version (D_InlineAsm_X86)
2550             {
2551                 // Note: On DMD, this is only ~10% faster than the non-asm code.
2552                 uint *p = &u[j + v.length - 1];
2553                 asm pure nothrow @trusted
2554                 {
2555                     mov EAX, p;
2556                     mov EDX, [EAX+4];
2557                     mov EAX, [EAX];
2558                     div dword ptr [vhi];
2559                     mov qhat, EAX;
2560                     mov ECX, EDX;
2561 div3by2correction:
2562                     mul dword ptr [vlo]; // EDX:EAX = qhat * vlo
2563                     sub EAX, ulo;
2564                     sbb EDX, ECX;
2565                     jbe div3by2done;
2566                     mov EAX, qhat;
2567                     dec EAX;
2568                     mov qhat, EAX;
2569                     add ECX, dword ptr [vhi];
2570                     jnc div3by2correction;
2571 div3by2done:    ;
2572                 }
2573             }
2574             else
2575             { // version (InlineAsm)
2576                 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1];
2577                 immutable bigqhat = uu / vhi;
2578                 ulong rhat =  uu - bigqhat * vhi;
2579                 qhat = cast(uint) bigqhat;
2580 again:
2581                 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo))
2582                 {
2583                     --qhat;
2584                     rhat += vhi;
2585                     if (!(rhat & 0xFFFF_FFFF_0000_0000L))
2586                         goto again;
2587                 }
2588             } // version (InlineAsm)
2589         }
2590         // Multiply and subtract.
2591         uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0);
2592 
2593         if (u[j+v.length] < carry)
2594         {
2595             // If we subtracted too much, add back
2596             --qhat;
2597             carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0);
2598         }
2599         quotient[j] = qhat;
2600         u[j + v.length] = u[j + v.length] - carry;
2601     }
2602 }
2603 
2604 // TODO: Replace with a library call
2605 void itoaZeroPadded(char[] output, uint value)
2606     pure nothrow @safe @nogc
2607 {
2608     for (auto i = output.length; i--;)
2609     {
2610         if (value < 10)
2611         {
2612             output[i] = cast(char)(value + '0');
2613             value = 0;
2614         }
2615         else
2616         {
2617             output[i] = cast(char)(value % 10 + '0');
2618             value /= 10;
2619         }
2620     }
2621 }
2622 
2623 void toHexZeroPadded(char[] output, uint value,
2624         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
2625 {
2626     ptrdiff_t x = output.length - 1;
2627     static immutable string upperHexDigits = "0123456789ABCDEF";
2628     static immutable string lowerHexDigits = "0123456789abcdef";
2629     for ( ; x >= 0; --x)
2630     {
2631         if (letterCase == LetterCase.upper)
2632         {
2633             output[x] = upperHexDigits[value & 0xF];
2634         }
2635         else
2636         {
2637             output[x] = lowerHexDigits[value & 0xF];
2638         }
2639         value >>= 4;
2640     }
2641 }
2642 
2643 // Returns the highest value of i for which left[i]!=right[i],
2644 // or 0 if left[] == right[]
2645 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right)
2646 pure nothrow @nogc @safe
2647 {
2648     assert(left.length == right.length,
2649         "left have a length equal to that of right");
2650     for (ptrdiff_t i = left.length - 1; i>0; --i)
2651     {
2652         if (left[i] != right[i])
2653             return i;
2654     }
2655     return 0;
2656 }
2657 
2658 // Returns the lowest value of i for which x[i]!=0.
2659 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe
2660 {
2661     int k = 0;
2662     while (x[k]==0)
2663     {
2664         ++k;
2665         assert(k < x.length, "k must be less than x.length");
2666     }
2667     return k;
2668 }
2669 
2670 /*
2671     Calculate quotient and remainder of u / v using fast recursive division.
2672     v must be normalised, and must be at least half as long as u.
2673     Given u and v, v normalised, calculates  quotient  = u/v, u = u%v.
2674     scratch is temporary storage space, length must be >= quotient + 1.
2675 
2676 Returns:
2677     u[0 .. v.length] is the remainder. u[v.length..$] is corrupted.
2678 
2679     Implements algorithm 1.8 from MCA.
2680     This algorithm has an annoying special case. After the first recursion, the
2681     highest bit of the quotient may be set. This means that in the second
2682     recursive call, the 'in' contract would be violated. (This happens only
2683     when the top quarter of u is equal to the top half of v. A base 10
2684     equivalent example of this situation is 5517/56; the first step gives
2685     55/5 = 11). To maintain the in contract, we pad a zero to the top of both
2686     u and the quotient. 'mayOverflow' indicates that that the special case
2687     has occurred.
2688     (In MCA, a different strategy is used: the in contract is weakened, and
2689     schoolbookDivMod is more general: it allows the high bit of u to be set).
2690     See also:
2691     - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
2692       Max-Planck Institute fuer Informatik, (Oct 1998).
2693 */
2694 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v,
2695                      BigDigit[] scratch, bool mayOverflow = false)
2696                      pure nothrow @safe
2697 in
2698 {
2699     // v must be normalized
2700     assert(v.length > 1, "v must not be empty");
2701     assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2702     assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]");
2703     assert(quotient.length == u.length - v.length,
2704         "quotient must be of equal length of u - v");
2705     if (mayOverflow)
2706     {
2707         assert(u[$-1] == 0, "Invalid value at u[$ - 1]");
2708         assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]");
2709     }
2710 
2711     // Must be symmetric. Use block schoolbook division if not.
2712     assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length,
2713         "Invalid length of u");
2714     assert((mayOverflow ? u.length-1 : u.length) >= v.length,
2715         "Invalid length of u");
2716     assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1),
2717         "Invalid quotient length");
2718 }
2719 do
2720 {
2721     if (quotient.length < FASTDIVLIMIT)
2722     {
2723         return schoolbookDivMod(quotient, u, v);
2724     }
2725 
2726     // Split quotient into two halves, but keep padding in the top half
2727     auto k = (mayOverflow ?  quotient.length - 1 : quotient.length) >> 1;
2728 
2729     // RECURSION 1: Calculate the high half of the quotient
2730 
2731     // Note that if u and quotient were padded, they remain padded during
2732     // this call, so in contract is satisfied.
2733     recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $],
2734         scratch, mayOverflow);
2735 
2736     // quotient[k..$] is our guess at the high quotient.
2737     // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the
2738     // first remainder. u[0 .. 2*k] is the low part.
2739 
2740     // Calculate the full first remainder to be
2741     //    remainder - highQuotient * lowDivisor
2742     // reducing highQuotient until the remainder is positive.
2743     // The low part of the remainder, u[0 .. k], cannot be altered by this.
2744 
2745     adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k,
2746             scratch[0 .. quotient.length], mayOverflow);
2747 
2748     // RECURSION 2: Calculate the low half of the quotient
2749     // The full first remainder is now in u[0 .. k + v.length].
2750 
2751     if (u[k + v.length - 1] & 0x8000_0000)
2752     {
2753         // Special case. The high quotient is 0x1_00...000 or 0x1_00...001.
2754         // This means we need an extra quotient word for the next recursion.
2755         // We need to restore the invariant for the recursive calls.
2756         // We do this by padding both u and quotient. Extending u is trivial,
2757         // because the higher words will not be used again. But for the
2758         // quotient, we're clobbering the low word of the high quotient,
2759         // so we need save it, and add it back in after the recursive call.
2760 
2761         auto clobberedQuotient = quotient[k];
2762         u[k+v.length] = 0;
2763 
2764         recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1],
2765             v[k .. $], scratch, true);
2766         adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k,
2767             scratch[0 .. 2 * k+1], true);
2768 
2769         // Now add the quotient word that got clobbered earlier.
2770         multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient);
2771     }
2772     else
2773     {
2774         // The special case has NOT happened.
2775         recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $],
2776             scratch, false);
2777 
2778         // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length]
2779 
2780         adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k,
2781             scratch[0 .. 2 * k]);
2782     }
2783 }
2784 
2785 // rem -= quot * v[0 .. k].
2786 // If would make rem negative, decrease quot until rem is >= 0.
2787 // Needs (quot.length * k) scratch space to store the result of the multiply.
2788 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v,
2789         ptrdiff_t k,
2790         BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe
2791 {
2792     assert(rem.length == v.length, "rem must be as long as v");
2793     mulInternal(scratch, quot, v[0 .. k]);
2794     uint carry = 0;
2795     if (mayOverflow)
2796         carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]);
2797     else
2798         carry = subAssignSimple(rem, scratch);
2799     while (carry)
2800     {
2801         multibyteIncrementAssign!('-')(quot, 1); // quot--
2802         carry -= multibyteAdd(rem, rem, v, 0);
2803     }
2804 }
2805 
2806 // Cope with unbalanced division by performing block schoolbook division.
2807 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2808 pure nothrow @safe
2809 {
2810     import core.memory : GC;
2811     assert(quotient.length == u.length - v.length,
2812         "quotient must be of equal length of u - v");
2813     assert(v.length > 1, "v must not be empty");
2814     assert(u.length >= v.length, "u must be longer or of equal length as v");
2815     assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]");
2816     assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]");
2817     BigDigit [] scratch = new BigDigit[v.length + 1];
2818 
2819     // Perform block schoolbook division, with 'v.length' blocks.
2820     auto m = u.length - v.length;
2821     while (m > v.length)
2822     {
2823         immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0;
2824         BigDigit saveq;
2825         if (mayOverflow)
2826         {
2827             u[m + v.length] = 0;
2828             saveq = quotient[m];
2829         }
2830         recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)],
2831             u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow);
2832         if (mayOverflow)
2833         {
2834             assert(quotient[m] == 0, "quotient must not be 0");
2835             quotient[m] = saveq;
2836         }
2837         m -= v.length;
2838     }
2839     recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch);
2840     () @trusted { GC.free(scratch.ptr); } ();
2841 }
2842 
2843 @system unittest
2844 {
2845     version (none)
2846     {
2847         import core.stdc.stdio;
2848 
2849         void printBiguint(const uint [] data)
2850         {
2851             char [] buff = biguintToHex(new char[data.length*9], data, '_');
2852             printf("%.*s\n", cast(int) buff.length, buff.ptr);
2853         }
2854 
2855         void printDecimalBigUint(BigUint data)
2856         {
2857             auto str = data.toDecimalString(0);
2858             printf("%.*s\n", cast(int) str.length, str.ptr);
2859         }
2860     }
2861 
2862     uint [] a, b;
2863     a = new uint[43];
2864     b = new uint[179];
2865     for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i;
2866     for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546;
2867 
2868     a[$-1] |= 0x8000_0000;
2869     uint [] r = new uint[a.length];
2870     uint [] q = new uint[b.length-a.length+1];
2871 
2872     divModInternal(q, r, b, a);
2873     q = q[0..$-1];
2874     uint [] r1 = r.dup;
2875     uint [] q1 = q.dup;
2876     blockDivMod(q, b, a);
2877     r = b[0 .. a.length];
2878     assert(r[] == r1[]);
2879     assert(q[] == q1[]);
2880 }
2881 
2882 // biguintToOctal
2883 @safe unittest
2884 {
2885     enum bufSize = 5 * BigDigitBits / 3 + 1;
2886     auto buf = new char[bufSize];
2887     size_t i;
2888     BigDigit[] data = [ 342391 ];
2889 
2890     // Basic functionality with single word
2891     i = biguintToOctal(buf, data);
2892     assert(i == bufSize - 7 && buf[i .. $] == "1234567");
2893 
2894     // Test carrying bits between words
2895     data = [ 0x77053977, 0x39770539, 0x00000005 ];
2896     i = biguintToOctal(buf, data);
2897     assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567");
2898 
2899     // Test carried bits in the last word
2900     data = [ 0x80000000 ];
2901     i = biguintToOctal(buf, data);
2902     assert(buf[i .. $] == "20000000000");
2903 
2904     // Test boundary between 3rd and 4th word where the number of bits is
2905     // divisible by 3 and no bits should be carried.
2906     //
2907     // The 0xC0000000's are for "poisoning" the carry to be non-zero when the
2908     // rollover happens, so that if any bugs happen in wrongly adding the carry
2909     // to the next word, non-zero bits will show up in the output.
2910     data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ];
2911     i = biguintToOctal(buf, data);
2912     assert(buf[i .. $] == "2060000000001400000000030000000000");
2913 
2914     // Boundary case: 0
2915     data = [ 0 ];
2916     i = biguintToOctal(buf, data);
2917     assert(buf[i .. $] == "0");
2918 }