1 /** Fundamental operations for arbitrary-precision arithmetic
2  *
3  * These functions are for internal use only.
4  */
5 /*          Copyright Don Clugston 2008 - 2010.
6  * Distributed under the Boost Software License, Version 1.0.
7  *    (See accompanying file LICENSE_1_0.txt or copy at
8  *          http://www.boost.org/LICENSE_1_0.txt)
9  */
10 /* References:
11    "Modern Computer Arithmetic" (MCA) is the primary reference for all
12     algorithms used in this library.
13   - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic",
14     Version 0.5.9, (Oct 2010).
15   - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
16     Max-Planck Institute fuer Informatik, (Oct 1998).
17   - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.",
18     INRIA 4664, (Dec 2002).
19   - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?",
20     http://bodrato.it/papers (2006).
21   - A. Fog, "Optimizing subroutines in assembly language",
22     www.agner.org/optimize (2008).
23   - A. Fog, "The microarchitecture of Intel and AMD CPU's",
24     www.agner.org/optimize (2008).
25   - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs
26     and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008).
27 
28 Idioms:
29   Many functions in this module use
30   'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)'
31   in order to disable implicit conversion.
32 
33 */
34 module std.internal.math.biguintcore;
35 
36 version (D_InlineAsm_X86)
37 {
38     static import std.internal.math.biguintx86;
39 }
40 static import std.internal.math.biguintnoasm;
41 
42 import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT,
43     KARATSUBASQUARELIMIT;
44 
45 alias multibyteAdd = multibyteAddSub!('+');
46 alias multibyteSub = multibyteAddSub!('-');
47 
48 private import std.traits;
49 private import std.range.primitives;
50 public import std.ascii : LetterCase;
51 import std.range.primitives;
52 import std.traits;
53 
54 private:
55 
56 // dipatchers to the right low-level primitives. Added to allow BigInt CTFE for
57 // 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's
58 // used by the other architectures too.
59 // See comments below in case it has to be refactored.
60 version (X86)
61 uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry)
62 {
63     // must be checked before, otherwise D_InlineAsm_X86 is true.
64     if (__ctfe)
65         return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
66     // Runtime.
67     else version (D_InlineAsm_X86)
68         return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry);
69     // Runtime if no asm available.
70     else
71         return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
72 }
73 // Any other architecture
74 else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub;
75 
76 version (X86)
77 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry)
78 {
79     if (__ctfe)
80         return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
81     else version (D_InlineAsm_X86)
82         return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry);
83     else
84         return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
85 }
86 else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign;
87 
88 version (X86)
89 uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits)
90 {
91     if (__ctfe)
92         return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
93     else version (D_InlineAsm_X86)
94         return std.internal.math.biguintx86.multibyteShl(dest, src, numbits);
95     else
96         return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
97 }
98 else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl;
99 
100 version (X86)
101 void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits)
102 {
103     if (__ctfe)
104         std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
105     else version (D_InlineAsm_X86)
106         std.internal.math.biguintx86.multibyteShr(dest, src, numbits);
107     else
108         std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
109 }
110 else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr;
111 
112 version (X86)
113 uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
114 {
115     if (__ctfe)
116         return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
117     else version (D_InlineAsm_X86)
118         return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry);
119     else
120         return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
121 }
122 else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul;
123 
124 version (X86)
125 uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
126 {
127     if (__ctfe)
128         return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
129     else version (D_InlineAsm_X86)
130         return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry);
131     else
132         return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
133 }
134 else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd;
135 
136 version (X86)
137 void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right)
138 {
139     if (__ctfe)
140         std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
141     else version (D_InlineAsm_X86)
142         std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right);
143     else
144         std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
145 }
146 else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate;
147 
148 version (X86)
149 uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow)
150 {
151     if (__ctfe)
152         return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
153     else version (D_InlineAsm_X86)
154         return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow);
155     else
156         return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
157 }
158 else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign;
159 
160 version (X86)
161 void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src)
162 {
163     if (__ctfe)
164         std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
165     else version (D_InlineAsm_X86)
166         std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src);
167     else
168         std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
169 }
170 else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares;
171 
172 version (X86)
173 void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x)
174 {
175     if (__ctfe)
176         std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
177     else version (D_InlineAsm_X86)
178         std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x);
179     else
180         std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
181 }
182 else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate;
183 
184 version (X86)
185 void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x)
186 {
187     if (__ctfe)
188         std.internal.math.biguintnoasm.multibyteSquare(result, x);
189     else version (D_InlineAsm_X86)
190         std.internal.math.biguintx86.multibyteSquare(result, x);
191     else
192         std.internal.math.biguintnoasm.multibyteSquare(result, x);
193 }
194 else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare;
195 
196 // Limits for when to switch between algorithms.
197 // Half the size of the data cache.
198 @nogc nothrow pure @safe size_t getCacheLimit()
199 {
200     import core.cpuid : dataCaches;
201     return dataCaches[0].size * 1024 / 2;
202 }
203 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division
204 
205 
206 // These constants are used by shift operations
207 static if (BigDigit.sizeof == int.sizeof)
208 {
209     enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 }
210     alias BIGHALFDIGIT = ushort;
211 }
212 else static if (BigDigit.sizeof == long.sizeof)
213 {
214     alias BIGHALFDIGIT = uint;
215     enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 }
216 }
217 else static assert(0, "Unsupported BigDigit size");
218 
219 import std.exception : assumeUnique;
220 import std.traits : isIntegral;
221 enum BigDigitBits = BigDigit.sizeof*8;
222 template maxBigDigits(T)
223 if (isIntegral!T)
224 {
225     enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof;
226 }
227 
228 static immutable BigDigit[] ZERO = [0];
229 static immutable BigDigit[] ONE = [1];
230 static immutable BigDigit[] TWO = [2];
231 static immutable BigDigit[] TEN = [10];
232 
233 
234 public:
235 
236 /// BigUint performs memory management and wraps the low-level calls.
237 struct BigUint
238 {
239 private:
240     pure invariant()
241     {
242         assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ),
243                 "Invariant requires data to not empty or zero");
244     }
245 
246     immutable(BigDigit) [] data = ZERO;
247 
248     this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe
249     {
250        data = x;
251     }
252   package(std)  // used from: std.bigint
253     this(T)(T x) pure nothrow @safe scope
254     if (isIntegral!T)
255     {
256         opAssign(x);
257     }
258 
259     enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc {
260         return assumeUnique(input);
261     };
262 public:
263     // Length in uints
264     @property size_t uintLength() pure nothrow const @safe @nogc scope
265     {
266         static if (BigDigit.sizeof == uint.sizeof)
267         {
268             return data.length;
269         }
270         else static if (BigDigit.sizeof == ulong.sizeof)
271         {
272             return data.length * 2 -
273             ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0);
274         }
275     }
276     @property size_t ulongLength() pure nothrow const @safe @nogc scope
277     {
278         static if (BigDigit.sizeof == uint.sizeof)
279         {
280             return (data.length + 1) >> 1;
281         }
282         else static if (BigDigit.sizeof == ulong.sizeof)
283         {
284             return data.length;
285         }
286     }
287 
288     // The value at (cast(ulong[]) data)[n]
289     ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope
290     {
291         static if (BigDigit.sizeof == int.sizeof)
292         {
293             if (data.length == n*2 + 1) return data[n*2];
294             return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 );
295         }
296         else static if (BigDigit.sizeof == long.sizeof)
297         {
298             return data[n];
299         }
300     }
301 
302     uint peekUint(size_t n) pure nothrow const @safe @nogc scope
303     {
304         static if (BigDigit.sizeof == int.sizeof)
305         {
306             return data[n];
307         }
308         else
309         {
310             immutable x = data[n >> 1];
311             return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x;
312         }
313     }
314 
315     ///
316     void opAssign(Tulong)(Tulong u) pure nothrow @safe scope
317     if (is (Tulong == ulong))
318     {
319         if (u == 0) data = ZERO;
320         else if (u == 1) data = ONE;
321         else if (u == 2) data = TWO;
322         else if (u == 10) data = TEN;
323         else
324         {
325             static if (BigDigit.sizeof == int.sizeof)
326             {
327                 uint ulo = cast(uint)(u & 0xFFFF_FFFF);
328                 uint uhi = cast(uint)(u >> 32);
329                 if (uhi == 0)
330                 {
331                     data = [ulo];
332                 }
333                 else
334                 {
335                     data = [ulo, uhi];
336                 }
337             }
338             else static if (BigDigit.sizeof == long.sizeof)
339             {
340                 data = [u];
341             }
342         }
343     }
344     void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope
345     {
346         this.data = y.data;
347     }
348 
349     ///
350     int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope
351     {
352         if (data.length != y.data.length)
353             return (data.length > y.data.length) ?  1 : -1;
354         size_t k = highestDifferentDigit(data, y.data);
355         if (data[k] == y.data[k])
356             return 0;
357         return data[k] > y.data[k] ? 1 : -1;
358     }
359 
360     ///
361     int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope
362     if (is (Tulong == ulong))
363     {
364         if (data.length > maxBigDigits!Tulong)
365             return 1;
366 
367         foreach_reverse (i; 0 .. maxBigDigits!Tulong)
368         {
369             BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits));
370             if (tmp == 0)
371                 if (data.length >= i+1)
372                 {
373                     // Since ZERO is [0], so we cannot simply return 1 here, as
374                     // data[i] would be 0 for i == 0 in that case.
375                     return (data[i] > 0) ? 1 : 0;
376                 }
377                 else
378                     continue;
379             else
380                 if (i+1 > data.length)
381                     return -1;
382                 else if (tmp != data[i])
383                     return data[i] > tmp ? 1 : -1;
384         }
385         return 0;
386     }
387 
388     bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope
389     {
390            return y.data[] == data[];
391     }
392 
393     bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope
394     {
395         if (data.length > 2)
396             return false;
397         uint ylo = cast(uint)(y & 0xFFFF_FFFF);
398         uint yhi = cast(uint)(y >> 32);
399         if (data.length == 2 && data[1]!=yhi)
400             return false;
401         if (data.length == 1 && yhi != 0)
402             return false;
403         return (data[0] == ylo);
404     }
405 
406     bool isZero() pure const nothrow @safe @nogc scope
407     {
408         return data.length == 1 && data[0] == 0;
409     }
410 
411     size_t numBytes() pure nothrow const @safe @nogc scope
412     {
413         return data.length * BigDigit.sizeof;
414     }
415 
416     // the extra bytes are added to the start of the string
417     char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope
418     {
419         immutable predictlength = 20+20*(data.length/2); // just over 19
420         char [] buff = new char[frontExtraBytes + predictlength];
421         ptrdiff_t sofar = biguintToDecimal(buff, data.dup);
422         return buff[sofar-frontExtraBytes..$];
423     }
424 
425     /** Convert to a hex string, printing a minimum number of digits 'minPadding',
426      *  allocating an additional 'frontExtraBytes' at the start of the string.
427      *  Padding is done with padChar, which may be '0' or ' '.
428      *  'separator' is a digit separation character. If non-zero, it is inserted
429      *  between every 8 digits.
430      *  Separator characters do not contribute to the minPadding.
431      */
432     char [] toHexString(int frontExtraBytes, char separator = 0,
433             int minPadding=0, char padChar = '0',
434             LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope
435     {
436         // Calculate number of extra padding bytes
437         size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof)
438             ? minPadding - data.length * 2 * BigDigit.sizeof : 0;
439 
440         // Length not including separator bytes
441         size_t lenBytes = data.length * 2 * BigDigit.sizeof;
442 
443         // Calculate number of separator bytes
444         size_t mainSeparatorBytes = separator ? (lenBytes  / 8) - 1 : 0;
445         immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0;
446 
447         char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes];
448         biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase);
449         if (extraPad > 0)
450         {
451             if (separator)
452             {
453                 size_t start = frontExtraBytes; // first index to pad
454                 if (extraPad &7)
455                 {
456                     // Do 1 to 7 extra zeros.
457                     buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar;
458                     buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator);
459                     start += (extraPad & 7) + 1;
460                 }
461                 for (int i=0; i< (extraPad >> 3); ++i)
462                 {
463                     buff[start .. start + 8] = padChar;
464                     buff[start + 8] = (padChar == ' ' ? ' ' : separator);
465                     start += 9;
466                 }
467             }
468             else
469             {
470                 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar;
471             }
472         }
473         int z = frontExtraBytes;
474         if (lenBytes > minPadding)
475         {
476             // Strip leading zeros.
477             ptrdiff_t maxStrip = lenBytes - minPadding;
478             while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0)
479             {
480                 ++z;
481                 --maxStrip;
482             }
483         }
484         if (padChar!='0')
485         {
486             // Convert leading zeros into padChars.
487             for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k)
488             {
489                 if (buff[k]=='0') buff[k]=padChar;
490             }
491         }
492         return buff[z-frontExtraBytes..$];
493     }
494 
495     /**
496      * Convert to an octal string.
497      */
498     char[] toOctalString() pure nothrow @safe const scope
499     {
500         auto predictLength = 1 + data.length*BigDigitBits / 3;
501         char[] buff = new char[predictLength];
502         size_t firstNonZero = biguintToOctal(buff, data);
503         return buff[firstNonZero .. $];
504     }
505 
506     // return false if invalid character found
507     bool fromHexString(Range)(Range s) scope
508     if (isBidirectionalRange!Range && isSomeChar!(ElementType!Range))
509     {
510         import std.range : walkLength;
511 
512         //Strip leading zeros
513         while (!s.empty && s.front == '0')
514             s.popFront;
515 
516         if (s.empty)
517         {
518             data = ZERO;
519             return true;
520         }
521 
522         immutable len = (s.save.walkLength + 15) / 4;
523         auto tmp = new BigDigit[len + 1];
524         uint part, sofar, partcount;
525 
526         foreach_reverse (character; s)
527         {
528             if (character == '_')
529                 continue;
530 
531             uint x;
532             if (character >= '0' && character <= '9')
533             {
534                 x = character - '0';
535             }
536             else if (character >= 'A' && character <= 'F')
537             {
538                 x = character - 'A' + 10;
539             }
540             else if (character >= 'a' && character <= 'f')
541             {
542                 x = character - 'a' + 10;
543             }
544             else
545             {
546                 return false;
547             }
548 
549             part >>= 4;
550             part |= (x << (32 - 4));
551             ++partcount;
552 
553             if (partcount == 8)
554             {
555                 tmp[sofar] = part;
556                 ++sofar;
557                 partcount = 0;
558                 part = 0;
559             }
560         }
561         if (part)
562         {
563             for ( ; partcount != 8; ++partcount) part >>= 4;
564             tmp[sofar] = part;
565             ++sofar;
566         }
567         if (sofar == 0)
568             data = ZERO;
569         else
570             data = trustedAssumeUnique(tmp[0 .. sofar]);
571 
572         return true;
573     }
574 
575     // return true if OK; false if erroneous characters found
576     bool fromDecimalString(Range)(Range s) scope
577     if (isForwardRange!Range && isSomeChar!(ElementType!Range))
578     {
579         import std.range : walkLength;
580 
581         while (!s.empty && s.front == '0')
582         {
583             s.popFront;
584         }
585 
586         if (s.empty)
587         {
588             data = ZERO;
589             return true;
590         }
591 
592         auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19;
593         auto tmp = new BigDigit[predict_length];
594 
595         tmp.length = biguintFromDecimal(tmp, s);
596 
597         data = trustedAssumeUnique(tmp);
598         return true;
599     }
600 
601     void fromMagnitude(Range)(Range magnitude) scope
602     if (isInputRange!Range
603         && (isForwardRange!Range || hasLength!Range)
604         && isUnsigned!(ElementType!Range))
605     {
606         while (!magnitude.empty && magnitude.front == 0)
607             magnitude.popFront;
608         static if (hasLength!Range)
609             immutable inputLen = magnitude.length;
610         else
611             immutable inputLen = magnitude.save.walkLength;
612         if (!inputLen)
613         {
614             this.data = ZERO;
615             return;
616         }
617         // `magnitude` has its most significant element first but BigUint.data
618         // stores the most significant last.
619         BigDigit[] newDigits;
620         alias E = ElementType!Range;
621         static if (E.sizeof == BigDigit.sizeof)
622         {
623             newDigits = new BigDigit[inputLen];
624             foreach_reverse (ref digit; newDigits)
625             {
626                 digit = magnitude.front;
627                 magnitude.popFront();
628             }
629         }
630         else static if (E.sizeof < BigDigit.sizeof)
631         {
632             enum elementsPerDigit = BigDigit.sizeof / E.sizeof;
633             newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit];
634             immutable remainder = inputLen % elementsPerDigit;
635             // If there is a remainder specially assemble the most significant digit.
636             if (remainder)
637             {
638                 BigDigit tmp = magnitude.front;
639                 magnitude.popFront();
640                 foreach (_; 1 .. remainder)
641                 {
642                     tmp = (tmp << (E.sizeof * 8)) | magnitude.front;
643                     magnitude.popFront();
644                 }
645                 newDigits[$-1] = tmp;
646             }
647             // Assemble full digits from most to least significant.
648             foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)])
649             {
650                 BigDigit tmp;
651                 static foreach (n; 0 .. elementsPerDigit)
652                 {
653                     tmp |= cast(BigDigit) magnitude.front <<
654                         ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8);
655                     magnitude.popFront();
656                 }
657                 digit = tmp;
658             }
659         }
660         else static if (E.sizeof > BigDigit.sizeof)
661         {
662             enum digitsPerElement = E.sizeof / BigDigit.sizeof;
663             newDigits = new BigDigit[inputLen * digitsPerElement];
664             size_t i = newDigits.length - 1;
665             foreach (element; magnitude)
666             {
667                 static foreach (n; 0 .. digitsPerElement)
668                     newDigits[i - n] =
669                         cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8));
670                 i -= digitsPerElement;
671             }
672             while (newDigits[$-1] == 0)
673                 newDigits = newDigits[0 .. $-1];
674         }
675         else
676             static assert(0);
677         this.data = trustedAssumeUnique(newDigits);
678         return;
679     }
680 
681     nothrow pure @safe unittest
682     {
683         immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD];
684         // Internal representation is most-significant-last but `fromMagnitude`
685         // argument is most-significant-first.
686         immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005];
687         BigUint b;
688         // Test with reference magnitude.
689         b.fromMagnitude(referenceMagnitude);
690         assert(b.data == referenceData);
691         // Test ubyte array.
692         import std.bitmanip : nativeToBigEndian;
693         ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~
694             nativeToBigEndian(referenceMagnitude[1]) ~
695             nativeToBigEndian(referenceMagnitude[2]);
696         b.data = ZERO;
697         b.fromMagnitude(ubyteMagnitude);
698         assert(b.data == referenceData);
699         // Test ulong array.
700         static if (BigDigit.sizeof == uint.sizeof)
701             immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]),
702                 ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2],
703             ];
704         else static if (BigDigit.sizeof == ulong.sizeof)
705             alias ulongMagnitude = referenceMagnitude;
706         b.data = ZERO;
707         b.fromMagnitude(ulongMagnitude);
708         assert(b.data == referenceData);
709     }
710 
711     ////////////////////////
712     //
713     // All of these member functions create a new BigUint.
714 
715     // return x >> y
716     BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope
717     if (op == ">>" && is (Tulong == ulong))
718     {
719         assert(y > 0, "Can not right shift BigUint by 0");
720         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
721         if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO);
722         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
723         if (bits == 0)
724         {
725             return BigUint(data[words..$]);
726         }
727         else
728         {
729             uint [] result = new BigDigit[data.length - words];
730             multibyteShr(result, data[words..$], bits);
731 
732             if (result.length > 1 && result[$-1] == 0)
733                 return BigUint(trustedAssumeUnique(result[0 .. $-1]));
734             else
735                 return BigUint(trustedAssumeUnique(result));
736         }
737     }
738 
739     // return x << y
740     BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope
741     if (op == "<<" && is (Tulong == ulong))
742     {
743         assert(y > 0, "Can not left shift BigUint by 0");
744         if (isZero()) return this;
745         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
746         assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max),
747                 "Shift result exceeds temporary store");
748         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
749         BigDigit [] result = new BigDigit[data.length + words+1];
750         result[0 .. words] = 0;
751         if (bits == 0)
752         {
753             result[words .. words+data.length] = data[];
754             return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
755         }
756         else
757         {
758             immutable c = multibyteShl(result[words .. words+data.length], data, bits);
759             if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
760             result[$-1] = c;
761             return BigUint(trustedAssumeUnique(result));
762         }
763     }
764 
765     // If wantSub is false, return x + y, leaving sign unchanged
766     // If wantSub is true, return abs(x - y), negating sign if x < y
767     static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y, bool wantSub, ref bool sign) pure nothrow @safe
768     if (is(Tulong == ulong))
769     {
770         BigUint r;
771         if (wantSub)
772         {   // perform a subtraction
773             if (x.data.length > 2)
774             {
775                 // subInt returns GC allocated array, can be safely cast to immutable
776                 r.data = (() @trusted => cast(immutable) subInt(x.data, y))();
777             }
778             else
779             {   // could change sign!
780                 ulong xx = x.data[0];
781                 if (x.data.length > 1)
782                     xx += (cast(ulong) x.data[1]) << 32;
783                 ulong d;
784                 if (xx <= y)
785                 {
786                     d = y - xx;
787                     sign = !sign;
788                 }
789                 else
790                 {
791                     d = xx - y;
792                 }
793                 if (d == 0)
794                 {
795                     r = 0UL;
796                     sign = false;
797                     return r;
798                 }
799                 if (d > uint.max)
800                 {
801                     r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)];
802                 }
803                 else
804                 {
805                     r.data = [cast(uint)(d & 0xFFFF_FFFF)];
806                 }
807             }
808         }
809         else
810         {
811             // addInt returns GC allocated array, can be safely cast to immutable
812             r.data = (() @trusted => cast(immutable) addInt(x.data, y))();
813         }
814         return r;
815     }
816 
817     // If wantSub is false, return x + y, leaving sign unchanged.
818     // If wantSub is true, return abs(x - y), negating sign if x<y
819     static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign)
820         pure nothrow @safe
821     {
822         BigUint r;
823         if (wantSub)
824         {   // perform a subtraction
825             bool negative;
826             // sub returns GC allocated array, can be safely cast to immutable
827             r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))();
828             sign ^= negative;
829             if (r.isZero())
830             {
831                 sign = false;
832             }
833         }
834         else
835         {
836             // add returns GC allocated array, can be safely cast to immutable
837             r.data = (() @trusted => cast(immutable) add(x.data, y.data))();
838         }
839         return r;
840     }
841 
842 
843     //  return x*y.
844     static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe
845     {
846         if (y == 0 || x == 0) return BigUint(ZERO);
847         static if (T.sizeof * 8 <= 32)
848             uint hi = 0;
849         else
850             uint hi = cast(uint) (y >>> 32);
851         uint lo = cast(uint) (y & 0xFFFF_FFFF);
852         uint [] result = new BigDigit[x.data.length+1+(hi != 0)];
853         result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0);
854         if (hi != 0)
855         {
856             result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1],
857                 x.data, hi, 0);
858         }
859         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
860     }
861 
862     /*  return x * y.
863      */
864     static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe
865     {
866         if (y == 0 || x == 0)
867             return BigUint(ZERO);
868         auto len = x.data.length + y.data.length;
869         BigDigit [] result = new BigDigit[len];
870         if (y.data.length > x.data.length)
871         {
872             mulInternal(result, y.data, x.data);
873         }
874         else
875         {
876             if (x.data[]==y.data[]) squareInternal(result, x.data);
877             else mulInternal(result, x.data, y.data);
878         }
879         // the highest element could be zero,
880         // in which case we need to reduce the length
881         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
882     }
883 
884     // return x / y
885     static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe
886     if ( is(immutable T == immutable uint) )
887     {
888         uint y = y_;
889         if (y == 1)
890             return x;
891         uint [] result = new BigDigit[x.data.length];
892         if ((y&(-y))==y)
893         {
894             assert(y != 0, "BigUint division by zero");
895             // perfect power of 2
896             uint b = 0;
897             for (;y != 1; y>>=1)
898             {
899                 ++b;
900             }
901             multibyteShr(result, x.data, b);
902         }
903         else
904         {
905             result[] = x.data[];
906             cast(void) multibyteDivAssign(result, y, 0);
907         }
908         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
909     }
910 
911     static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe
912     if ( is(immutable T == immutable ulong) )
913     {
914         if (y <= uint.max)
915             return divInt!uint(x, cast(uint) y);
916         if (x.data.length < 2)
917             return BigUint(ZERO);
918         uint hi = cast(uint)(y >>> 32);
919         uint lo = cast(uint)(y & 0xFFFF_FFFF);
920         immutable uint[2] z = [lo, hi];
921         BigDigit[] result = new BigDigit[x.data.length - z.length + 1];
922         divModInternal(result, null, x.data, z[]);
923         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
924     }
925 
926     // return x % y
927     static uint modInt(T)(scope BigUint x, T y_) pure
928     if ( is(immutable T == immutable uint) )
929     {
930         import core.memory : GC;
931         uint y = y_;
932         assert(y != 0, "% 0 not allowed");
933         if ((y&(-y)) == y)
934         {   // perfect power of 2
935             return x.data[0] & (y-1);
936         }
937         else
938         {
939             // horribly inefficient - malloc, copy, & store are unnecessary.
940             uint [] wasteful = new BigDigit[x.data.length];
941             wasteful[] = x.data[];
942             immutable rem = multibyteDivAssign(wasteful, y, 0);
943             if (!__ctfe)
944             {
945                 //use free only at runtime
946                 () @trusted { GC.free(wasteful.ptr); } ();
947             }
948             return rem;
949         }
950     }
951 
952     // return x / y
953     static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe
954     {
955         if (y.data.length > x.data.length)
956             return BigUint(ZERO);
957         if (y.data.length == 1)
958             return divInt(x, y.data[0]);
959         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
960            divModInternal(result, null, x.data, y.data);
961         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
962     }
963 
964     // return x % y
965     static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe
966     {
967         if (y.data.length > x.data.length) return x;
968         if (y.data.length == 1)
969         {
970             return BigUint([modInt(x, y.data[0])]);
971         }
972         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
973         BigDigit [] rem = new BigDigit[y.data.length];
974         divModInternal(result, rem, x.data, y.data);
975         return BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
976     }
977 
978     // Return x / y in quotient, x % y in remainder
979     static void divMod(BigUint x, scope BigUint y,
980                        out BigUint quotient, out BigUint remainder) pure nothrow @safe
981     {
982         /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */
983         if (y.data.length > x.data.length)
984         {
985             quotient = 0uL;
986             remainder = x;
987         }
988         else if (y.data.length == 1)
989         {
990             quotient = divInt(x, y.data[0]);
991             remainder = BigUint([modInt(x, y.data[0])]);
992         }
993         else
994         {
995             BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1];
996             BigDigit[] rem = new BigDigit[y.data.length];
997             divModInternal(quot, rem, x.data, y.data);
998             quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot)));
999             remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
1000         }
1001     }
1002 
1003     // return x op y
1004     static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign)
1005     pure nothrow @safe
1006     if (op == "|" || op == "^" || op == "&")
1007     {
1008         auto d1 = includeSign(x.data, y.uintLength, xSign);
1009         auto d2 = includeSign(y.data, x.uintLength, ySign);
1010 
1011         foreach (i; 0 .. d1.length)
1012         {
1013             mixin("d1[i] " ~ op ~ "= d2[i];");
1014         }
1015 
1016         mixin("resultSign = xSign " ~ op ~ " ySign;");
1017 
1018         if (resultSign)
1019         {
1020             twosComplement(d1, d1);
1021         }
1022 
1023         return BigUint(removeLeadingZeros(trustedAssumeUnique(d1)));
1024     }
1025 
1026     /**
1027      * Return a BigUint which is x raised to the power of y.
1028      * Method: Powers of 2 are removed from x, then left-to-right binary
1029      * exponentiation is used.
1030      * Memory allocation is minimized: at most one temporary BigUint is used.
1031      */
1032     static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe
1033     {
1034         // Deal with the degenerate cases first.
1035         if (y == 0) return BigUint(ONE);
1036         if (y == 1) return x;
1037         if (x == 0 || x == 1) return x;
1038 
1039         BigUint result;
1040 
1041         // Simplify, step 1: Remove all powers of 2.
1042         uint firstnonzero = firstNonZeroDigit(x.data);
1043         // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits))
1044         // where BigDigitBits = BigDigit.sizeof * 8
1045 
1046         // See if x[firstnonzero..$] can now fit into a single digit.
1047         bool singledigit = ((x.data.length - firstnonzero) == 1);
1048         // If true, then x0 is that digit
1049         // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits))
1050         BigDigit x0 = x.data[firstnonzero];
1051         assert(x0 != 0, "pow(0, y) not allowed");
1052         // Length of the non-zero portion
1053         size_t nonzerolength = x.data.length - firstnonzero;
1054         ulong y0;
1055         uint evenbits = 0; // number of even bits in the bottom of x
1056         while (!(x0 & 1))
1057         {
1058             x0 >>= 1;
1059             ++evenbits;
1060         }
1061 
1062         if (x.data.length- firstnonzero == 2)
1063         {
1064             // Check for a single digit straddling a digit boundary
1065             const BigDigit x1 = x.data[firstnonzero+1];
1066             if ((x1 >> evenbits) == 0)
1067             {
1068                 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits));
1069                 singledigit = true;
1070             }
1071         }
1072         // Now if (singledigit), x^^y  = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1073 
1074         uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end
1075 
1076         // Simplify, step 2: For singledigits, see if we can trivially reduce y
1077 
1078         BigDigit finalMultiplier = 1UL;
1079 
1080         if (singledigit)
1081         {
1082             // x fits into a single digit. Raise it to the highest power we can
1083             // that still fits into a single digit, then reduce the exponent accordingly.
1084             // We're quite likely to have a residual multiply at the end.
1085             // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100.
1086             // and 5^^13 still fits into a uint.
1087             evenshiftbits  = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK);
1088             if (x0 == 1)
1089             {   // Perfect power of 2
1090                 result = 1UL;
1091                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1092             }
1093             immutable p = highestPowerBelowUintMax(x0);
1094             if (y <= p)
1095             {   // Just do it with pow
1096                 result = cast(ulong) intpow(x0, y);
1097                 if (evenbits + firstnonzero == 0)
1098                     return result;
1099                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1100             }
1101             y0 = y / p;
1102             finalMultiplier = intpow(x0, y - y0*p);
1103             x0 = intpow(x0, p);
1104             // Result is x0
1105             nonzerolength = 1;
1106         }
1107         // Now if (singledigit), x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1108 
1109         // Perform a crude check for overflow and allocate result buffer.
1110         // The length required is y * lg2(x) bits.
1111         // which will always fit into y*x.length digits. But this is
1112         // a gross overestimate if x is small (length 1 or 2) and the highest
1113         // digit is nearly empty.
1114         // A better estimate is:
1115         //   y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits,
1116         //  and the first term is always between
1117         //  y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and
1118         //  y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS
1119         // For single digit payloads, we already have
1120         //   x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1121         // and x0 is almost a full digit, so it's a tight estimate.
1122         // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y
1123         // Note that the divisions must be rounded up.
1124 
1125         // Estimated length in BigDigits
1126         immutable estimatelength = singledigit
1127             ? 1 + y0 + ((evenbits*y  + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y
1128             :  x.data.length * y;
1129         // Imprecise check for overflow. Makes the extreme cases easier to debug
1130         // (less extreme overflow will result in an out of memory error).
1131         if (estimatelength > uint.max/(4*BigDigit.sizeof))
1132             assert(0, "Overflow in BigInt.pow");
1133 
1134         // The result buffer includes space for all the trailing zeros
1135         BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength];
1136 
1137         // Do all the powers of 2!
1138         size_t result_start = cast(size_t)( firstnonzero * y
1139             + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0));
1140 
1141         resultBuffer[0 .. result_start] = 0;
1142         BigDigit [] t1 = resultBuffer[result_start..$];
1143         BigDigit [] r1;
1144 
1145         if (singledigit)
1146         {
1147             r1 = t1[0 .. 1];
1148             r1[0] = x0;
1149             y = y0;
1150         }
1151         else
1152         {
1153             // It's not worth right shifting by evenbits unless we also shrink the length after each
1154             // multiply or squaring operation. That might still be worthwhile for large y.
1155             r1 = t1[0 .. x.data.length - firstnonzero];
1156             r1[0..$] = x.data[firstnonzero..$];
1157         }
1158 
1159         if (y>1)
1160         {   // Set r1 = r1 ^^ y.
1161             // The secondary buffer only needs space for the multiplication results
1162             BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start];
1163             BigDigit [] r2;
1164 
1165             int shifts = 63; // num bits in a long
1166             while (!(y & 0x8000_0000_0000_0000L))
1167             {
1168                 y <<= 1;
1169                 --shifts;
1170             }
1171             y <<=1;
1172 
1173             while (y != 0)
1174             {
1175                 // For each bit of y: Set r1 =  r1 * r1
1176                 // If the bit is 1, set r1 = r1 * x
1177                 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5.
1178                 // Optimization opportunity: if more than 2 bits in y are set,
1179                 // it's usually possible to reduce the number of multiplies
1180                 // by caching odd powers of x. eg for y = 54,
1181                 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2
1182                 r2 = t2[0 .. r1.length*2];
1183                 squareInternal(r2, r1);
1184                 if (y & 0x8000_0000_0000_0000L)
1185                 {
1186                     r1 = t1[0 .. r2.length + nonzerolength];
1187                     if (singledigit)
1188                     {
1189                         r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0);
1190                     }
1191                     else
1192                     {
1193                         mulInternal(r1, r2, x.data[firstnonzero..$]);
1194                     }
1195                 }
1196                 else
1197                 {
1198                     r1 = t1[0 .. r2.length];
1199                     r1[] = r2[];
1200                 }
1201                 y <<=1;
1202                 shifts--;
1203             }
1204             while (shifts>0)
1205             {
1206                 r2 = t2[0 .. r1.length * 2];
1207                 squareInternal(r2, r1);
1208                 r1 = t1[0 .. r2.length];
1209                 r1[] = r2[];
1210                 --shifts;
1211             }
1212         }
1213 
1214         if (finalMultiplier != 1)
1215         {
1216             const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0);
1217             if (carry)
1218             {
1219                 r1 = t1[0 .. r1.length + 1];
1220                 r1[$-1] = carry;
1221             }
1222         }
1223         if (evenshiftbits)
1224         {
1225             const BigDigit carry = multibyteShl(r1, r1, evenshiftbits);
1226             if (carry != 0)
1227             {
1228                 r1 = t1[0 .. r1.length + 1];
1229                 r1[$ - 1] = carry;
1230             }
1231         }
1232         while (r1[$ - 1]==0)
1233         {
1234             r1=r1[0 .. $ - 1];
1235         }
1236         return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length]));
1237     }
1238 
1239     // Implement toHash so that BigUint works properly as an AA key.
1240     size_t toHash() const @nogc nothrow pure @safe scope
1241     {
1242         return .hashOf(data);
1243     }
1244 
1245 } // end BigUint
1246 
1247 @safe pure nothrow unittest
1248 {
1249     // ulong comparison test
1250     BigUint a = [1];
1251     assert(a == 1);
1252     // https://issues.dlang.org/show_bug.cgi?id=9548
1253     assert(a < 0x8000_0000_0000_0000UL);
1254 
1255     // https://issues.dlang.org/show_bug.cgi?id=12234
1256     BigUint z = [0];
1257     assert(z == 0UL);
1258     assert(!(z > 0UL));
1259     assert(!(z < 0UL));
1260 }
1261 
1262 // https://issues.dlang.org/show_bug.cgi?id=16223
1263 @system pure nothrow unittest
1264 {
1265     BigUint a = [3];
1266     int b = 5;
1267     assert(BigUint.mulInt(a,b) == 15);
1268 }
1269 
1270 // Remove leading zeros from x, to restore the BigUint invariant
1271 inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe
1272 {
1273     size_t k = x.length;
1274     while (k>1 && x[k - 1]==0) --k;
1275     return x[0 .. k];
1276 }
1277 
1278 pure @safe unittest
1279 {
1280    BigUint r = BigUint([5]);
1281    BigUint t = BigUint([7]);
1282    BigUint s = BigUint.mod(r, t);
1283    assert(s == 5);
1284 }
1285 
1286 pure @safe unittest
1287 { //evaluate mod at compile time
1288    enum BigUint r = BigUint([5]);
1289    enum BigUint t = BigUint([7]);
1290    enum BigUint s = BigUint.mod(r, t);
1291    static assert(s == 5);
1292 }
1293 
1294 @safe pure unittest
1295 {
1296     BigUint r;
1297     r = 5UL;
1298     assert(r.peekUlong(0) == 5UL);
1299     assert(r.peekUint(0) == 5U);
1300     r = 0x1234_5678_9ABC_DEF0UL;
1301     assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL);
1302     assert(r.peekUint(0) == 0x9ABC_DEF0U);
1303 }
1304 
1305 
1306 // Pow tests
1307 pure @safe unittest
1308 {
1309     BigUint r, s;
1310     r.fromHexString("80000000_00000001");
1311     s = BigUint.pow(r, 5);
1312     r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000"
1313       ~ "_00000002_80000000_00000001");
1314     assert(s == r);
1315     s = 10UL;
1316     s = BigUint.pow(s, 39);
1317     r.fromDecimalString("1000000000000000000000000000000000000000");
1318     assert(s == r);
1319     r.fromHexString("1_E1178E81_00000000");
1320     s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds
1321 
1322     r.fromDecimalString("000_000_00");
1323     assert(r == 0);
1324     r.fromDecimalString("0007");
1325     assert(r == 7);
1326     r.fromDecimalString("0");
1327     assert(r == 0);
1328 }
1329 
1330 // Radix conversion tests
1331 @safe pure unittest
1332 {
1333     BigUint r;
1334     r.fromHexString("1_E1178E81_00000000");
1335     assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000");
1336     assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000");
1337     assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000");
1338     assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000");
1339     assert(r.toHexString(0, '_', 16+8+8) ==   "00000000_00000001_E1178E81_00000000");
1340     assert(r.toHexString(0, '_', 16+8+8+1) ==      "0_00000000_00000001_E1178E81_00000000");
1341     assert(r.toHexString(0, '_', 16+8+8+1, ' ') == "                  1_E1178E81_00000000");
1342     assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000");
1343     r = 0UL;
1344     assert(r.toHexString(0, '_', 0) == "0");
1345     assert(r.toHexString(0, '_', 7) == "0000000");
1346     assert(r.toHexString(0, '_', 7, ' ') == "      0");
1347     assert(r.toHexString(0, '#', 9) == "0#00000000");
1348     assert(r.toHexString(0, 0, 9) == "000000000");
1349 }
1350 
1351 //
1352 @safe pure unittest
1353 {
1354     BigUint r;
1355     r.fromHexString("1_E1178E81_00000000");
1356     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000");
1357     assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000");
1358     assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000");
1359     assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000");
1360     assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) ==   "00000000_00000001_e1178e81_00000000");
1361     assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000");
1362     assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == "                  1_e1178e81_00000000");
1363     assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000");
1364     r = 0UL;
1365     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0");
1366     assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000");
1367     assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == "      0");
1368     assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000");
1369     assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000");
1370     assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000");
1371 }
1372 
1373 
1374 private:
1375 void twosComplement(const(BigDigit) [] x, BigDigit[] result)
1376 pure nothrow @safe
1377 {
1378     foreach (i; 0 .. x.length)
1379     {
1380         result[i] = ~x[i];
1381     }
1382     result[x.length..$] = BigDigit.max;
1383 
1384     foreach (i; 0 .. result.length)
1385     {
1386         if (result[i] == BigDigit.max)
1387         {
1388             result[i] = 0;
1389         }
1390         else
1391         {
1392             result[i] += 1;
1393             break;
1394         }
1395     }
1396 }
1397 
1398 // Encode BigInt as BigDigit array (sign and 2's complement)
1399 BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign)
1400 pure nothrow @safe
1401 {
1402     size_t length = (x.length > minSize) ? x.length : minSize;
1403     BigDigit [] result = new BigDigit[length];
1404     if (sign)
1405     {
1406         twosComplement(x, result);
1407     }
1408     else
1409     {
1410         result[0 .. x.length] = x;
1411     }
1412     return result;
1413 }
1414 
1415 // works for any type
1416 T intpow(T)(T x, ulong n) pure nothrow @safe
1417 {
1418     T p;
1419 
1420     switch (n)
1421     {
1422     case 0:
1423         p = 1;
1424         break;
1425 
1426     case 1:
1427         p = x;
1428         break;
1429 
1430     case 2:
1431         p = x * x;
1432         break;
1433 
1434     default:
1435         p = 1;
1436         while (1)
1437         {
1438             if (n & 1)
1439                 p *= x;
1440             n >>= 1;
1441             if (!n)
1442                 break;
1443             x *= x;
1444         }
1445         break;
1446     }
1447     return p;
1448 }
1449 
1450 
1451 //  returns the maximum power of x that will fit in a uint.
1452 int highestPowerBelowUintMax(uint x) pure nothrow @safe
1453 {
1454      assert(x > 1, "x must be greater than 1");
1455      static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9,
1456                                           8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7];
1457      if (x<24) return maxpwr[x-2];
1458      if (x<41) return 6;
1459      if (x<85) return 5;
1460      if (x<256) return 4;
1461      if (x<1626) return 3;
1462      if (x<65_536) return 2;
1463      return 1;
1464 }
1465 
1466 //  returns the maximum power of x that will fit in a ulong.
1467 int highestPowerBelowUlongMax(uint x) pure nothrow @safe
1468 {
1469      assert(x > 1, "x must be greater than 1");
1470      static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18,
1471                                          17, 17, 16, 16, 15, 15, 15, 15, 14, 14,
1472                                          14, 14, 13, 13, 13, 13, 13, 13, 13, 12,
1473                                          12, 12, 12, 12, 12, 12, 12, 12, 12];
1474      if (x<41) return maxpwr[x-2];
1475      if (x<57) return 11;
1476      if (x<85) return 10;
1477      if (x<139) return 9;
1478      if (x<256) return 8;
1479      if (x<566) return 7;
1480      if (x<1626) return 6;
1481      if (x<7132) return 5;
1482      if (x<65_536) return 4;
1483      if (x<2_642_246) return 3;
1484      return 2;
1485 }
1486 
1487 version (StdUnittest)
1488 {
1489 
1490 private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe
1491 {
1492      int pwr = 1;
1493      for (ulong q = x;x*q < cast(ulong) uint.max; )
1494      {
1495          q*=x; ++pwr;
1496      }
1497      return pwr;
1498 }
1499 
1500 @safe pure unittest
1501 {
1502     assert(highestPowerBelowUintMax(10)==9);
1503     for (int k=82; k<88; ++k)
1504     {
1505         assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k));
1506     }
1507 }
1508 
1509 }
1510 
1511 
1512 /*  General unsigned subtraction routine for bigints.
1513  *  Sets result = x - y. If the result is negative, negative will be true.
1514  * Returns:
1515  *    unique memory
1516  */
1517 BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative)
1518 pure nothrow @safe
1519 {
1520     if (x.length == y.length)
1521     {
1522         // There's a possibility of cancellation, if x and y are almost equal.
1523         ptrdiff_t last = highestDifferentDigit(x, y);
1524         BigDigit [] result = new BigDigit[last+1];
1525         if (x[last] < y[last])
1526         {   // we know result is negative
1527             multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0);
1528             *negative = true;
1529         }
1530         else
1531         {   // positive or zero result
1532             multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0);
1533             *negative = false;
1534         }
1535         while (result.length > 1 && result[$-1] == 0)
1536         {
1537             result = result[0..$-1];
1538         }
1539 //        if (result.length >1 && result[$-1]==0) return result[0..$-1];
1540         return result;
1541     }
1542     // Lengths are different
1543     const(BigDigit) [] large, small;
1544     if (x.length < y.length)
1545     {
1546         *negative = true;
1547         large = y; small = x;
1548     }
1549     else
1550     {
1551         *negative = false;
1552         large = x; small = y;
1553     }
1554     // result.length will be equal to larger length, or could decrease by 1.
1555 
1556 
1557     BigDigit [] result = new BigDigit[large.length];
1558     BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0);
1559     result[small.length..$] = large[small.length..$];
1560     if (carry)
1561     {
1562         multibyteIncrementAssign!('-')(result[small.length..$], carry);
1563     }
1564     while (result.length > 1 && result[$-1] == 0)
1565     {
1566         result = result[0..$-1];
1567     }
1568     return result;
1569 }
1570 
1571 
1572 /*
1573  * return a + b
1574  * Returns:
1575  *    unique memory
1576  */
1577 BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe
1578 {
1579     const(BigDigit) [] x, y;
1580     if (a.length < b.length)
1581     {
1582         x = b; y = a;
1583     }
1584     else
1585     {
1586         x = a; y = b;
1587     }
1588     // now we know x.length > y.length
1589     // create result. add 1 in case it overflows
1590     BigDigit [] result = new BigDigit[x.length + 1];
1591 
1592     BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0);
1593     if (x.length != y.length)
1594     {
1595         result[y.length..$-1]= x[y.length..$];
1596         carry  = multibyteIncrementAssign!('+')(result[y.length..$-1], carry);
1597     }
1598     if (carry)
1599     {
1600         result[$-1] = carry;
1601         return result;
1602     }
1603     else
1604         return result[0..$-1];
1605 }
1606 
1607 /**  return x + y
1608  */
1609 BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow
1610 {
1611     uint hi = cast(uint)(y >>> 32);
1612     uint lo = cast(uint)(y& 0xFFFF_FFFF);
1613     auto len = x.length;
1614     if (x.length < 2 && hi != 0) ++len;
1615     BigDigit [] result = new BigDigit[len+1];
1616     result[0 .. x.length] = x[];
1617     if (x.length < 2 && hi != 0)
1618     {
1619         result[1]=hi;
1620         hi=0;
1621     }
1622     uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo);
1623     if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi);
1624     if (carry)
1625     {
1626         result[$-1] = carry;
1627         return result;
1628     }
1629     else
1630         return result[0..$-1];
1631 }
1632 
1633 /** Return x - y.
1634  *  x must be greater than y.
1635  */
1636 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe
1637 {
1638     uint hi = cast(uint)(y >>> 32);
1639     uint lo = cast(uint)(y & 0xFFFF_FFFF);
1640     BigDigit [] result = new BigDigit[x.length];
1641     result[] = x[];
1642     multibyteIncrementAssign!('-')(result[], lo);
1643     if (hi)
1644         multibyteIncrementAssign!('-')(result[1..$], hi);
1645     if (result[$-1] == 0)
1646         return result[0..$-1];
1647     else
1648         return result;
1649 }
1650 
1651 /**  General unsigned multiply routine for bigints.
1652  *  Sets result = x * y.
1653  *
1654  *  The length of y must not be larger than the length of x.
1655  *  Different algorithms are used, depending on the lengths of x and y.
1656  *  TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the
1657  *  unbalanced case. (But I doubt it would be faster in practice).
1658  *
1659  */
1660 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1661     pure nothrow @safe
1662 {
1663     import core.memory : GC;
1664     assert( result.length == x.length + y.length,
1665             "result array must have enough space to store computed result");
1666     assert( y.length > 0, "y must not be empty");
1667     assert( x.length >= y.length, "x must be greater or equal than y");
1668     if (y.length <= KARATSUBALIMIT)
1669     {
1670         // Small multiplier, we'll just use the asm classic multiply.
1671         if (y.length == 1)
1672         {   // Trivial case, no cache effects to worry about
1673             result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0);
1674             return;
1675         }
1676 
1677         immutable CACHELIMIT = getCacheLimit;
1678         if (x.length + y.length < CACHELIMIT)
1679             return mulSimple(result, x, y);
1680 
1681         // If x is so big that it won't fit into the cache, we divide it into chunks
1682         // Every chunk must be greater than y.length.
1683         // We make the first chunk shorter, if necessary, to ensure this.
1684 
1685         auto chunksize = CACHELIMIT / y.length;
1686         immutable residual  =  x.length % chunksize;
1687         if (residual < y.length)
1688         {
1689             chunksize -= y.length;
1690         }
1691 
1692         // Use schoolbook multiply.
1693         mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y);
1694         auto done = chunksize;
1695 
1696         while (done < x.length)
1697         {
1698             // result[done .. done+ylength] already has a value.
1699             chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) :  x.length - done;
1700             BigDigit [KARATSUBALIMIT] partial;
1701             partial[0 .. y.length] = result[done .. done+y.length];
1702             mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y);
1703             addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]);
1704             done += chunksize;
1705         }
1706         return;
1707     }
1708 
1709     immutable half = (x.length >> 1) + (x.length & 1);
1710     if (2*y.length*y.length <= x.length*x.length)
1711     {
1712         // UNBALANCED MULTIPLY
1713         // Use school multiply to cut into quasi-squares of Karatsuba-size
1714         // or larger. The ratio of the two sides of the 'square' must be
1715         // between 1.414:1 and 1:1. Use Karatsuba on each chunk.
1716         //
1717         // For maximum performance, we want the ratio to be as close to
1718         // 1:1 as possible. To achieve this, we can either pad x or y.
1719         // The best choice depends on the modulus x%y.
1720         auto numchunks = x.length / y.length;
1721         auto chunksize = y.length;
1722         auto extra =  x.length % y.length;
1723         auto maxchunk = chunksize + extra;
1724         bool paddingY; // true = we're padding Y, false = we're padding X.
1725         bool isExtraSmall = extra * extra * 2 < y.length * y.length;
1726         if (numchunks == 1 && isExtraSmall)
1727         {
1728             // We divide (x_first_half * y) and (x_last_half * y)
1729             // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)).
1730             // (1.414 ~ 1.707)/2:1 is balanced.
1731             BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length];
1732             BigDigit [] partial = scratchbuff[$ - y.length .. $];
1733             scratchbuff = scratchbuff[0 .. $ - y.length];
1734             mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff);
1735             partial[] = result[half .. half + y.length];
1736             mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff);
1737             BigDigit c = addAssignSimple(result[half .. half + y.length], partial);
1738             if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c);
1739             if (!__ctfe)
1740             {
1741                 //use free only at runtime
1742                 () @trusted { GC.free(scratchbuff.ptr); } ();
1743             }
1744         }
1745         else
1746         {
1747             if (isExtraSmall)
1748             {
1749                 // The leftover bit is small enough that it should be incorporated
1750                 // in the existing chunks.
1751                 // Make all the chunks a tiny bit bigger
1752                 // (We're padding y with zeros)
1753                 chunksize += extra / numchunks;
1754                 extra = x.length - chunksize*numchunks;
1755                 // there will probably be a few left over.
1756                 // Every chunk will either have size chunksize, or chunksize+1.
1757                 maxchunk = chunksize + 1;
1758                 paddingY = true;
1759                 assert(chunksize + extra + chunksize *(numchunks-1) == x.length,
1760                     "Unexpected size");
1761             }
1762             else
1763             {
1764                 // the extra bit is large enough that it's worth making a new chunk.
1765                 // (This means we're padding x with zeros, when doing the first one).
1766                 maxchunk = chunksize;
1767                 ++numchunks;
1768                 paddingY = false;
1769                 assert(extra + chunksize *(numchunks-1) == x.length,
1770                     "Unexpected size");
1771             }
1772             // We make the buffer a bit bigger so we have space for the partial sums.
1773             BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1774             BigDigit [] partial = scratchbuff[$ - y.length .. $];
1775             scratchbuff = scratchbuff[0 .. $ - y.length];
1776             size_t done; // how much of X have we done so far?
1777             if (paddingY)
1778             {
1779                 // If the first chunk is bigger, do it first. We're padding y.
1780                 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1781                     x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff);
1782                 done = chunksize + (extra > 0 ? 1 : 0);
1783                 if (extra) --extra;
1784             }
1785             else
1786             {   // We're padding X. Begin with the extra bit.
1787                 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1788                 done = extra;
1789                 extra = 0;
1790             }
1791             immutable basechunksize = chunksize;
1792             while (done < x.length)
1793             {
1794                 chunksize = basechunksize + (extra > 0 ? 1 : 0);
1795                 if (extra) --extra;
1796                 partial[] = result[done .. done+y.length];
1797                 mulKaratsuba(result[done .. done + y.length + chunksize],
1798                         x[done .. done+chunksize], y, scratchbuff);
1799                 addAssignSimple(result[done .. done + y.length + chunksize], partial);
1800                 done += chunksize;
1801             }
1802             if (!__ctfe)
1803             {
1804                 //use free only at runtime
1805                 () @trusted { GC.free(scratchbuff.ptr); } ();
1806             }
1807         }
1808     }
1809     else
1810     {
1811         // Balanced. Use Karatsuba directly.
1812         BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1813         mulKaratsuba(result, x, y, scratchbuff);
1814         if (!__ctfe)
1815         {
1816             //use free only at runtime
1817             () @trusted { GC.free(scratchbuff.ptr); } ();
1818         }
1819     }
1820 }
1821 
1822 // https://issues.dlang.org/show_bug.cgi?id=20493
1823 @safe unittest
1824 {
1825     // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300)
1826     // the number itself isn't important, only the amount of digits, so we do a simpler
1827     // multiplication of the same size, analogous to:
1828     // 11111111 * 11111111 = 0123456787654321
1829     // but instead of base 10, it's in base `BigDigit`
1830 
1831     BigDigit[398] x = 1;
1832     BigDigit[236] y = 1;
1833     BigDigit[x.length + y.length] result;
1834     mulInternal(result[], x[], y[]);
1835 
1836     // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0]
1837     BigDigit[x.length + y.length] expected = y.length;
1838     foreach (BigDigit i; 0 .. y.length)
1839     {
1840         expected[i] = i+1;
1841         expected[$-1-i] = i;
1842     }
1843 
1844     assert(result == expected);
1845 }
1846 
1847 /**  General unsigned squaring routine for BigInts.
1848  *   Sets result = x*x.
1849  *   NOTE: If the highest half-digit of x is zero, the highest digit of result will
1850  *   also be zero.
1851  */
1852 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe
1853 {
1854     import core.memory : GC;
1855     // Squaring is potentially half a multiply, plus add the squares of
1856     // the diagonal elements.
1857     assert(result.length == 2*x.length,
1858         "result needs to have twice the capacity of x");
1859     if (x.length <= KARATSUBASQUARELIMIT)
1860     {
1861         if (x.length == 1)
1862         {
1863             result[1] = multibyteMul(result[0 .. 1], x, x[0], 0);
1864             return;
1865         }
1866         return squareSimple(result, x);
1867     }
1868     // The nice thing about squaring is that it always stays balanced
1869     BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1870     squareKaratsuba(result, x, scratchbuff);
1871     if (!__ctfe)
1872     {
1873         //use free only at runtime
1874         () @trusted { GC.free(scratchbuff.ptr); } ();
1875     }
1876 }
1877 
1878 
1879 import core.bitop : bsr;
1880 
1881 /// if remainder is null, only calculate quotient.
1882 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u,
1883         const BigDigit [] v) pure nothrow @safe
1884 {
1885     import core.memory : GC;
1886     assert(quotient.length == u.length - v.length + 1,
1887         "Invalid quotient length");
1888     assert(remainder == null || remainder.length == v.length,
1889         "Invalid remainder");
1890     assert(v.length > 1, "v must have more than 1 element");
1891     assert(u.length >= v.length, "u must be as longer or longer than v");
1892 
1893     // Normalize by shifting v left just enough so that
1894     // its high-order bit is on, and shift u left the
1895     // same amount. The highest bit of u will never be set.
1896 
1897     BigDigit [] vn = new BigDigit[v.length];
1898     BigDigit [] un = new BigDigit[u.length + 1];
1899     // How much to left shift v, so that its MSB is set.
1900     uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]);
1901     if (s != 0)
1902     {
1903         multibyteShl(vn, v, s);
1904         un[$-1] = multibyteShl(un[0..$-1], u, s);
1905     }
1906     else
1907     {
1908         vn[] = v[];
1909         un[0..$-1] = u[];
1910         un[$-1] = 0;
1911     }
1912     if (quotient.length<FASTDIVLIMIT)
1913     {
1914         schoolbookDivMod(quotient, un, vn);
1915     }
1916     else
1917     {
1918         blockDivMod(quotient, un, vn);
1919     }
1920 
1921     // Unnormalize remainder, if required.
1922     if (remainder != null)
1923     {
1924         if (s == 0) remainder[] = un[0 .. vn.length];
1925         else multibyteShr(remainder, un[0 .. vn.length+1], s);
1926     }
1927     if (!__ctfe)
1928     {
1929         //use free only at runtime
1930         () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } ();
1931     }
1932 }
1933 
1934 pure @safe unittest
1935 {
1936     immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000];
1937     immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000];
1938     uint [] q = new uint[u.length - v.length + 1];
1939     uint [] r = new uint[2];
1940     divModInternal(q, r, u, v);
1941     assert(q[]==[0xFFFF_FFFFu, 0]);
1942     assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]);
1943     u = [0, 0xFFFF_FFFE, 0x8000_0001];
1944     v = [0xFFFF_FFFF, 0x8000_0000];
1945     divModInternal(q, r, u, v);
1946 }
1947 
1948 
1949 // Converts a big uint to a hexadecimal string.
1950 //
1951 // Optionally, a separator character (eg, an underscore) may be added between
1952 // every 8 digits.
1953 // buff.length must be data.length*8 if separator is zero,
1954 // or data.length*9 if separator is non-zero. It will be completely filled.
1955 char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0,
1956         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
1957 {
1958     int x=0;
1959     for (ptrdiff_t i=data.length - 1; i >= 0; --i)
1960     {
1961         toHexZeroPadded(buff[x .. x+8], data[i], letterCase);
1962         x+=8;
1963         if (separator)
1964         {
1965             if (i>0) buff[x] = separator;
1966             ++x;
1967         }
1968     }
1969     return buff;
1970 }
1971 
1972 /**
1973  * Convert a big uint into an octal string.
1974  *
1975  * Params:
1976  *  buff = The destination buffer for the octal string. Must be large enough to
1977  *      store the result, including leading zeroes, which is
1978  *      ceil(data.length * BigDigitBits / 3) characters.
1979  *      The buffer is filled from back to front, starting from `buff[$-1]`.
1980  *  data = The biguint to be converted.
1981  *
1982  * Returns: The index of the leading non-zero digit in `buff`. Will be
1983  * `buff.length - 1` if the entire big uint is zero.
1984  */
1985 size_t biguintToOctal(char[] buff, const(BigDigit)[] data)
1986     pure nothrow @safe @nogc
1987 {
1988     ubyte carry = 0;
1989     int shift = 0;
1990     size_t penPos = buff.length - 1;
1991     size_t lastNonZero = buff.length - 1;
1992 
1993     pragma(inline) void output(uint digit) @nogc nothrow
1994     {
1995         if (digit != 0)
1996             lastNonZero = penPos;
1997         buff[penPos--] = cast(char)('0' + digit);
1998     }
1999 
2000     foreach (bigdigit; data)
2001     {
2002         if (shift < 0)
2003         {
2004             // Some bits were carried over from previous word.
2005             assert(shift > -3, "shift must be greater than -3");
2006             output(((bigdigit << -shift) | carry) & 0b111);
2007             shift += 3;
2008             assert(shift > 0, "shift must be 1 or greater");
2009         }
2010 
2011         while (shift <= BigDigitBits - 3)
2012         {
2013             output((bigdigit >>> shift) & 0b111);
2014             shift += 3;
2015         }
2016 
2017         if (shift < BigDigitBits)
2018         {
2019             // Some bits need to be carried forward.
2020             carry = (bigdigit >>> shift) & 0b11;
2021         }
2022         shift -= BigDigitBits;
2023         assert(shift >= -2 && shift <= 0, "shift must in [-2,0]");
2024     }
2025 
2026     if (shift < 0)
2027     {
2028         // Last word had bits that haven't been output yet.
2029         assert(shift > -3, "Shift must be greater than -3");
2030         output(carry);
2031     }
2032 
2033     return lastNonZero;
2034 }
2035 
2036 /** Convert a big uint into a decimal string.
2037  *
2038  * Params:
2039  *  data    The biguint to be converted. Will be destroyed.
2040  *  buff    The destination buffer for the decimal string. Must be
2041  *          large enough to store the result, including leading zeros.
2042  *          Will be filled backwards, starting from buff[$-1].
2043  *
2044  * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length.
2045  * Returns:
2046  *    the lowest index of buff which was used.
2047  */
2048 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe
2049 {
2050     ptrdiff_t sofar = buff.length;
2051     // Might be better to divide by (10^38/2^32) since that gives 38 digits for
2052     // the price of 3 divisions and a shr; this version only gives 27 digits
2053     // for 3 divisions.
2054     while (data.length>1)
2055     {
2056         uint rem = multibyteDivAssign(data, 10_0000_0000, 0);
2057         itoaZeroPadded(buff[sofar-9 .. sofar], rem);
2058         sofar -= 9;
2059         if (data[$-1] == 0 && data.length > 1)
2060         {
2061             data.length = data.length - 1;
2062         }
2063     }
2064     itoaZeroPadded(buff[sofar-10 .. sofar], data[0]);
2065     sofar -= 10;
2066     // and strip off the leading zeros
2067     while (sofar != buff.length-1 && buff[sofar] == '0')
2068         sofar++;
2069     return sofar;
2070 }
2071 
2072 /** Convert a decimal string into a big uint.
2073  *
2074  * Params:
2075  *  data    The biguint to be receive the result. Must be large enough to
2076  *          store the result.
2077  *  s       The decimal string. May contain _ or 0 .. 9
2078  *
2079  * The required length for the destination buffer is slightly less than
2080  *  1 + s.length/log2(10) = 1 + s.length/3.3219.
2081  *
2082  * Returns:
2083  *    the highest index of data which was used.
2084  */
2085 int biguintFromDecimal(Range)(BigDigit[] data, Range s)
2086 if (
2087     isInputRange!Range &&
2088     isSomeChar!(ElementType!Range) &&
2089     !isInfinite!Range)
2090 in
2091 {
2092     static if (hasLength!Range)
2093         assert((data.length >= 2) || (data.length == 1 && s.length == 1),
2094             "data has a invalid length");
2095 }
2096 do
2097 {
2098     import std.conv : ConvException;
2099 
2100     // Convert to base 1e19 = 10_000_000_000_000_000_000.
2101     // (this is the largest power of 10 that will fit into a long).
2102     // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219.
2103     // 485 bits will only just fit into 146 decimal digits.
2104     // As we convert the string, we record the number of digits we've seen in base 19:
2105     // hi is the number of digits/19, lo is the extra digits (0 to 18).
2106     // TODO: This is inefficient for very large strings (it is O(n^^2)).
2107     // We should take advantage of fast multiplication once the numbers exceed
2108     // Karatsuba size.
2109     uint lo = 0; // number of powers of digits, 0 .. 18
2110     uint x = 0;
2111     ulong y = 0;
2112     uint hi = 0; // number of base 1e19 digits
2113     data[0] = 0; // initially number is 0.
2114     if (data.length > 1)
2115         data[1] = 0;
2116 
2117     foreach (character; s)
2118     {
2119         if (character == '_')
2120             continue;
2121 
2122         if (character < '0' || character > '9')
2123             throw new ConvException("invalid digit");
2124         x *= 10;
2125         x += character - '0';
2126         ++lo;
2127         if (lo == 9)
2128         {
2129             y = x;
2130             x = 0;
2131         }
2132         if (lo == 18)
2133         {
2134             y *= 10_0000_0000;
2135             y += x;
2136             x = 0;
2137         }
2138         if (lo == 19)
2139         {
2140             y *= 10;
2141             y += x;
2142             x = 0;
2143             // Multiply existing number by 10^19, then add y1.
2144             if (hi>0)
2145             {
2146                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A
2147                 ++hi;
2148                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000
2149                 ++hi;
2150             }
2151             else
2152                 hi = 2;
2153             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2154             c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2155             if (c != 0)
2156             {
2157                 data[hi]=c;
2158                 ++hi;
2159             }
2160             y = 0;
2161             lo = 0;
2162         }
2163     }
2164     // Now set y = all remaining digits.
2165     if (lo >= 18)
2166     {
2167     }
2168     else if (lo >= 9)
2169     {
2170         for (int k=9; k<lo; ++k) y*=10;
2171         y+=x;
2172     }
2173     else
2174     {
2175         for (int k=0; k<lo; ++k) y*=10;
2176         y+=x;
2177     }
2178     if (lo != 0)
2179     {
2180         if (hi == 0)
2181         {
2182             data[0] = cast(uint) y;
2183             if (data.length == 1)
2184             {
2185                 hi = 1;
2186             }
2187             else
2188             {
2189                 data[1] = cast(uint)(y >>> 32);
2190                 hi=2;
2191             }
2192         }
2193         else
2194         {
2195             while (lo>0)
2196             {
2197                 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0);
2198                 if (c != 0)
2199                 {
2200                     data[hi]=c;
2201                     ++hi;
2202                 }
2203                 --lo;
2204             }
2205             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2206             if (y > 0xFFFF_FFFFL)
2207             {
2208                 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2209             }
2210             if (c != 0)
2211             {
2212                 data[hi]=c;
2213                 ++hi;
2214             }
2215         }
2216     }
2217     while (hi>1 && data[hi-1]==0)
2218         --hi;
2219     return hi;
2220 }
2221 
2222 
2223 // ------------------------
2224 // These in-place functions are only for internal use; they are incompatible
2225 // with COW.
2226 
2227 // Classic 'schoolbook' multiplication.
2228 void mulSimple(BigDigit[] result, const(BigDigit) [] left,
2229         const(BigDigit)[] right) pure nothrow @safe
2230 in
2231 {
2232     assert(result.length == left.length + right.length,
2233         "Result must be able to store left + right");
2234     assert(right.length>1, "right must not be empty");
2235 }
2236 do
2237 {
2238     result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0);
2239     multibyteMultiplyAccumulate(result[1..$], left, right[1..$]);
2240 }
2241 
2242 // Classic 'schoolbook' squaring
2243 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe
2244 in
2245 {
2246     assert(result.length == 2*x.length, "result must be twice as long as x");
2247     assert(x.length>1, "x must not be empty");
2248 }
2249 do
2250 {
2251     multibyteSquare(result, x);
2252 }
2253 
2254 
2255 // add two uints of possibly different lengths. Result must be as long
2256 // as the larger length.
2257 // Returns carry (0 or 1).
2258 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right)
2259 pure nothrow @safe
2260 in
2261 {
2262     assert(result.length == left.length,
2263         "result and left must be of the same length");
2264     assert(left.length >= right.length,
2265         "left must be longer or of equal length to right");
2266     assert(right.length > 0, "right must not be empty");
2267 }
2268 do
2269 {
2270     uint carry = multibyteAdd(result[0 .. right.length],
2271             left[0 .. right.length], right, 0);
2272     if (right.length < left.length)
2273     {
2274         result[right.length .. left.length] = left[right.length .. $];
2275         carry = multibyteIncrementAssign!('+')(result[right.length..$], carry);
2276     }
2277     return carry;
2278 }
2279 
2280 /* result = result - right
2281  * Returns carry = 1 if result was less than right.
2282 */
2283 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2284 pure nothrow @safe
2285 {
2286     assert(result.length >= right.length,
2287        "result must be longer or of equal length to right");
2288     uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0);
2289     if (c && result.length > right.length)
2290         c = multibyteIncrementAssign!('-')(result[right.length .. $], c);
2291     return c;
2292 }
2293 
2294 /* result = result + right
2295 */
2296 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2297 pure nothrow @safe
2298 {
2299     assert(result.length >= right.length,
2300        "result must be longer or of equal length to right");
2301     uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0);
2302     if (c && result.length > right.length)
2303        c = multibyteIncrementAssign!('+')(result[right.length .. $], c);
2304     return c;
2305 }
2306 
2307 /* performs result += wantSub? - right : right;
2308 */
2309 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right,
2310         bool wantSub) pure nothrow @safe
2311 {
2312     if (wantSub)
2313         return subAssignSimple(result, right);
2314     else
2315         return addAssignSimple(result, right);
2316 }
2317 
2318 
2319 // return true if x<y, considering leading zeros
2320 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe
2321 {
2322     assert(x.length >= y.length,
2323        "x must be longer or of equal length to y");
2324     auto k = x.length-1;
2325     while (x[k]==0 && k >= y.length)
2326         --k;
2327     if (k >= y.length)
2328         return false;
2329     while (k>0 && x[k]==y[k])
2330         --k;
2331     return x[k] < y[k];
2332 }
2333 
2334 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y.
2335 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
2336     pure nothrow @safe
2337 {
2338     assert(result.length == ((x.length >= y.length) ? x.length : y.length),
2339         "result must capable to store the maximum of x and y");
2340 
2341     size_t minlen;
2342     bool negative;
2343     if (x.length >= y.length)
2344     {
2345         minlen = y.length;
2346         negative = less(x, y);
2347     }
2348     else
2349     {
2350        minlen = x.length;
2351        negative = !less(y, x);
2352     }
2353     const (BigDigit)[] large, small;
2354     if (negative)
2355     {
2356         large = y; small = x;
2357     }
2358     else
2359     {
2360         large = x; small = y;
2361     }
2362 
2363     BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0);
2364     if (x.length != y.length)
2365     {
2366         result[minlen .. large.length]= large[minlen..$];
2367         result[large.length..$] = 0;
2368         if (carry)
2369             multibyteIncrementAssign!('-')(result[minlen..$], carry);
2370     }
2371     return negative;
2372 }
2373 
2374 /* Determine how much space is required for the temporaries
2375  * when performing a Karatsuba multiplication.
2376  * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see:
2377  * https://issues.dlang.org/show_bug.cgi?id=20493
2378  */
2379 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe
2380 {
2381     return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4;
2382 }
2383 
2384 /* Sets result = x*y, using Karatsuba multiplication.
2385 * x must be longer or equal to y.
2386 * Valid only for balanced multiplies, where x is not shorter than y.
2387 * It is superior to schoolbook multiplication if and only if
2388 *    sqrt(2)*y.length > x.length > y.length.
2389 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2)
2390 * The maximum allowable length of x and y is uint.max; but better algorithms
2391 * should be used far before that length is reached.
2392 * Params:
2393 * scratchbuff      An array long enough to store all the temporaries. Will be destroyed.
2394 */
2395 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x,
2396         const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe
2397 {
2398     assert(x.length >= y.length, "x must be greater or equal to y");
2399     assert(result.length < uint.max, "Operands too large");
2400     assert(result.length == x.length + y.length,
2401         "result must be as large as x + y");
2402     if (x.length <= KARATSUBALIMIT)
2403     {
2404         return mulSimple(result, x, y);
2405     }
2406     // Must be almost square (otherwise, a schoolbook iteration is better)
2407     assert(2L * y.length * y.length > (x.length-1) * (x.length-1),
2408         "Bigint Internal Error: Asymmetric Karatsuba");
2409 
2410     // The subtractive version of Karatsuba multiply uses the following result:
2411     // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid)
2412     // where mid = (x0-x1)*(y0-y1)
2413     // requiring 3 multiplies of length N, instead of 4.
2414     // The advantage of the subtractive over the additive version is that
2415     // the mid multiply cannot exceed length N. But there are subtleties:
2416     // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we
2417     // retain all of the leading zeros in the subtractions
2418 
2419     // half length, round up.
2420     auto half = (x.length >> 1) + (x.length & 1);
2421 
2422     const(BigDigit) [] x0 = x[0 .. half];
2423     const(BigDigit) [] x1 = x[half .. $];
2424     const(BigDigit) [] y0 = y[0 .. half];
2425     const(BigDigit) [] y1 = y[half .. $];
2426     BigDigit [] mid = scratchbuff[0 .. half*2];
2427     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2428     BigDigit [] resultLow = result[0 .. 2*half];
2429     BigDigit [] resultHigh = result[2*half .. $];
2430      // initially use result to store temporaries
2431     BigDigit [] xdiff= result[0 .. half];
2432     BigDigit [] ydiff = result[half .. half*2];
2433 
2434     // First, we calculate mid, and sign of mid
2435     immutable bool midNegative = inplaceSub(xdiff, x0, x1)
2436                       ^ inplaceSub(ydiff, y0, y1);
2437     mulKaratsuba(mid, xdiff, ydiff, newscratchbuff);
2438 
2439     // Low half of result gets x0 * y0. High half gets x1 * y1
2440 
2441     mulKaratsuba(resultLow, x0, y0, newscratchbuff);
2442 
2443     if (2L * y1.length * y1.length < x1.length * x1.length)
2444     {
2445         // an asymmetric situation has been created.
2446         // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1.
2447         // Applying one schoolbook multiply gives us two pieces each 1.2:1
2448         if (y1.length <= KARATSUBALIMIT)
2449             mulSimple(resultHigh, x1, y1);
2450         else
2451         {
2452             // divide x1 in two, then use schoolbook multiply on the two pieces.
2453             auto quarter = (x1.length >> 1) + (x1.length & 1);
2454             immutable ysmaller = (quarter >= y1.length);
2455             mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1,
2456                 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff);
2457             // Save the part which will be overwritten.
2458             immutable ysmaller2 = ((x1.length - quarter) >= y1.length);
2459             newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length];
2460             mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1,
2461                 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]);
2462 
2463             resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]);
2464         }
2465     }
2466     else
2467         mulKaratsuba(resultHigh, x1, y1, newscratchbuff);
2468 
2469     /* We now have result = x0y0 + (N*N)*x1y1
2470        Before adding or subtracting mid, we must calculate
2471        result += N * (x0y0 + x1y1)
2472        We can do this with three half-length additions. With a = x0y0, b = x1y1:
2473                       aHI aLO
2474         +       aHI   aLO
2475         +       bHI   bLO
2476         +  bHI  bLO
2477         =  R3   R2    R1   R0
2478         R1 = aHI + bLO + aLO
2479         R2 = aHI + bLO + aHI + carry_from_R1
2480         R3 = bHi + carry_from_R2
2481 
2482      It might actually be quicker to do it in two full-length additions:
2483      newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]);
2484      addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]);
2485    */
2486     BigDigit[] R1 = result[half .. half*2];
2487     BigDigit[] R2 = result[half*2 .. half*3];
2488     BigDigit[] R3 = result[half*3..$];
2489     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2490     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2491     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2492     if (c1+c2)
2493         multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2494     if (c1+c3)
2495         multibyteIncrementAssign!('+')(R3, c1+c3);
2496 
2497     // And finally we subtract mid
2498     addOrSubAssignSimple(result[half..$], mid, !midNegative);
2499 }
2500 
2501 void squareKaratsuba(BigDigit [] result, const BigDigit [] x,
2502         BigDigit [] scratchbuff) pure nothrow @safe
2503 {
2504     // See mulKaratsuba for implementation comments.
2505     // Squaring is simpler, since it never gets asymmetric.
2506     assert(result.length < uint.max, "Operands too large");
2507     assert(result.length == 2*x.length,
2508         "result must be twice the length of x");
2509     if (x.length <= KARATSUBASQUARELIMIT)
2510     {
2511         return squareSimple(result, x);
2512     }
2513     // half length, round up.
2514     auto half = (x.length >> 1) + (x.length & 1);
2515 
2516     const(BigDigit)[] x0 = x[0 .. half];
2517     const(BigDigit)[] x1 = x[half .. $];
2518     BigDigit [] mid = scratchbuff[0 .. half*2];
2519     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2520      // initially use result to store temporaries
2521     BigDigit [] xdiff= result[0 .. half];
2522     const BigDigit [] ydiff = result[half .. half*2];
2523 
2524     // First, we calculate mid. We don't need its sign
2525     inplaceSub(xdiff, x0, x1);
2526     squareKaratsuba(mid, xdiff, newscratchbuff);
2527 
2528     // Set result = x0x0 + (N*N)*x1x1
2529     squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff);
2530     squareKaratsuba(result[2*half .. $], x1, newscratchbuff);
2531 
2532     /* result += N * (x0x0 + x1x1)
2533        Do this with three half-length additions. With a = x0x0, b = x1x1:
2534         R1 = aHI + bLO + aLO
2535         R2 = aHI + bLO + aHI + carry_from_R1
2536         R3 = bHi + carry_from_R2
2537     */
2538     BigDigit[] R1 = result[half .. half*2];
2539     BigDigit[] R2 = result[half*2 .. half*3];
2540     BigDigit[] R3 = result[half*3..$];
2541     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2542     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2543     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2544     if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2545     if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3);
2546 
2547     // And finally we subtract mid, which is always positive
2548     subAssignSimple(result[half..$], mid);
2549 }
2550 
2551 /* Knuth's Algorithm D, as presented in
2552  * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002).
2553  * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18.
2554  * Given u and v, calculates  quotient  = u / v, u = u % v.
2555  * v must be normalized (ie, the MSB of v must be 1).
2556  * The most significant words of quotient and u may be zero.
2557  * u[0 .. v.length] holds the remainder.
2558  */
2559 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2560     pure nothrow @safe
2561 {
2562     assert(quotient.length == u.length - v.length,
2563         "quotient has wrong length");
2564     assert(v.length > 1, "v must not be empty");
2565     assert(u.length >= v.length, "u must be larger or equal to v");
2566     assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2567     assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]");
2568     // BUG: This code only works if BigDigit is uint.
2569     uint vhi = v[$-1];
2570     uint vlo = v[$-2];
2571 
2572     for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--)
2573     {
2574         // Compute estimate of quotient[j],
2575         // qhat = (three most significant words of u)/(two most sig words of v).
2576         uint qhat;
2577         if (u[j + v.length] == vhi)
2578         {
2579             // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001)
2580             qhat = uint.max;
2581         }
2582         else
2583         {
2584             uint ulo = u[j + v.length - 2];
2585             version (D_InlineAsm_X86)
2586             {
2587                 // Note: On DMD, this is only ~10% faster than the non-asm code.
2588                 uint *p = &u[j + v.length - 1];
2589                 asm pure nothrow @trusted
2590                 {
2591                     mov EAX, p;
2592                     mov EDX, [EAX+4];
2593                     mov EAX, [EAX];
2594                     div dword ptr [vhi];
2595                     mov qhat, EAX;
2596                     mov ECX, EDX;
2597 div3by2correction:
2598                     mul dword ptr [vlo]; // EDX:EAX = qhat * vlo
2599                     sub EAX, ulo;
2600                     sbb EDX, ECX;
2601                     jbe div3by2done;
2602                     mov EAX, qhat;
2603                     dec EAX;
2604                     mov qhat, EAX;
2605                     add ECX, dword ptr [vhi];
2606                     jnc div3by2correction;
2607 div3by2done:    ;
2608                 }
2609             }
2610             else
2611             { // version (InlineAsm)
2612                 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1];
2613                 immutable bigqhat = uu / vhi;
2614                 ulong rhat =  uu - bigqhat * vhi;
2615                 qhat = cast(uint) bigqhat;
2616 again:
2617                 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo))
2618                 {
2619                     --qhat;
2620                     rhat += vhi;
2621                     if (!(rhat & 0xFFFF_FFFF_0000_0000L))
2622                         goto again;
2623                 }
2624             } // version (InlineAsm)
2625         }
2626         // Multiply and subtract.
2627         uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0);
2628 
2629         if (u[j+v.length] < carry)
2630         {
2631             // If we subtracted too much, add back
2632             --qhat;
2633             carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0);
2634         }
2635         quotient[j] = qhat;
2636         u[j + v.length] = u[j + v.length] - carry;
2637     }
2638 }
2639 
2640 // TODO: Replace with a library call
2641 void itoaZeroPadded(char[] output, uint value)
2642     pure nothrow @safe @nogc
2643 {
2644     for (auto i = output.length; i--;)
2645     {
2646         if (value < 10)
2647         {
2648             output[i] = cast(char)(value + '0');
2649             value = 0;
2650         }
2651         else
2652         {
2653             output[i] = cast(char)(value % 10 + '0');
2654             value /= 10;
2655         }
2656     }
2657 }
2658 
2659 void toHexZeroPadded(char[] output, uint value,
2660         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
2661 {
2662     ptrdiff_t x = output.length - 1;
2663     static immutable string upperHexDigits = "0123456789ABCDEF";
2664     static immutable string lowerHexDigits = "0123456789abcdef";
2665     for ( ; x >= 0; --x)
2666     {
2667         if (letterCase == LetterCase.upper)
2668         {
2669             output[x] = upperHexDigits[value & 0xF];
2670         }
2671         else
2672         {
2673             output[x] = lowerHexDigits[value & 0xF];
2674         }
2675         value >>= 4;
2676     }
2677 }
2678 
2679 // Returns the highest value of i for which left[i]!=right[i],
2680 // or 0 if left[] == right[]
2681 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right)
2682 pure nothrow @nogc @safe
2683 {
2684     assert(left.length == right.length,
2685         "left have a length equal to that of right");
2686     for (ptrdiff_t i = left.length - 1; i>0; --i)
2687     {
2688         if (left[i] != right[i])
2689             return i;
2690     }
2691     return 0;
2692 }
2693 
2694 // Returns the lowest value of i for which x[i]!=0.
2695 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe
2696 {
2697     int k = 0;
2698     while (x[k]==0)
2699     {
2700         ++k;
2701         assert(k < x.length, "k must be less than x.length");
2702     }
2703     return k;
2704 }
2705 
2706 /*
2707     Calculate quotient and remainder of u / v using fast recursive division.
2708     v must be normalised, and must be at least half as long as u.
2709     Given u and v, v normalised, calculates  quotient  = u/v, u = u%v.
2710     scratch is temporary storage space, length must be >= quotient + 1.
2711 
2712 Returns:
2713     u[0 .. v.length] is the remainder. u[v.length..$] is corrupted.
2714 
2715     Implements algorithm 1.8 from MCA.
2716     This algorithm has an annoying special case. After the first recursion, the
2717     highest bit of the quotient may be set. This means that in the second
2718     recursive call, the 'in' contract would be violated. (This happens only
2719     when the top quarter of u is equal to the top half of v. A base 10
2720     equivalent example of this situation is 5517/56; the first step gives
2721     55/5 = 11). To maintain the in contract, we pad a zero to the top of both
2722     u and the quotient. 'mayOverflow' indicates that that the special case
2723     has occurred.
2724     (In MCA, a different strategy is used: the in contract is weakened, and
2725     schoolbookDivMod is more general: it allows the high bit of u to be set).
2726     See also:
2727     - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
2728       Max-Planck Institute fuer Informatik, (Oct 1998).
2729 */
2730 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v,
2731                      BigDigit[] scratch, bool mayOverflow = false)
2732                      pure nothrow @safe
2733 in
2734 {
2735     // v must be normalized
2736     assert(v.length > 1, "v must not be empty");
2737     assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2738     assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]");
2739     assert(quotient.length == u.length - v.length,
2740         "quotient must be of equal length of u - v");
2741     if (mayOverflow)
2742     {
2743         assert(u[$-1] == 0, "Invalid value at u[$ - 1]");
2744         assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]");
2745     }
2746 
2747     // Must be symmetric. Use block schoolbook division if not.
2748     assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length,
2749         "Invalid length of u");
2750     assert((mayOverflow ? u.length-1 : u.length) >= v.length,
2751         "Invalid length of u");
2752     assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1),
2753         "Invalid quotient length");
2754 }
2755 do
2756 {
2757     if (quotient.length < FASTDIVLIMIT)
2758     {
2759         return schoolbookDivMod(quotient, u, v);
2760     }
2761 
2762     // Split quotient into two halves, but keep padding in the top half
2763     auto k = (mayOverflow ?  quotient.length - 1 : quotient.length) >> 1;
2764 
2765     // RECURSION 1: Calculate the high half of the quotient
2766 
2767     // Note that if u and quotient were padded, they remain padded during
2768     // this call, so in contract is satisfied.
2769     recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $],
2770         scratch, mayOverflow);
2771 
2772     // quotient[k..$] is our guess at the high quotient.
2773     // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the
2774     // first remainder. u[0 .. 2*k] is the low part.
2775 
2776     // Calculate the full first remainder to be
2777     //    remainder - highQuotient * lowDivisor
2778     // reducing highQuotient until the remainder is positive.
2779     // The low part of the remainder, u[0 .. k], cannot be altered by this.
2780 
2781     adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k,
2782             scratch[0 .. quotient.length], mayOverflow);
2783 
2784     // RECURSION 2: Calculate the low half of the quotient
2785     // The full first remainder is now in u[0 .. k + v.length].
2786 
2787     if (u[k + v.length - 1] & 0x8000_0000)
2788     {
2789         // Special case. The high quotient is 0x1_00...000 or 0x1_00...001.
2790         // This means we need an extra quotient word for the next recursion.
2791         // We need to restore the invariant for the recursive calls.
2792         // We do this by padding both u and quotient. Extending u is trivial,
2793         // because the higher words will not be used again. But for the
2794         // quotient, we're clobbering the low word of the high quotient,
2795         // so we need save it, and add it back in after the recursive call.
2796 
2797         auto clobberedQuotient = quotient[k];
2798         u[k+v.length] = 0;
2799 
2800         recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1],
2801             v[k .. $], scratch, true);
2802         adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k,
2803             scratch[0 .. 2 * k+1], true);
2804 
2805         // Now add the quotient word that got clobbered earlier.
2806         multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient);
2807     }
2808     else
2809     {
2810         // The special case has NOT happened.
2811         recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $],
2812             scratch, false);
2813 
2814         // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length]
2815 
2816         adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k,
2817             scratch[0 .. 2 * k]);
2818     }
2819 }
2820 
2821 // rem -= quot * v[0 .. k].
2822 // If would make rem negative, decrease quot until rem is >= 0.
2823 // Needs (quot.length * k) scratch space to store the result of the multiply.
2824 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v,
2825         ptrdiff_t k,
2826         BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe
2827 {
2828     assert(rem.length == v.length, "rem must be as long as v");
2829     mulInternal(scratch, quot, v[0 .. k]);
2830     uint carry = 0;
2831     if (mayOverflow)
2832         carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]);
2833     else
2834         carry = subAssignSimple(rem, scratch);
2835     while (carry)
2836     {
2837         multibyteIncrementAssign!('-')(quot, 1); // quot--
2838         carry -= multibyteAdd(rem, rem, v, 0);
2839     }
2840 }
2841 
2842 // Cope with unbalanced division by performing block schoolbook division.
2843 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2844 pure nothrow @safe
2845 {
2846     import core.memory : GC;
2847     assert(quotient.length == u.length - v.length,
2848         "quotient must be of equal length of u - v");
2849     assert(v.length > 1, "v must not be empty");
2850     assert(u.length >= v.length, "u must be longer or of equal length as v");
2851     assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]");
2852     assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]");
2853     BigDigit [] scratch = new BigDigit[v.length + 1];
2854 
2855     // Perform block schoolbook division, with 'v.length' blocks.
2856     auto m = u.length - v.length;
2857     while (m > v.length)
2858     {
2859         immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0;
2860         BigDigit saveq;
2861         if (mayOverflow)
2862         {
2863             u[m + v.length] = 0;
2864             saveq = quotient[m];
2865         }
2866         recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)],
2867             u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow);
2868         if (mayOverflow)
2869         {
2870             assert(quotient[m] == 0, "quotient must not be 0");
2871             quotient[m] = saveq;
2872         }
2873         m -= v.length;
2874     }
2875     recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch);
2876     if (!__ctfe)
2877     {
2878         //use free only at runtime
2879         () @trusted { GC.free(scratch.ptr); } ();
2880     }
2881 }
2882 
2883 @system unittest
2884 {
2885     version (none)
2886     {
2887         import core.stdc.stdio;
2888 
2889         void printBiguint(const uint [] data)
2890         {
2891             char [] buff = biguintToHex(new char[data.length*9], data, '_');
2892             printf("%.*s\n", cast(int) buff.length, buff.ptr);
2893         }
2894 
2895         void printDecimalBigUint(BigUint data)
2896         {
2897             auto str = data.toDecimalString(0);
2898             printf("%.*s\n", cast(int) str.length, str.ptr);
2899         }
2900     }
2901 
2902     uint [] a, b;
2903     a = new uint[43];
2904     b = new uint[179];
2905     for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i;
2906     for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546;
2907 
2908     a[$-1] |= 0x8000_0000;
2909     uint [] r = new uint[a.length];
2910     uint [] q = new uint[b.length-a.length+1];
2911 
2912     divModInternal(q, r, b, a);
2913     q = q[0..$-1];
2914     uint [] r1 = r.dup;
2915     uint [] q1 = q.dup;
2916     blockDivMod(q, b, a);
2917     r = b[0 .. a.length];
2918     assert(r[] == r1[]);
2919     assert(q[] == q1[]);
2920 }
2921 
2922 // biguintToOctal
2923 @safe unittest
2924 {
2925     enum bufSize = 5 * BigDigitBits / 3 + 1;
2926     auto buf = new char[bufSize];
2927     size_t i;
2928     BigDigit[] data = [ 342391 ];
2929 
2930     // Basic functionality with single word
2931     i = biguintToOctal(buf, data);
2932     assert(i == bufSize - 7 && buf[i .. $] == "1234567");
2933 
2934     // Test carrying bits between words
2935     data = [ 0x77053977, 0x39770539, 0x00000005 ];
2936     i = biguintToOctal(buf, data);
2937     assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567");
2938 
2939     // Test carried bits in the last word
2940     data = [ 0x80000000 ];
2941     i = biguintToOctal(buf, data);
2942     assert(buf[i .. $] == "20000000000");
2943 
2944     // Test boundary between 3rd and 4th word where the number of bits is
2945     // divisible by 3 and no bits should be carried.
2946     //
2947     // The 0xC0000000's are for "poisoning" the carry to be non-zero when the
2948     // rollover happens, so that if any bugs happen in wrongly adding the carry
2949     // to the next word, non-zero bits will show up in the output.
2950     data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ];
2951     i = biguintToOctal(buf, data);
2952     assert(buf[i .. $] == "2060000000001400000000030000000000");
2953 
2954     // Boundary case: 0
2955     data = [ 0 ];
2956     i = biguintToOctal(buf, data);
2957     assert(buf[i .. $] == "0");
2958 }