1 /** Fundamental operations for arbitrary-precision arithmetic
2  *
3  * These functions are for internal use only.
4  */
5 /*          Copyright Don Clugston 2008 - 2010.
6  * Distributed under the Boost Software License, Version 1.0.
7  *    (See accompanying file LICENSE_1_0.txt or copy at
8  *          http://www.boost.org/LICENSE_1_0.txt)
9  */
10 /* References:
11    "Modern Computer Arithmetic" (MCA) is the primary reference for all
12     algorithms used in this library.
13   - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic",
14     Version 0.5.9, (Oct 2010).
15   - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
16     Max-Planck Institute fuer Informatik, (Oct 1998).
17   - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.",
18     INRIA 4664, (Dec 2002).
19   - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?",
20     http://bodrato.it/papers (2006).
21   - A. Fog, "Optimizing subroutines in assembly language",
22     www.agner.org/optimize (2008).
23   - A. Fog, "The microarchitecture of Intel and AMD CPU's",
24     www.agner.org/optimize (2008).
25   - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs
26     and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008).
27 
28 Idioms:
29   Many functions in this module use
30   'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)'
31   in order to disable implicit conversion.
32 
33 */
34 module std.internal.math.biguintcore;
35 
36 version (D_InlineAsm_X86)
37 {
38     static import std.internal.math.biguintx86;
39 }
40 static import std.internal.math.biguintnoasm;
41 
42 import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT,
43     KARATSUBASQUARELIMIT;
44 
45 alias multibyteAdd = multibyteAddSub!('+');
46 alias multibyteSub = multibyteAddSub!('-');
47 
48 private import std.traits;
49 private import std.range.primitives;
50 public import std.ascii : LetterCase;
51 import std.range.primitives;
52 import std.traits;
53 
54 private:
55 
56 // dipatchers to the right low-level primitives. Added to allow BigInt CTFE for
57 // 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's
58 // used by the other architectures too.
59 // See comments below in case it has to be refactored.
60 version (X86)
61 uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry)
62 {
63     // must be checked before, otherwise D_InlineAsm_X86 is true.
64     if (__ctfe)
65         return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
66     // Runtime.
67     else version (D_InlineAsm_X86)
68         return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry);
69     // Runtime if no asm available.
70     else
71         return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
72 }
73 // Any other architecture
74 else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub;
75 
76 version (X86)
77 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry)
78 {
79     if (__ctfe)
80         return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
81     else version (D_InlineAsm_X86)
82         return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry);
83     else
84         return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
85 }
86 else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign;
87 
88 version (X86)
89 uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits)
90 {
91     if (__ctfe)
92         return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
93     else version (D_InlineAsm_X86)
94         return std.internal.math.biguintx86.multibyteShl(dest, src, numbits);
95     else
96         return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
97 }
98 else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl;
99 
100 version (X86)
101 void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits)
102 {
103     if (__ctfe)
104         std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
105     else version (D_InlineAsm_X86)
106         std.internal.math.biguintx86.multibyteShr(dest, src, numbits);
107     else
108         std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
109 }
110 else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr;
111 
112 version (X86)
113 uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
114 {
115     if (__ctfe)
116         return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
117     else version (D_InlineAsm_X86)
118         return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry);
119     else
120         return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
121 }
122 else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul;
123 
124 version (X86)
125 uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
126 {
127     if (__ctfe)
128         return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
129     else version (D_InlineAsm_X86)
130         return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry);
131     else
132         return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
133 }
134 else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd;
135 
136 version (X86)
137 void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right)
138 {
139     if (__ctfe)
140         std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
141     else version (D_InlineAsm_X86)
142         std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right);
143     else
144         std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
145 }
146 else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate;
147 
148 version (X86)
149 uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow)
150 {
151     if (__ctfe)
152         return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
153     else version (D_InlineAsm_X86)
154         return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow);
155     else
156         return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
157 }
158 else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign;
159 
160 version (X86)
161 void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src)
162 {
163     if (__ctfe)
164         std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
165     else version (D_InlineAsm_X86)
166         std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src);
167     else
168         std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
169 }
170 else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares;
171 
172 version (X86)
173 void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x)
174 {
175     if (__ctfe)
176         std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
177     else version (D_InlineAsm_X86)
178         std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x);
179     else
180         std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
181 }
182 else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate;
183 
184 version (X86)
185 void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x)
186 {
187     if (__ctfe)
188         std.internal.math.biguintnoasm.multibyteSquare(result, x);
189     else version (D_InlineAsm_X86)
190         std.internal.math.biguintx86.multibyteSquare(result, x);
191     else
192         std.internal.math.biguintnoasm.multibyteSquare(result, x);
193 }
194 else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare;
195 
196 // Limits for when to switch between algorithms.
197 // Half the size of the data cache.
198 @nogc nothrow pure @safe size_t getCacheLimit()
199 {
200     import core.cpuid : dataCaches;
201     return dataCaches[0].size * 1024 / 2;
202 }
203 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division
204 
205 
206 // These constants are used by shift operations
207 static if (BigDigit.sizeof == int.sizeof)
208 {
209     enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 }
210     alias BIGHALFDIGIT = ushort;
211 }
212 else static if (BigDigit.sizeof == long.sizeof)
213 {
214     alias BIGHALFDIGIT = uint;
215     enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 }
216 }
217 else static assert(0, "Unsupported BigDigit size");
218 
219 import std.exception : assumeUnique;
220 import std.traits : isIntegral;
221 enum BigDigitBits = BigDigit.sizeof*8;
222 template maxBigDigits(T)
223 if (isIntegral!T)
224 {
225     enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof;
226 }
227 
228 static immutable BigDigit[] ZERO = [0];
229 static immutable BigDigit[] ONE = [1];
230 static immutable BigDigit[] TWO = [2];
231 static immutable BigDigit[] TEN = [10];
232 
233 
234 public:
235 
236 /// BigUint performs memory management and wraps the low-level calls.
237 struct BigUint
238 {
239 private:
240     pure invariant()
241     {
242         assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ),
243                 "Invariant requires data to not empty or zero");
244     }
245 
246     immutable(BigDigit) [] data = ZERO;
247 
248     this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe
249     {
250        data = x;
251     }
252   package(std)  // used from: std.bigint
253     this(T)(T x) pure nothrow @safe scope
254     if (isIntegral!T)
255     {
256         opAssign(x);
257     }
258 
259     enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc {
260         return assumeUnique(input);
261     };
262 public:
263     // Length in uints
264     @property size_t uintLength() pure nothrow const @safe @nogc scope
265     {
266         static if (BigDigit.sizeof == uint.sizeof)
267         {
268             return data.length;
269         }
270         else static if (BigDigit.sizeof == ulong.sizeof)
271         {
272             return data.length * 2 -
273             ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0);
274         }
275     }
276     @property size_t ulongLength() pure nothrow const @safe @nogc scope
277     {
278         static if (BigDigit.sizeof == uint.sizeof)
279         {
280             return (data.length + 1) >> 1;
281         }
282         else static if (BigDigit.sizeof == ulong.sizeof)
283         {
284             return data.length;
285         }
286     }
287 
288     // The value at (cast(ulong[]) data)[n]
289     ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope
290     {
291         static if (BigDigit.sizeof == int.sizeof)
292         {
293             if (data.length == n*2 + 1) return data[n*2];
294             return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 );
295         }
296         else static if (BigDigit.sizeof == long.sizeof)
297         {
298             return data[n];
299         }
300     }
301 
302     uint peekUint(size_t n) pure nothrow const @safe @nogc scope
303     {
304         static if (BigDigit.sizeof == int.sizeof)
305         {
306             return data[n];
307         }
308         else
309         {
310             immutable x = data[n >> 1];
311             return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x;
312         }
313     }
314 
315     ///
316     void opAssign(Tulong)(Tulong u) pure nothrow @safe scope
317     if (is (Tulong == ulong))
318     {
319         if (u == 0) data = ZERO;
320         else if (u == 1) data = ONE;
321         else if (u == 2) data = TWO;
322         else if (u == 10) data = TEN;
323         else
324         {
325             static if (BigDigit.sizeof == int.sizeof)
326             {
327                 uint ulo = cast(uint)(u & 0xFFFF_FFFF);
328                 uint uhi = cast(uint)(u >> 32);
329                 if (uhi == 0)
330                 {
331                     data = [ulo];
332                 }
333                 else
334                 {
335                     data = [ulo, uhi];
336                 }
337             }
338             else static if (BigDigit.sizeof == long.sizeof)
339             {
340                 data = [u];
341             }
342         }
343     }
344     void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope
345     {
346         this.data = y.data;
347     }
348 
349     ///
350     int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope
351     {
352         if (data.length != y.data.length)
353             return (data.length > y.data.length) ?  1 : -1;
354         size_t k = highestDifferentDigit(data, y.data);
355         if (data[k] == y.data[k])
356             return 0;
357         return data[k] > y.data[k] ? 1 : -1;
358     }
359 
360     ///
361     int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope
362     if (is (Tulong == ulong))
363     {
364         if (data.length > maxBigDigits!Tulong)
365             return 1;
366 
367         foreach_reverse (i; 0 .. maxBigDigits!Tulong)
368         {
369             BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits));
370             if (tmp == 0)
371                 if (data.length >= i+1)
372                 {
373                     // Since ZERO is [0], so we cannot simply return 1 here, as
374                     // data[i] would be 0 for i == 0 in that case.
375                     return (data[i] > 0) ? 1 : 0;
376                 }
377                 else
378                     continue;
379             else
380                 if (i+1 > data.length)
381                     return -1;
382                 else if (tmp != data[i])
383                     return data[i] > tmp ? 1 : -1;
384         }
385         return 0;
386     }
387 
388     bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope
389     {
390            return y.data[] == data[];
391     }
392 
393     bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope
394     {
395         if (data.length > 2)
396             return false;
397         uint ylo = cast(uint)(y & 0xFFFF_FFFF);
398         uint yhi = cast(uint)(y >> 32);
399         if (data.length == 2 && data[1]!=yhi)
400             return false;
401         if (data.length == 1 && yhi != 0)
402             return false;
403         return (data[0] == ylo);
404     }
405 
406     bool isZero() pure const nothrow @safe @nogc scope
407     {
408         return data.length == 1 && data[0] == 0;
409     }
410 
411     size_t numBytes() pure nothrow const @safe @nogc scope
412     {
413         return data.length * BigDigit.sizeof;
414     }
415 
416     // the extra bytes are added to the start of the string
417     char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope
418     {
419         immutable predictlength = 20+20*(data.length/2); // just over 19
420         char [] buff = new char[frontExtraBytes + predictlength];
421         ptrdiff_t sofar = biguintToDecimal(buff, data.dup);
422         return buff[sofar-frontExtraBytes..$];
423     }
424 
425     /** Convert to a hex string, printing a minimum number of digits 'minPadding',
426      *  allocating an additional 'frontExtraBytes' at the start of the string.
427      *  Padding is done with padChar, which may be '0' or ' '.
428      *  'separator' is a digit separation character. If non-zero, it is inserted
429      *  between every 8 digits.
430      *  Separator characters do not contribute to the minPadding.
431      */
432     char [] toHexString(int frontExtraBytes, char separator = 0,
433             int minPadding=0, char padChar = '0',
434             LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope
435     {
436         // Calculate number of extra padding bytes
437         size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof)
438             ? minPadding - data.length * 2 * BigDigit.sizeof : 0;
439 
440         // Length not including separator bytes
441         size_t lenBytes = data.length * 2 * BigDigit.sizeof;
442 
443         // Calculate number of separator bytes
444         size_t mainSeparatorBytes = separator ? (lenBytes  / 8) - 1 : 0;
445         immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0;
446 
447         char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes];
448         biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase);
449         if (extraPad > 0)
450         {
451             if (separator)
452             {
453                 size_t start = frontExtraBytes; // first index to pad
454                 if (extraPad &7)
455                 {
456                     // Do 1 to 7 extra zeros.
457                     buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar;
458                     buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator);
459                     start += (extraPad & 7) + 1;
460                 }
461                 for (int i=0; i< (extraPad >> 3); ++i)
462                 {
463                     buff[start .. start + 8] = padChar;
464                     buff[start + 8] = (padChar == ' ' ? ' ' : separator);
465                     start += 9;
466                 }
467             }
468             else
469             {
470                 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar;
471             }
472         }
473         int z = frontExtraBytes;
474         if (lenBytes > minPadding)
475         {
476             // Strip leading zeros.
477             ptrdiff_t maxStrip = lenBytes - minPadding;
478             while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0)
479             {
480                 ++z;
481                 --maxStrip;
482             }
483         }
484         if (padChar!='0')
485         {
486             // Convert leading zeros into padChars.
487             for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k)
488             {
489                 if (buff[k]=='0') buff[k]=padChar;
490             }
491         }
492         return buff[z-frontExtraBytes..$];
493     }
494 
495     /**
496      * Convert to an octal string.
497      */
498     char[] toOctalString() pure nothrow @safe const scope
499     {
500         auto predictLength = 1 + data.length*BigDigitBits / 3;
501         char[] buff = new char[predictLength];
502         size_t firstNonZero = biguintToOctal(buff, data);
503         return buff[firstNonZero .. $];
504     }
505 
506     // return false if invalid character found
507     bool fromHexString(Range)(Range s) scope
508     if (isBidirectionalRange!Range && isSomeChar!(ElementType!Range))
509     {
510         import std.range : walkLength;
511 
512         //Strip leading zeros
513         while (!s.empty && s.front == '0')
514             s.popFront;
515 
516         if (s.empty)
517         {
518             data = ZERO;
519             return true;
520         }
521 
522         immutable len = (s.save.walkLength + 15) / 4;
523         auto tmp = new BigDigit[len + 1];
524         uint part, sofar, partcount;
525 
526         foreach_reverse (character; s)
527         {
528             if (character == '_')
529                 continue;
530 
531             uint x;
532             if (character >= '0' && character <= '9')
533             {
534                 x = character - '0';
535             }
536             else if (character >= 'A' && character <= 'F')
537             {
538                 x = character - 'A' + 10;
539             }
540             else if (character >= 'a' && character <= 'f')
541             {
542                 x = character - 'a' + 10;
543             }
544             else
545             {
546                 return false;
547             }
548 
549             part >>= 4;
550             part |= (x << (32 - 4));
551             ++partcount;
552 
553             if (partcount == 8)
554             {
555                 tmp[sofar] = part;
556                 ++sofar;
557                 partcount = 0;
558                 part = 0;
559             }
560         }
561         if (part)
562         {
563             for ( ; partcount != 8; ++partcount) part >>= 4;
564             tmp[sofar] = part;
565             ++sofar;
566         }
567         if (sofar == 0)
568             data = ZERO;
569         else
570             data = trustedAssumeUnique(tmp[0 .. sofar]);
571 
572         return true;
573     }
574 
575     // return true if OK; false if erroneous characters found
576     bool fromDecimalString(Range)(Range s) scope
577     if (isForwardRange!Range && isSomeChar!(ElementType!Range))
578     {
579         import std.range : walkLength;
580 
581         while (!s.empty && s.front == '0')
582         {
583             s.popFront;
584         }
585 
586         if (s.empty)
587         {
588             data = ZERO;
589             return true;
590         }
591 
592         auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19;
593         auto tmp = new BigDigit[predict_length];
594 
595         tmp.length = biguintFromDecimal(tmp, s);
596 
597         data = trustedAssumeUnique(tmp);
598         return true;
599     }
600 
601     void fromMagnitude(Range)(Range magnitude) scope
602     if (isInputRange!Range
603         && (isForwardRange!Range || hasLength!Range)
604         && isUnsigned!(ElementType!Range))
605     {
606         while (!magnitude.empty && magnitude.front == 0)
607             magnitude.popFront;
608         static if (hasLength!Range)
609             immutable inputLen = magnitude.length;
610         else
611             immutable inputLen = magnitude.save.walkLength;
612         if (!inputLen)
613         {
614             this.data = ZERO;
615             return;
616         }
617         // `magnitude` has its most significant element first but BigUint.data
618         // stores the most significant last.
619         BigDigit[] newDigits;
620         alias E = ElementType!Range;
621         static if (E.sizeof == BigDigit.sizeof)
622         {
623             newDigits = new BigDigit[inputLen];
624             foreach_reverse (ref digit; newDigits)
625             {
626                 digit = magnitude.front;
627                 magnitude.popFront();
628             }
629         }
630         else static if (E.sizeof < BigDigit.sizeof)
631         {
632             enum elementsPerDigit = BigDigit.sizeof / E.sizeof;
633             newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit];
634             immutable remainder = inputLen % elementsPerDigit;
635             // If there is a remainder specially assemble the most significant digit.
636             if (remainder)
637             {
638                 BigDigit tmp = magnitude.front;
639                 magnitude.popFront();
640                 foreach (_; 1 .. remainder)
641                 {
642                     tmp = (tmp << (E.sizeof * 8)) | magnitude.front;
643                     magnitude.popFront();
644                 }
645                 newDigits[$-1] = tmp;
646             }
647             // Assemble full digits from most to least significant.
648             foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)])
649             {
650                 BigDigit tmp;
651                 static foreach (n; 0 .. elementsPerDigit)
652                 {
653                     tmp |= cast(BigDigit) magnitude.front <<
654                         ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8);
655                     magnitude.popFront();
656                 }
657                 digit = tmp;
658             }
659         }
660         else static if (E.sizeof > BigDigit.sizeof)
661         {
662             enum digitsPerElement = E.sizeof / BigDigit.sizeof;
663             newDigits = new BigDigit[inputLen * digitsPerElement];
664             size_t i = newDigits.length - 1;
665             foreach (element; magnitude)
666             {
667                 static foreach (n; 0 .. digitsPerElement)
668                     newDigits[i - n] =
669                         cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8));
670                 i -= digitsPerElement;
671             }
672             while (newDigits[$-1] == 0)
673                 newDigits = newDigits[0 .. $-1];
674         }
675         else
676             static assert(0);
677         this.data = trustedAssumeUnique(newDigits);
678         return;
679     }
680 
681     nothrow pure @safe unittest
682     {
683         immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD];
684         // Internal representation is most-significant-last but `fromMagnitude`
685         // argument is most-significant-first.
686         immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005];
687         BigUint b;
688         // Test with reference magnitude.
689         b.fromMagnitude(referenceMagnitude);
690         assert(b.data == referenceData);
691         // Test ubyte array.
692         import std.bitmanip : nativeToBigEndian;
693         ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~
694             nativeToBigEndian(referenceMagnitude[1]) ~
695             nativeToBigEndian(referenceMagnitude[2]);
696         b.data = ZERO;
697         b.fromMagnitude(ubyteMagnitude);
698         assert(b.data == referenceData);
699         // Test ulong array.
700         static if (BigDigit.sizeof == uint.sizeof)
701             immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]),
702                 ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2],
703             ];
704         else static if (BigDigit.sizeof == ulong.sizeof)
705             alias ulongMagnitude = referenceMagnitude;
706         b.data = ZERO;
707         b.fromMagnitude(ulongMagnitude);
708         assert(b.data == referenceData);
709     }
710 
711     ////////////////////////
712     //
713     // All of these member functions create a new BigUint.
714 
715     // return x >> y
716     BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope
717     if (op == ">>" && is (Tulong == ulong))
718     {
719         assert(y > 0, "Can not right shift BigUint by 0");
720         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
721         if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO);
722         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
723         if (bits == 0)
724         {
725             return BigUint(data[words..$]);
726         }
727         else
728         {
729             uint [] result = new BigDigit[data.length - words];
730             multibyteShr(result, data[words..$], bits);
731 
732             if (result.length > 1 && result[$-1] == 0)
733                 return BigUint(trustedAssumeUnique(result[0 .. $-1]));
734             else
735                 return BigUint(trustedAssumeUnique(result));
736         }
737     }
738 
739     // return x << y
740     BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope
741     if (op == "<<" && is (Tulong == ulong))
742     {
743         assert(y > 0, "Can not left shift BigUint by 0");
744         if (isZero()) return this;
745         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
746         assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max),
747                 "Shift result exceeds temporary store");
748         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
749         BigDigit [] result = new BigDigit[data.length + words+1];
750         result[0 .. words] = 0;
751         if (bits == 0)
752         {
753             result[words .. words+data.length] = data[];
754             return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
755         }
756         else
757         {
758             immutable c = multibyteShl(result[words .. words+data.length], data, bits);
759             if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
760             result[$-1] = c;
761             return BigUint(trustedAssumeUnique(result));
762         }
763     }
764 
765     // If wantSub is false, return x + y, leaving sign unchanged
766     // If wantSub is true, return abs(x - y), negating sign if x < y
767     static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y, bool wantSub, ref bool sign) pure nothrow @safe
768     if (is(Tulong == ulong))
769     {
770         BigUint r;
771         if (wantSub)
772         {   // perform a subtraction
773             if (x.data.length > 2)
774             {
775                 // subInt returns GC allocated array, can be safely cast to immutable
776                 r.data = (() @trusted => cast(immutable) subInt(x.data, y))();
777             }
778             else
779             {   // could change sign!
780                 ulong xx = x.data[0];
781                 if (x.data.length > 1)
782                     xx += (cast(ulong) x.data[1]) << 32;
783                 ulong d;
784                 if (xx <= y)
785                 {
786                     d = y - xx;
787                     sign = !sign;
788                 }
789                 else
790                 {
791                     d = xx - y;
792                 }
793                 if (d == 0)
794                 {
795                     r = 0UL;
796                     sign = false;
797                     return r;
798                 }
799                 if (d > uint.max)
800                 {
801                     r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)];
802                 }
803                 else
804                 {
805                     r.data = [cast(uint)(d & 0xFFFF_FFFF)];
806                 }
807             }
808         }
809         else
810         {
811             // addInt returns GC allocated array, can be safely cast to immutable
812             r.data = (() @trusted => cast(immutable) addInt(x.data, y))();
813         }
814         return r;
815     }
816 
817     // If wantSub is false, return x + y, leaving sign unchanged.
818     // If wantSub is true, return abs(x - y), negating sign if x<y
819     static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign)
820         pure nothrow @safe
821     {
822         BigUint r;
823         if (wantSub)
824         {   // perform a subtraction
825             bool negative;
826             // sub returns GC allocated array, can be safely cast to immutable
827             r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))();
828             sign ^= negative;
829             if (r.isZero())
830             {
831                 sign = false;
832             }
833         }
834         else
835         {
836             // add returns GC allocated array, can be safely cast to immutable
837             r.data = (() @trusted => cast(immutable) add(x.data, y.data))();
838         }
839         return r;
840     }
841 
842 
843     //  return x*y.
844     static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe
845     {
846         if (y == 0 || x == 0) return BigUint(ZERO);
847         static if (T.sizeof * 8 <= 32)
848             uint hi = 0;
849         else
850             uint hi = cast(uint) (y >>> 32);
851         uint lo = cast(uint) (y & 0xFFFF_FFFF);
852         uint [] result = new BigDigit[x.data.length+1+(hi != 0)];
853         result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0);
854         if (hi != 0)
855         {
856             result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1],
857                 x.data, hi, 0);
858         }
859         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
860     }
861 
862     /*  return x * y.
863      */
864     static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe
865     {
866         if (y == 0 || x == 0)
867             return BigUint(ZERO);
868         auto len = x.data.length + y.data.length;
869         BigDigit [] result = new BigDigit[len];
870         if (y.data.length > x.data.length)
871         {
872             mulInternal(result, y.data, x.data);
873         }
874         else
875         {
876             if (x.data[]==y.data[]) squareInternal(result, x.data);
877             else mulInternal(result, x.data, y.data);
878         }
879         // the highest element could be zero,
880         // in which case we need to reduce the length
881         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
882     }
883 
884     // return x / y
885     static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe
886     if ( is(immutable T == immutable uint) )
887     {
888         uint y = y_;
889         if (y == 1)
890             return x;
891         uint [] result = new BigDigit[x.data.length];
892         if ((y&(-y))==y)
893         {
894             assert(y != 0, "BigUint division by zero");
895             // perfect power of 2
896             uint b = 0;
897             for (;y != 1; y>>=1)
898             {
899                 ++b;
900             }
901             multibyteShr(result, x.data, b);
902         }
903         else
904         {
905             result[] = x.data[];
906             cast(void) multibyteDivAssign(result, y, 0);
907         }
908         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
909     }
910 
911     static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe
912     if ( is(immutable T == immutable ulong) )
913     {
914         if (y <= uint.max)
915             return divInt!uint(x, cast(uint) y);
916         if (x.data.length < 2)
917             return BigUint(ZERO);
918         uint hi = cast(uint)(y >>> 32);
919         uint lo = cast(uint)(y & 0xFFFF_FFFF);
920         immutable uint[2] z = [lo, hi];
921         BigDigit[] result = new BigDigit[x.data.length - z.length + 1];
922         divModInternal(result, null, x.data, z[]);
923         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
924     }
925 
926     // return x % y
927     static uint modInt(T)(scope BigUint x, T y_) pure
928     if ( is(immutable T == immutable uint) )
929     {
930         import core.memory : GC;
931         uint y = y_;
932         assert(y != 0, "% 0 not allowed");
933         if ((y&(-y)) == y)
934         {   // perfect power of 2
935             return x.data[0] & (y-1);
936         }
937         else
938         {
939             // horribly inefficient - malloc, copy, & store are unnecessary.
940             uint [] wasteful = new BigDigit[x.data.length];
941             wasteful[] = x.data[];
942             immutable rem = multibyteDivAssign(wasteful, y, 0);
943             () @trusted { GC.free(wasteful.ptr); } ();
944             return rem;
945         }
946     }
947 
948     // return x / y
949     static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe
950     {
951         if (y.data.length > x.data.length)
952             return BigUint(ZERO);
953         if (y.data.length == 1)
954             return divInt(x, y.data[0]);
955         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
956            divModInternal(result, null, x.data, y.data);
957         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
958     }
959 
960     // return x % y
961     static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe
962     {
963         if (y.data.length > x.data.length) return x;
964         if (y.data.length == 1)
965         {
966             return BigUint([modInt(x, y.data[0])]);
967         }
968         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
969         BigDigit [] rem = new BigDigit[y.data.length];
970         divModInternal(result, rem, x.data, y.data);
971         return BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
972     }
973 
974     // Return x / y in quotient, x % y in remainder
975     static void divMod(BigUint x, scope BigUint y,
976                        out BigUint quotient, out BigUint remainder) pure nothrow @safe
977     {
978         /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */
979         if (y.data.length > x.data.length)
980         {
981             quotient = 0uL;
982             remainder = x;
983         }
984         else if (y.data.length == 1)
985         {
986             quotient = divInt(x, y.data[0]);
987             remainder = BigUint([modInt(x, y.data[0])]);
988         }
989         else
990         {
991             BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1];
992             BigDigit[] rem = new BigDigit[y.data.length];
993             divModInternal(quot, rem, x.data, y.data);
994             quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot)));
995             remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
996         }
997     }
998 
999     // return x op y
1000     static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign)
1001     pure nothrow @safe
1002     if (op == "|" || op == "^" || op == "&")
1003     {
1004         auto d1 = includeSign(x.data, y.uintLength, xSign);
1005         auto d2 = includeSign(y.data, x.uintLength, ySign);
1006 
1007         foreach (i; 0 .. d1.length)
1008         {
1009             mixin("d1[i] " ~ op ~ "= d2[i];");
1010         }
1011 
1012         mixin("resultSign = xSign " ~ op ~ " ySign;");
1013 
1014         if (resultSign)
1015         {
1016             twosComplement(d1, d1);
1017         }
1018 
1019         return BigUint(removeLeadingZeros(trustedAssumeUnique(d1)));
1020     }
1021 
1022     /**
1023      * Return a BigUint which is x raised to the power of y.
1024      * Method: Powers of 2 are removed from x, then left-to-right binary
1025      * exponentiation is used.
1026      * Memory allocation is minimized: at most one temporary BigUint is used.
1027      */
1028     static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe
1029     {
1030         // Deal with the degenerate cases first.
1031         if (y == 0) return BigUint(ONE);
1032         if (y == 1) return x;
1033         if (x == 0 || x == 1) return x;
1034 
1035         BigUint result;
1036 
1037         // Simplify, step 1: Remove all powers of 2.
1038         uint firstnonzero = firstNonZeroDigit(x.data);
1039         // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits))
1040         // where BigDigitBits = BigDigit.sizeof * 8
1041 
1042         // See if x[firstnonzero..$] can now fit into a single digit.
1043         bool singledigit = ((x.data.length - firstnonzero) == 1);
1044         // If true, then x0 is that digit
1045         // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits))
1046         BigDigit x0 = x.data[firstnonzero];
1047         assert(x0 != 0, "pow(0, y) not allowed");
1048         // Length of the non-zero portion
1049         size_t nonzerolength = x.data.length - firstnonzero;
1050         ulong y0;
1051         uint evenbits = 0; // number of even bits in the bottom of x
1052         while (!(x0 & 1))
1053         {
1054             x0 >>= 1;
1055             ++evenbits;
1056         }
1057 
1058         if (x.data.length- firstnonzero == 2)
1059         {
1060             // Check for a single digit straddling a digit boundary
1061             const BigDigit x1 = x.data[firstnonzero+1];
1062             if ((x1 >> evenbits) == 0)
1063             {
1064                 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits));
1065                 singledigit = true;
1066             }
1067         }
1068         // Now if (singledigit), x^^y  = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1069 
1070         uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end
1071 
1072         // Simplify, step 2: For singledigits, see if we can trivially reduce y
1073 
1074         BigDigit finalMultiplier = 1UL;
1075 
1076         if (singledigit)
1077         {
1078             // x fits into a single digit. Raise it to the highest power we can
1079             // that still fits into a single digit, then reduce the exponent accordingly.
1080             // We're quite likely to have a residual multiply at the end.
1081             // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100.
1082             // and 5^^13 still fits into a uint.
1083             evenshiftbits  = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK);
1084             if (x0 == 1)
1085             {   // Perfect power of 2
1086                 result = 1UL;
1087                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1088             }
1089             immutable p = highestPowerBelowUintMax(x0);
1090             if (y <= p)
1091             {   // Just do it with pow
1092                 result = cast(ulong) intpow(x0, y);
1093                 if (evenbits + firstnonzero == 0)
1094                     return result;
1095                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1096             }
1097             y0 = y / p;
1098             finalMultiplier = intpow(x0, y - y0*p);
1099             x0 = intpow(x0, p);
1100             // Result is x0
1101             nonzerolength = 1;
1102         }
1103         // Now if (singledigit), x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1104 
1105         // Perform a crude check for overflow and allocate result buffer.
1106         // The length required is y * lg2(x) bits.
1107         // which will always fit into y*x.length digits. But this is
1108         // a gross overestimate if x is small (length 1 or 2) and the highest
1109         // digit is nearly empty.
1110         // A better estimate is:
1111         //   y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits,
1112         //  and the first term is always between
1113         //  y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and
1114         //  y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS
1115         // For single digit payloads, we already have
1116         //   x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1117         // and x0 is almost a full digit, so it's a tight estimate.
1118         // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y
1119         // Note that the divisions must be rounded up.
1120 
1121         // Estimated length in BigDigits
1122         immutable estimatelength = singledigit
1123             ? 1 + y0 + ((evenbits*y  + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y
1124             :  x.data.length * y;
1125         // Imprecise check for overflow. Makes the extreme cases easier to debug
1126         // (less extreme overflow will result in an out of memory error).
1127         if (estimatelength > uint.max/(4*BigDigit.sizeof))
1128             assert(0, "Overflow in BigInt.pow");
1129 
1130         // The result buffer includes space for all the trailing zeros
1131         BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength];
1132 
1133         // Do all the powers of 2!
1134         size_t result_start = cast(size_t)( firstnonzero * y
1135             + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0));
1136 
1137         resultBuffer[0 .. result_start] = 0;
1138         BigDigit [] t1 = resultBuffer[result_start..$];
1139         BigDigit [] r1;
1140 
1141         if (singledigit)
1142         {
1143             r1 = t1[0 .. 1];
1144             r1[0] = x0;
1145             y = y0;
1146         }
1147         else
1148         {
1149             // It's not worth right shifting by evenbits unless we also shrink the length after each
1150             // multiply or squaring operation. That might still be worthwhile for large y.
1151             r1 = t1[0 .. x.data.length - firstnonzero];
1152             r1[0..$] = x.data[firstnonzero..$];
1153         }
1154 
1155         if (y>1)
1156         {   // Set r1 = r1 ^^ y.
1157             // The secondary buffer only needs space for the multiplication results
1158             BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start];
1159             BigDigit [] r2;
1160 
1161             int shifts = 63; // num bits in a long
1162             while (!(y & 0x8000_0000_0000_0000L))
1163             {
1164                 y <<= 1;
1165                 --shifts;
1166             }
1167             y <<=1;
1168 
1169             while (y != 0)
1170             {
1171                 // For each bit of y: Set r1 =  r1 * r1
1172                 // If the bit is 1, set r1 = r1 * x
1173                 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5.
1174                 // Optimization opportunity: if more than 2 bits in y are set,
1175                 // it's usually possible to reduce the number of multiplies
1176                 // by caching odd powers of x. eg for y = 54,
1177                 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2
1178                 r2 = t2[0 .. r1.length*2];
1179                 squareInternal(r2, r1);
1180                 if (y & 0x8000_0000_0000_0000L)
1181                 {
1182                     r1 = t1[0 .. r2.length + nonzerolength];
1183                     if (singledigit)
1184                     {
1185                         r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0);
1186                     }
1187                     else
1188                     {
1189                         mulInternal(r1, r2, x.data[firstnonzero..$]);
1190                     }
1191                 }
1192                 else
1193                 {
1194                     r1 = t1[0 .. r2.length];
1195                     r1[] = r2[];
1196                 }
1197                 y <<=1;
1198                 shifts--;
1199             }
1200             while (shifts>0)
1201             {
1202                 r2 = t2[0 .. r1.length * 2];
1203                 squareInternal(r2, r1);
1204                 r1 = t1[0 .. r2.length];
1205                 r1[] = r2[];
1206                 --shifts;
1207             }
1208         }
1209 
1210         if (finalMultiplier != 1)
1211         {
1212             const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0);
1213             if (carry)
1214             {
1215                 r1 = t1[0 .. r1.length + 1];
1216                 r1[$-1] = carry;
1217             }
1218         }
1219         if (evenshiftbits)
1220         {
1221             const BigDigit carry = multibyteShl(r1, r1, evenshiftbits);
1222             if (carry != 0)
1223             {
1224                 r1 = t1[0 .. r1.length + 1];
1225                 r1[$ - 1] = carry;
1226             }
1227         }
1228         while (r1[$ - 1]==0)
1229         {
1230             r1=r1[0 .. $ - 1];
1231         }
1232         return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length]));
1233     }
1234 
1235     // Implement toHash so that BigUint works properly as an AA key.
1236     size_t toHash() const @nogc nothrow pure @safe scope
1237     {
1238         return .hashOf(data);
1239     }
1240 
1241 } // end BigUint
1242 
1243 @safe pure nothrow unittest
1244 {
1245     // ulong comparison test
1246     BigUint a = [1];
1247     assert(a == 1);
1248     // https://issues.dlang.org/show_bug.cgi?id=9548
1249     assert(a < 0x8000_0000_0000_0000UL);
1250 
1251     // https://issues.dlang.org/show_bug.cgi?id=12234
1252     BigUint z = [0];
1253     assert(z == 0UL);
1254     assert(!(z > 0UL));
1255     assert(!(z < 0UL));
1256 }
1257 
1258 // https://issues.dlang.org/show_bug.cgi?id=16223
1259 @system pure nothrow unittest
1260 {
1261     BigUint a = [3];
1262     int b = 5;
1263     assert(BigUint.mulInt(a,b) == 15);
1264 }
1265 
1266 // Remove leading zeros from x, to restore the BigUint invariant
1267 inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe
1268 {
1269     size_t k = x.length;
1270     while (k>1 && x[k - 1]==0) --k;
1271     return x[0 .. k];
1272 }
1273 
1274 pure @safe unittest
1275 {
1276    BigUint r = BigUint([5]);
1277    BigUint t = BigUint([7]);
1278    BigUint s = BigUint.mod(r, t);
1279    assert(s == 5);
1280 }
1281 
1282 
1283 @safe pure unittest
1284 {
1285     BigUint r;
1286     r = 5UL;
1287     assert(r.peekUlong(0) == 5UL);
1288     assert(r.peekUint(0) == 5U);
1289     r = 0x1234_5678_9ABC_DEF0UL;
1290     assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL);
1291     assert(r.peekUint(0) == 0x9ABC_DEF0U);
1292 }
1293 
1294 
1295 // Pow tests
1296 pure @safe unittest
1297 {
1298     BigUint r, s;
1299     r.fromHexString("80000000_00000001");
1300     s = BigUint.pow(r, 5);
1301     r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000"
1302       ~ "_00000002_80000000_00000001");
1303     assert(s == r);
1304     s = 10UL;
1305     s = BigUint.pow(s, 39);
1306     r.fromDecimalString("1000000000000000000000000000000000000000");
1307     assert(s == r);
1308     r.fromHexString("1_E1178E81_00000000");
1309     s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds
1310 
1311     r.fromDecimalString("000_000_00");
1312     assert(r == 0);
1313     r.fromDecimalString("0007");
1314     assert(r == 7);
1315     r.fromDecimalString("0");
1316     assert(r == 0);
1317 }
1318 
1319 // Radix conversion tests
1320 @safe pure unittest
1321 {
1322     BigUint r;
1323     r.fromHexString("1_E1178E81_00000000");
1324     assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000");
1325     assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000");
1326     assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000");
1327     assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000");
1328     assert(r.toHexString(0, '_', 16+8+8) ==   "00000000_00000001_E1178E81_00000000");
1329     assert(r.toHexString(0, '_', 16+8+8+1) ==      "0_00000000_00000001_E1178E81_00000000");
1330     assert(r.toHexString(0, '_', 16+8+8+1, ' ') == "                  1_E1178E81_00000000");
1331     assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000");
1332     r = 0UL;
1333     assert(r.toHexString(0, '_', 0) == "0");
1334     assert(r.toHexString(0, '_', 7) == "0000000");
1335     assert(r.toHexString(0, '_', 7, ' ') == "      0");
1336     assert(r.toHexString(0, '#', 9) == "0#00000000");
1337     assert(r.toHexString(0, 0, 9) == "000000000");
1338 }
1339 
1340 //
1341 @safe pure unittest
1342 {
1343     BigUint r;
1344     r.fromHexString("1_E1178E81_00000000");
1345     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000");
1346     assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000");
1347     assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000");
1348     assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000");
1349     assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) ==   "00000000_00000001_e1178e81_00000000");
1350     assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000");
1351     assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == "                  1_e1178e81_00000000");
1352     assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000");
1353     r = 0UL;
1354     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0");
1355     assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000");
1356     assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == "      0");
1357     assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000");
1358     assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000");
1359     assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000");
1360 }
1361 
1362 
1363 private:
1364 void twosComplement(const(BigDigit) [] x, BigDigit[] result)
1365 pure nothrow @safe
1366 {
1367     foreach (i; 0 .. x.length)
1368     {
1369         result[i] = ~x[i];
1370     }
1371     result[x.length..$] = BigDigit.max;
1372 
1373     foreach (i; 0 .. result.length)
1374     {
1375         if (result[i] == BigDigit.max)
1376         {
1377             result[i] = 0;
1378         }
1379         else
1380         {
1381             result[i] += 1;
1382             break;
1383         }
1384     }
1385 }
1386 
1387 // Encode BigInt as BigDigit array (sign and 2's complement)
1388 BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign)
1389 pure nothrow @safe
1390 {
1391     size_t length = (x.length > minSize) ? x.length : minSize;
1392     BigDigit [] result = new BigDigit[length];
1393     if (sign)
1394     {
1395         twosComplement(x, result);
1396     }
1397     else
1398     {
1399         result[0 .. x.length] = x;
1400     }
1401     return result;
1402 }
1403 
1404 // works for any type
1405 T intpow(T)(T x, ulong n) pure nothrow @safe
1406 {
1407     T p;
1408 
1409     switch (n)
1410     {
1411     case 0:
1412         p = 1;
1413         break;
1414 
1415     case 1:
1416         p = x;
1417         break;
1418 
1419     case 2:
1420         p = x * x;
1421         break;
1422 
1423     default:
1424         p = 1;
1425         while (1)
1426         {
1427             if (n & 1)
1428                 p *= x;
1429             n >>= 1;
1430             if (!n)
1431                 break;
1432             x *= x;
1433         }
1434         break;
1435     }
1436     return p;
1437 }
1438 
1439 
1440 //  returns the maximum power of x that will fit in a uint.
1441 int highestPowerBelowUintMax(uint x) pure nothrow @safe
1442 {
1443      assert(x > 1, "x must be greater than 1");
1444      static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9,
1445                                           8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7];
1446      if (x<24) return maxpwr[x-2];
1447      if (x<41) return 6;
1448      if (x<85) return 5;
1449      if (x<256) return 4;
1450      if (x<1626) return 3;
1451      if (x<65_536) return 2;
1452      return 1;
1453 }
1454 
1455 //  returns the maximum power of x that will fit in a ulong.
1456 int highestPowerBelowUlongMax(uint x) pure nothrow @safe
1457 {
1458      assert(x > 1, "x must be greater than 1");
1459      static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18,
1460                                          17, 17, 16, 16, 15, 15, 15, 15, 14, 14,
1461                                          14, 14, 13, 13, 13, 13, 13, 13, 13, 12,
1462                                          12, 12, 12, 12, 12, 12, 12, 12, 12];
1463      if (x<41) return maxpwr[x-2];
1464      if (x<57) return 11;
1465      if (x<85) return 10;
1466      if (x<139) return 9;
1467      if (x<256) return 8;
1468      if (x<566) return 7;
1469      if (x<1626) return 6;
1470      if (x<7132) return 5;
1471      if (x<65_536) return 4;
1472      if (x<2_642_246) return 3;
1473      return 2;
1474 }
1475 
1476 version (StdUnittest)
1477 {
1478 
1479 private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe
1480 {
1481      int pwr = 1;
1482      for (ulong q = x;x*q < cast(ulong) uint.max; )
1483      {
1484          q*=x; ++pwr;
1485      }
1486      return pwr;
1487 }
1488 
1489 @safe pure unittest
1490 {
1491     assert(highestPowerBelowUintMax(10)==9);
1492     for (int k=82; k<88; ++k)
1493     {
1494         assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k));
1495     }
1496 }
1497 
1498 }
1499 
1500 
1501 /*  General unsigned subtraction routine for bigints.
1502  *  Sets result = x - y. If the result is negative, negative will be true.
1503  * Returns:
1504  *    unique memory
1505  */
1506 BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative)
1507 pure nothrow @safe
1508 {
1509     if (x.length == y.length)
1510     {
1511         // There's a possibility of cancellation, if x and y are almost equal.
1512         ptrdiff_t last = highestDifferentDigit(x, y);
1513         BigDigit [] result = new BigDigit[last+1];
1514         if (x[last] < y[last])
1515         {   // we know result is negative
1516             multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0);
1517             *negative = true;
1518         }
1519         else
1520         {   // positive or zero result
1521             multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0);
1522             *negative = false;
1523         }
1524         while (result.length > 1 && result[$-1] == 0)
1525         {
1526             result = result[0..$-1];
1527         }
1528 //        if (result.length >1 && result[$-1]==0) return result[0..$-1];
1529         return result;
1530     }
1531     // Lengths are different
1532     const(BigDigit) [] large, small;
1533     if (x.length < y.length)
1534     {
1535         *negative = true;
1536         large = y; small = x;
1537     }
1538     else
1539     {
1540         *negative = false;
1541         large = x; small = y;
1542     }
1543     // result.length will be equal to larger length, or could decrease by 1.
1544 
1545 
1546     BigDigit [] result = new BigDigit[large.length];
1547     BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0);
1548     result[small.length..$] = large[small.length..$];
1549     if (carry)
1550     {
1551         multibyteIncrementAssign!('-')(result[small.length..$], carry);
1552     }
1553     while (result.length > 1 && result[$-1] == 0)
1554     {
1555         result = result[0..$-1];
1556     }
1557     return result;
1558 }
1559 
1560 
1561 /*
1562  * return a + b
1563  * Returns:
1564  *    unique memory
1565  */
1566 BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe
1567 {
1568     const(BigDigit) [] x, y;
1569     if (a.length < b.length)
1570     {
1571         x = b; y = a;
1572     }
1573     else
1574     {
1575         x = a; y = b;
1576     }
1577     // now we know x.length > y.length
1578     // create result. add 1 in case it overflows
1579     BigDigit [] result = new BigDigit[x.length + 1];
1580 
1581     BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0);
1582     if (x.length != y.length)
1583     {
1584         result[y.length..$-1]= x[y.length..$];
1585         carry  = multibyteIncrementAssign!('+')(result[y.length..$-1], carry);
1586     }
1587     if (carry)
1588     {
1589         result[$-1] = carry;
1590         return result;
1591     }
1592     else
1593         return result[0..$-1];
1594 }
1595 
1596 /**  return x + y
1597  */
1598 BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow
1599 {
1600     uint hi = cast(uint)(y >>> 32);
1601     uint lo = cast(uint)(y& 0xFFFF_FFFF);
1602     auto len = x.length;
1603     if (x.length < 2 && hi != 0) ++len;
1604     BigDigit [] result = new BigDigit[len+1];
1605     result[0 .. x.length] = x[];
1606     if (x.length < 2 && hi != 0)
1607     {
1608         result[1]=hi;
1609         hi=0;
1610     }
1611     uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo);
1612     if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi);
1613     if (carry)
1614     {
1615         result[$-1] = carry;
1616         return result;
1617     }
1618     else
1619         return result[0..$-1];
1620 }
1621 
1622 /** Return x - y.
1623  *  x must be greater than y.
1624  */
1625 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe
1626 {
1627     uint hi = cast(uint)(y >>> 32);
1628     uint lo = cast(uint)(y & 0xFFFF_FFFF);
1629     BigDigit [] result = new BigDigit[x.length];
1630     result[] = x[];
1631     multibyteIncrementAssign!('-')(result[], lo);
1632     if (hi)
1633         multibyteIncrementAssign!('-')(result[1..$], hi);
1634     if (result[$-1] == 0)
1635         return result[0..$-1];
1636     else
1637         return result;
1638 }
1639 
1640 /**  General unsigned multiply routine for bigints.
1641  *  Sets result = x * y.
1642  *
1643  *  The length of y must not be larger than the length of x.
1644  *  Different algorithms are used, depending on the lengths of x and y.
1645  *  TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the
1646  *  unbalanced case. (But I doubt it would be faster in practice).
1647  *
1648  */
1649 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1650     pure nothrow @safe
1651 {
1652     import core.memory : GC;
1653     assert( result.length == x.length + y.length,
1654             "result array must have enough space to store computed result");
1655     assert( y.length > 0, "y must not be empty");
1656     assert( x.length >= y.length, "x must be greater or equal than y");
1657     if (y.length <= KARATSUBALIMIT)
1658     {
1659         // Small multiplier, we'll just use the asm classic multiply.
1660         if (y.length == 1)
1661         {   // Trivial case, no cache effects to worry about
1662             result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0);
1663             return;
1664         }
1665 
1666         immutable CACHELIMIT = getCacheLimit;
1667         if (x.length + y.length < CACHELIMIT)
1668             return mulSimple(result, x, y);
1669 
1670         // If x is so big that it won't fit into the cache, we divide it into chunks
1671         // Every chunk must be greater than y.length.
1672         // We make the first chunk shorter, if necessary, to ensure this.
1673 
1674         auto chunksize = CACHELIMIT / y.length;
1675         immutable residual  =  x.length % chunksize;
1676         if (residual < y.length)
1677         {
1678             chunksize -= y.length;
1679         }
1680 
1681         // Use schoolbook multiply.
1682         mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y);
1683         auto done = chunksize;
1684 
1685         while (done < x.length)
1686         {
1687             // result[done .. done+ylength] already has a value.
1688             chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) :  x.length - done;
1689             BigDigit [KARATSUBALIMIT] partial;
1690             partial[0 .. y.length] = result[done .. done+y.length];
1691             mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y);
1692             addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]);
1693             done += chunksize;
1694         }
1695         return;
1696     }
1697 
1698     immutable half = (x.length >> 1) + (x.length & 1);
1699     if (2*y.length*y.length <= x.length*x.length)
1700     {
1701         // UNBALANCED MULTIPLY
1702         // Use school multiply to cut into quasi-squares of Karatsuba-size
1703         // or larger. The ratio of the two sides of the 'square' must be
1704         // between 1.414:1 and 1:1. Use Karatsuba on each chunk.
1705         //
1706         // For maximum performance, we want the ratio to be as close to
1707         // 1:1 as possible. To achieve this, we can either pad x or y.
1708         // The best choice depends on the modulus x%y.
1709         auto numchunks = x.length / y.length;
1710         auto chunksize = y.length;
1711         auto extra =  x.length % y.length;
1712         auto maxchunk = chunksize + extra;
1713         bool paddingY; // true = we're padding Y, false = we're padding X.
1714         bool isExtraSmall = extra * extra * 2 < y.length * y.length;
1715         if (numchunks == 1 && isExtraSmall)
1716         {
1717             // We divide (x_first_half * y) and (x_last_half * y)
1718             // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)).
1719             // (1.414 ~ 1.707)/2:1 is balanced.
1720             BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length];
1721             BigDigit [] partial = scratchbuff[$ - y.length .. $];
1722             scratchbuff = scratchbuff[0 .. $ - y.length];
1723             mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff);
1724             partial[] = result[half .. half + y.length];
1725             mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff);
1726             BigDigit c = addAssignSimple(result[half .. half + y.length], partial);
1727             if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c);
1728             () @trusted { GC.free(scratchbuff.ptr); } ();
1729         }
1730         else
1731         {
1732             if (isExtraSmall)
1733             {
1734                 // The leftover bit is small enough that it should be incorporated
1735                 // in the existing chunks.
1736                 // Make all the chunks a tiny bit bigger
1737                 // (We're padding y with zeros)
1738                 chunksize += extra / numchunks;
1739                 extra = x.length - chunksize*numchunks;
1740                 // there will probably be a few left over.
1741                 // Every chunk will either have size chunksize, or chunksize+1.
1742                 maxchunk = chunksize + 1;
1743                 paddingY = true;
1744                 assert(chunksize + extra + chunksize *(numchunks-1) == x.length,
1745                     "Unexpected size");
1746             }
1747             else
1748             {
1749                 // the extra bit is large enough that it's worth making a new chunk.
1750                 // (This means we're padding x with zeros, when doing the first one).
1751                 maxchunk = chunksize;
1752                 ++numchunks;
1753                 paddingY = false;
1754                 assert(extra + chunksize *(numchunks-1) == x.length,
1755                     "Unexpected size");
1756             }
1757             // We make the buffer a bit bigger so we have space for the partial sums.
1758             BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1759             BigDigit [] partial = scratchbuff[$ - y.length .. $];
1760             scratchbuff = scratchbuff[0 .. $ - y.length];
1761             size_t done; // how much of X have we done so far?
1762             if (paddingY)
1763             {
1764                 // If the first chunk is bigger, do it first. We're padding y.
1765                 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1766                     x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff);
1767                 done = chunksize + (extra > 0 ? 1 : 0);
1768                 if (extra) --extra;
1769             }
1770             else
1771             {   // We're padding X. Begin with the extra bit.
1772                 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1773                 done = extra;
1774                 extra = 0;
1775             }
1776             immutable basechunksize = chunksize;
1777             while (done < x.length)
1778             {
1779                 chunksize = basechunksize + (extra > 0 ? 1 : 0);
1780                 if (extra) --extra;
1781                 partial[] = result[done .. done+y.length];
1782                 mulKaratsuba(result[done .. done + y.length + chunksize],
1783                         x[done .. done+chunksize], y, scratchbuff);
1784                 addAssignSimple(result[done .. done + y.length + chunksize], partial);
1785                 done += chunksize;
1786             }
1787             () @trusted { GC.free(scratchbuff.ptr); } ();
1788         }
1789     }
1790     else
1791     {
1792         // Balanced. Use Karatsuba directly.
1793         BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1794         mulKaratsuba(result, x, y, scratchbuff);
1795         () @trusted { GC.free(scratchbuff.ptr); } ();
1796     }
1797 }
1798 
1799 // https://issues.dlang.org/show_bug.cgi?id=20493
1800 @safe unittest
1801 {
1802     // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300)
1803     // the number itself isn't important, only the amount of digits, so we do a simpler
1804     // multiplication of the same size, analogous to:
1805     // 11111111 * 11111111 = 0123456787654321
1806     // but instead of base 10, it's in base `BigDigit`
1807 
1808     BigDigit[398] x = 1;
1809     BigDigit[236] y = 1;
1810     BigDigit[x.length + y.length] result;
1811     mulInternal(result[], x[], y[]);
1812 
1813     // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0]
1814     BigDigit[x.length + y.length] expected = y.length;
1815     foreach (BigDigit i; 0 .. y.length)
1816     {
1817         expected[i] = i+1;
1818         expected[$-1-i] = i;
1819     }
1820 
1821     assert(result == expected);
1822 }
1823 
1824 /**  General unsigned squaring routine for BigInts.
1825  *   Sets result = x*x.
1826  *   NOTE: If the highest half-digit of x is zero, the highest digit of result will
1827  *   also be zero.
1828  */
1829 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe
1830 {
1831   import core.memory : GC;
1832   // Squaring is potentially half a multiply, plus add the squares of
1833   // the diagonal elements.
1834   assert(result.length == 2*x.length,
1835      "result needs to have twice the capacity of x");
1836   if (x.length <= KARATSUBASQUARELIMIT)
1837   {
1838       if (x.length == 1)
1839       {
1840          result[1] = multibyteMul(result[0 .. 1], x, x[0], 0);
1841          return;
1842       }
1843       return squareSimple(result, x);
1844   }
1845   // The nice thing about squaring is that it always stays balanced
1846   BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1847   squareKaratsuba(result, x, scratchbuff);
1848   () @trusted { GC.free(scratchbuff.ptr); } ();
1849 }
1850 
1851 
1852 import core.bitop : bsr;
1853 
1854 /// if remainder is null, only calculate quotient.
1855 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u,
1856         const BigDigit [] v) pure nothrow @safe
1857 {
1858     import core.memory : GC;
1859     assert(quotient.length == u.length - v.length + 1,
1860         "Invalid quotient length");
1861     assert(remainder == null || remainder.length == v.length,
1862         "Invalid remainder");
1863     assert(v.length > 1, "v must have more than 1 element");
1864     assert(u.length >= v.length, "u must be as longer or longer than v");
1865 
1866     // Normalize by shifting v left just enough so that
1867     // its high-order bit is on, and shift u left the
1868     // same amount. The highest bit of u will never be set.
1869 
1870     BigDigit [] vn = new BigDigit[v.length];
1871     BigDigit [] un = new BigDigit[u.length + 1];
1872     // How much to left shift v, so that its MSB is set.
1873     uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]);
1874     if (s != 0)
1875     {
1876         multibyteShl(vn, v, s);
1877         un[$-1] = multibyteShl(un[0..$-1], u, s);
1878     }
1879     else
1880     {
1881         vn[] = v[];
1882         un[0..$-1] = u[];
1883         un[$-1] = 0;
1884     }
1885     if (quotient.length<FASTDIVLIMIT)
1886     {
1887         schoolbookDivMod(quotient, un, vn);
1888     }
1889     else
1890     {
1891         blockDivMod(quotient, un, vn);
1892     }
1893 
1894     // Unnormalize remainder, if required.
1895     if (remainder != null)
1896     {
1897         if (s == 0) remainder[] = un[0 .. vn.length];
1898         else multibyteShr(remainder, un[0 .. vn.length+1], s);
1899     }
1900     () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } ();
1901 }
1902 
1903 pure @safe unittest
1904 {
1905     immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000];
1906     immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000];
1907     uint [] q = new uint[u.length - v.length + 1];
1908     uint [] r = new uint[2];
1909     divModInternal(q, r, u, v);
1910     assert(q[]==[0xFFFF_FFFFu, 0]);
1911     assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]);
1912     u = [0, 0xFFFF_FFFE, 0x8000_0001];
1913     v = [0xFFFF_FFFF, 0x8000_0000];
1914     divModInternal(q, r, u, v);
1915 }
1916 
1917 
1918 // Converts a big uint to a hexadecimal string.
1919 //
1920 // Optionally, a separator character (eg, an underscore) may be added between
1921 // every 8 digits.
1922 // buff.length must be data.length*8 if separator is zero,
1923 // or data.length*9 if separator is non-zero. It will be completely filled.
1924 char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0,
1925         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
1926 {
1927     int x=0;
1928     for (ptrdiff_t i=data.length - 1; i >= 0; --i)
1929     {
1930         toHexZeroPadded(buff[x .. x+8], data[i], letterCase);
1931         x+=8;
1932         if (separator)
1933         {
1934             if (i>0) buff[x] = separator;
1935             ++x;
1936         }
1937     }
1938     return buff;
1939 }
1940 
1941 /**
1942  * Convert a big uint into an octal string.
1943  *
1944  * Params:
1945  *  buff = The destination buffer for the octal string. Must be large enough to
1946  *      store the result, including leading zeroes, which is
1947  *      ceil(data.length * BigDigitBits / 3) characters.
1948  *      The buffer is filled from back to front, starting from `buff[$-1]`.
1949  *  data = The biguint to be converted.
1950  *
1951  * Returns: The index of the leading non-zero digit in `buff`. Will be
1952  * `buff.length - 1` if the entire big uint is zero.
1953  */
1954 size_t biguintToOctal(char[] buff, const(BigDigit)[] data)
1955     pure nothrow @safe @nogc
1956 {
1957     ubyte carry = 0;
1958     int shift = 0;
1959     size_t penPos = buff.length - 1;
1960     size_t lastNonZero = buff.length - 1;
1961 
1962     pragma(inline) void output(uint digit) @nogc nothrow
1963     {
1964         if (digit != 0)
1965             lastNonZero = penPos;
1966         buff[penPos--] = cast(char)('0' + digit);
1967     }
1968 
1969     foreach (bigdigit; data)
1970     {
1971         if (shift < 0)
1972         {
1973             // Some bits were carried over from previous word.
1974             assert(shift > -3, "shift must be greater than -3");
1975             output(((bigdigit << -shift) | carry) & 0b111);
1976             shift += 3;
1977             assert(shift > 0, "shift must be 1 or greater");
1978         }
1979 
1980         while (shift <= BigDigitBits - 3)
1981         {
1982             output((bigdigit >>> shift) & 0b111);
1983             shift += 3;
1984         }
1985 
1986         if (shift < BigDigitBits)
1987         {
1988             // Some bits need to be carried forward.
1989             carry = (bigdigit >>> shift) & 0b11;
1990         }
1991         shift -= BigDigitBits;
1992         assert(shift >= -2 && shift <= 0, "shift must in [-2,0]");
1993     }
1994 
1995     if (shift < 0)
1996     {
1997         // Last word had bits that haven't been output yet.
1998         assert(shift > -3, "Shift must be greater than -3");
1999         output(carry);
2000     }
2001 
2002     return lastNonZero;
2003 }
2004 
2005 /** Convert a big uint into a decimal string.
2006  *
2007  * Params:
2008  *  data    The biguint to be converted. Will be destroyed.
2009  *  buff    The destination buffer for the decimal string. Must be
2010  *          large enough to store the result, including leading zeros.
2011  *          Will be filled backwards, starting from buff[$-1].
2012  *
2013  * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length.
2014  * Returns:
2015  *    the lowest index of buff which was used.
2016  */
2017 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe
2018 {
2019     ptrdiff_t sofar = buff.length;
2020     // Might be better to divide by (10^38/2^32) since that gives 38 digits for
2021     // the price of 3 divisions and a shr; this version only gives 27 digits
2022     // for 3 divisions.
2023     while (data.length>1)
2024     {
2025         uint rem = multibyteDivAssign(data, 10_0000_0000, 0);
2026         itoaZeroPadded(buff[sofar-9 .. sofar], rem);
2027         sofar -= 9;
2028         if (data[$-1] == 0 && data.length > 1)
2029         {
2030             data.length = data.length - 1;
2031         }
2032     }
2033     itoaZeroPadded(buff[sofar-10 .. sofar], data[0]);
2034     sofar -= 10;
2035     // and strip off the leading zeros
2036     while (sofar != buff.length-1 && buff[sofar] == '0')
2037         sofar++;
2038     return sofar;
2039 }
2040 
2041 /** Convert a decimal string into a big uint.
2042  *
2043  * Params:
2044  *  data    The biguint to be receive the result. Must be large enough to
2045  *          store the result.
2046  *  s       The decimal string. May contain _ or 0 .. 9
2047  *
2048  * The required length for the destination buffer is slightly less than
2049  *  1 + s.length/log2(10) = 1 + s.length/3.3219.
2050  *
2051  * Returns:
2052  *    the highest index of data which was used.
2053  */
2054 int biguintFromDecimal(Range)(BigDigit[] data, Range s)
2055 if (
2056     isInputRange!Range &&
2057     isSomeChar!(ElementType!Range) &&
2058     !isInfinite!Range)
2059 in
2060 {
2061     static if (hasLength!Range)
2062         assert((data.length >= 2) || (data.length == 1 && s.length == 1),
2063             "data has a invalid length");
2064 }
2065 do
2066 {
2067     import std.conv : ConvException;
2068 
2069     // Convert to base 1e19 = 10_000_000_000_000_000_000.
2070     // (this is the largest power of 10 that will fit into a long).
2071     // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219.
2072     // 485 bits will only just fit into 146 decimal digits.
2073     // As we convert the string, we record the number of digits we've seen in base 19:
2074     // hi is the number of digits/19, lo is the extra digits (0 to 18).
2075     // TODO: This is inefficient for very large strings (it is O(n^^2)).
2076     // We should take advantage of fast multiplication once the numbers exceed
2077     // Karatsuba size.
2078     uint lo = 0; // number of powers of digits, 0 .. 18
2079     uint x = 0;
2080     ulong y = 0;
2081     uint hi = 0; // number of base 1e19 digits
2082     data[0] = 0; // initially number is 0.
2083     if (data.length > 1)
2084         data[1] = 0;
2085 
2086     foreach (character; s)
2087     {
2088         if (character == '_')
2089             continue;
2090 
2091         if (character < '0' || character > '9')
2092             throw new ConvException("invalid digit");
2093         x *= 10;
2094         x += character - '0';
2095         ++lo;
2096         if (lo == 9)
2097         {
2098             y = x;
2099             x = 0;
2100         }
2101         if (lo == 18)
2102         {
2103             y *= 10_0000_0000;
2104             y += x;
2105             x = 0;
2106         }
2107         if (lo == 19)
2108         {
2109             y *= 10;
2110             y += x;
2111             x = 0;
2112             // Multiply existing number by 10^19, then add y1.
2113             if (hi>0)
2114             {
2115                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A
2116                 ++hi;
2117                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000
2118                 ++hi;
2119             }
2120             else
2121                 hi = 2;
2122             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2123             c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2124             if (c != 0)
2125             {
2126                 data[hi]=c;
2127                 ++hi;
2128             }
2129             y = 0;
2130             lo = 0;
2131         }
2132     }
2133     // Now set y = all remaining digits.
2134     if (lo >= 18)
2135     {
2136     }
2137     else if (lo >= 9)
2138     {
2139         for (int k=9; k<lo; ++k) y*=10;
2140         y+=x;
2141     }
2142     else
2143     {
2144         for (int k=0; k<lo; ++k) y*=10;
2145         y+=x;
2146     }
2147     if (lo != 0)
2148     {
2149         if (hi == 0)
2150         {
2151             data[0] = cast(uint) y;
2152             if (data.length == 1)
2153             {
2154                 hi = 1;
2155             }
2156             else
2157             {
2158                 data[1] = cast(uint)(y >>> 32);
2159                 hi=2;
2160             }
2161         }
2162         else
2163         {
2164             while (lo>0)
2165             {
2166                 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0);
2167                 if (c != 0)
2168                 {
2169                     data[hi]=c;
2170                     ++hi;
2171                 }
2172                 --lo;
2173             }
2174             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2175             if (y > 0xFFFF_FFFFL)
2176             {
2177                 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2178             }
2179             if (c != 0)
2180             {
2181                 data[hi]=c;
2182                 ++hi;
2183             }
2184         }
2185     }
2186     while (hi>1 && data[hi-1]==0)
2187         --hi;
2188     return hi;
2189 }
2190 
2191 
2192 // ------------------------
2193 // These in-place functions are only for internal use; they are incompatible
2194 // with COW.
2195 
2196 // Classic 'schoolbook' multiplication.
2197 void mulSimple(BigDigit[] result, const(BigDigit) [] left,
2198         const(BigDigit)[] right) pure nothrow @safe
2199 in
2200 {
2201     assert(result.length == left.length + right.length,
2202         "Result must be able to store left + right");
2203     assert(right.length>1, "right must not be empty");
2204 }
2205 do
2206 {
2207     result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0);
2208     multibyteMultiplyAccumulate(result[1..$], left, right[1..$]);
2209 }
2210 
2211 // Classic 'schoolbook' squaring
2212 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe
2213 in
2214 {
2215     assert(result.length == 2*x.length, "result must be twice as long as x");
2216     assert(x.length>1, "x must not be empty");
2217 }
2218 do
2219 {
2220     multibyteSquare(result, x);
2221 }
2222 
2223 
2224 // add two uints of possibly different lengths. Result must be as long
2225 // as the larger length.
2226 // Returns carry (0 or 1).
2227 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right)
2228 pure nothrow @safe
2229 in
2230 {
2231     assert(result.length == left.length,
2232         "result and left must be of the same length");
2233     assert(left.length >= right.length,
2234         "left must be longer or of equal length to right");
2235     assert(right.length > 0, "right must not be empty");
2236 }
2237 do
2238 {
2239     uint carry = multibyteAdd(result[0 .. right.length],
2240             left[0 .. right.length], right, 0);
2241     if (right.length < left.length)
2242     {
2243         result[right.length .. left.length] = left[right.length .. $];
2244         carry = multibyteIncrementAssign!('+')(result[right.length..$], carry);
2245     }
2246     return carry;
2247 }
2248 
2249 /* result = result - right
2250  * Returns carry = 1 if result was less than right.
2251 */
2252 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2253 pure nothrow @safe
2254 {
2255     assert(result.length >= right.length,
2256        "result must be longer or of equal length to right");
2257     uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0);
2258     if (c && result.length > right.length)
2259         c = multibyteIncrementAssign!('-')(result[right.length .. $], c);
2260     return c;
2261 }
2262 
2263 /* result = result + right
2264 */
2265 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2266 pure nothrow @safe
2267 {
2268     assert(result.length >= right.length,
2269        "result must be longer or of equal length to right");
2270     uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0);
2271     if (c && result.length > right.length)
2272        c = multibyteIncrementAssign!('+')(result[right.length .. $], c);
2273     return c;
2274 }
2275 
2276 /* performs result += wantSub? - right : right;
2277 */
2278 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right,
2279         bool wantSub) pure nothrow @safe
2280 {
2281     if (wantSub)
2282         return subAssignSimple(result, right);
2283     else
2284         return addAssignSimple(result, right);
2285 }
2286 
2287 
2288 // return true if x<y, considering leading zeros
2289 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe
2290 {
2291     assert(x.length >= y.length,
2292        "x must be longer or of equal length to y");
2293     auto k = x.length-1;
2294     while (x[k]==0 && k >= y.length)
2295         --k;
2296     if (k >= y.length)
2297         return false;
2298     while (k>0 && x[k]==y[k])
2299         --k;
2300     return x[k] < y[k];
2301 }
2302 
2303 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y.
2304 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
2305     pure nothrow @safe
2306 {
2307     assert(result.length == ((x.length >= y.length) ? x.length : y.length),
2308         "result must capable to store the maximum of x and y");
2309 
2310     size_t minlen;
2311     bool negative;
2312     if (x.length >= y.length)
2313     {
2314         minlen = y.length;
2315         negative = less(x, y);
2316     }
2317     else
2318     {
2319        minlen = x.length;
2320        negative = !less(y, x);
2321     }
2322     const (BigDigit)[] large, small;
2323     if (negative)
2324     {
2325         large = y; small = x;
2326     }
2327     else
2328     {
2329         large = x; small = y;
2330     }
2331 
2332     BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0);
2333     if (x.length != y.length)
2334     {
2335         result[minlen .. large.length]= large[minlen..$];
2336         result[large.length..$] = 0;
2337         if (carry)
2338             multibyteIncrementAssign!('-')(result[minlen..$], carry);
2339     }
2340     return negative;
2341 }
2342 
2343 /* Determine how much space is required for the temporaries
2344  * when performing a Karatsuba multiplication.
2345  * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see:
2346  * https://issues.dlang.org/show_bug.cgi?id=20493
2347  */
2348 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe
2349 {
2350     return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4;
2351 }
2352 
2353 /* Sets result = x*y, using Karatsuba multiplication.
2354 * x must be longer or equal to y.
2355 * Valid only for balanced multiplies, where x is not shorter than y.
2356 * It is superior to schoolbook multiplication if and only if
2357 *    sqrt(2)*y.length > x.length > y.length.
2358 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2)
2359 * The maximum allowable length of x and y is uint.max; but better algorithms
2360 * should be used far before that length is reached.
2361 * Params:
2362 * scratchbuff      An array long enough to store all the temporaries. Will be destroyed.
2363 */
2364 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x,
2365         const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe
2366 {
2367     assert(x.length >= y.length, "x must be greater or equal to y");
2368     assert(result.length < uint.max, "Operands too large");
2369     assert(result.length == x.length + y.length,
2370         "result must be as large as x + y");
2371     if (x.length <= KARATSUBALIMIT)
2372     {
2373         return mulSimple(result, x, y);
2374     }
2375     // Must be almost square (otherwise, a schoolbook iteration is better)
2376     assert(2L * y.length * y.length > (x.length-1) * (x.length-1),
2377         "Bigint Internal Error: Asymmetric Karatsuba");
2378 
2379     // The subtractive version of Karatsuba multiply uses the following result:
2380     // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid)
2381     // where mid = (x0-x1)*(y0-y1)
2382     // requiring 3 multiplies of length N, instead of 4.
2383     // The advantage of the subtractive over the additive version is that
2384     // the mid multiply cannot exceed length N. But there are subtleties:
2385     // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we
2386     // retain all of the leading zeros in the subtractions
2387 
2388     // half length, round up.
2389     auto half = (x.length >> 1) + (x.length & 1);
2390 
2391     const(BigDigit) [] x0 = x[0 .. half];
2392     const(BigDigit) [] x1 = x[half .. $];
2393     const(BigDigit) [] y0 = y[0 .. half];
2394     const(BigDigit) [] y1 = y[half .. $];
2395     BigDigit [] mid = scratchbuff[0 .. half*2];
2396     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2397     BigDigit [] resultLow = result[0 .. 2*half];
2398     BigDigit [] resultHigh = result[2*half .. $];
2399      // initially use result to store temporaries
2400     BigDigit [] xdiff= result[0 .. half];
2401     BigDigit [] ydiff = result[half .. half*2];
2402 
2403     // First, we calculate mid, and sign of mid
2404     immutable bool midNegative = inplaceSub(xdiff, x0, x1)
2405                       ^ inplaceSub(ydiff, y0, y1);
2406     mulKaratsuba(mid, xdiff, ydiff, newscratchbuff);
2407 
2408     // Low half of result gets x0 * y0. High half gets x1 * y1
2409 
2410     mulKaratsuba(resultLow, x0, y0, newscratchbuff);
2411 
2412     if (2L * y1.length * y1.length < x1.length * x1.length)
2413     {
2414         // an asymmetric situation has been created.
2415         // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1.
2416         // Applying one schoolbook multiply gives us two pieces each 1.2:1
2417         if (y1.length <= KARATSUBALIMIT)
2418             mulSimple(resultHigh, x1, y1);
2419         else
2420         {
2421             // divide x1 in two, then use schoolbook multiply on the two pieces.
2422             auto quarter = (x1.length >> 1) + (x1.length & 1);
2423             immutable ysmaller = (quarter >= y1.length);
2424             mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1,
2425                 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff);
2426             // Save the part which will be overwritten.
2427             immutable ysmaller2 = ((x1.length - quarter) >= y1.length);
2428             newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length];
2429             mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1,
2430                 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]);
2431 
2432             resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]);
2433         }
2434     }
2435     else
2436         mulKaratsuba(resultHigh, x1, y1, newscratchbuff);
2437 
2438     /* We now have result = x0y0 + (N*N)*x1y1
2439        Before adding or subtracting mid, we must calculate
2440        result += N * (x0y0 + x1y1)
2441        We can do this with three half-length additions. With a = x0y0, b = x1y1:
2442                       aHI aLO
2443         +       aHI   aLO
2444         +       bHI   bLO
2445         +  bHI  bLO
2446         =  R3   R2    R1   R0
2447         R1 = aHI + bLO + aLO
2448         R2 = aHI + bLO + aHI + carry_from_R1
2449         R3 = bHi + carry_from_R2
2450 
2451      It might actually be quicker to do it in two full-length additions:
2452      newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]);
2453      addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]);
2454    */
2455     BigDigit[] R1 = result[half .. half*2];
2456     BigDigit[] R2 = result[half*2 .. half*3];
2457     BigDigit[] R3 = result[half*3..$];
2458     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2459     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2460     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2461     if (c1+c2)
2462         multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2463     if (c1+c3)
2464         multibyteIncrementAssign!('+')(R3, c1+c3);
2465 
2466     // And finally we subtract mid
2467     addOrSubAssignSimple(result[half..$], mid, !midNegative);
2468 }
2469 
2470 void squareKaratsuba(BigDigit [] result, const BigDigit [] x,
2471         BigDigit [] scratchbuff) pure nothrow @safe
2472 {
2473     // See mulKaratsuba for implementation comments.
2474     // Squaring is simpler, since it never gets asymmetric.
2475     assert(result.length < uint.max, "Operands too large");
2476     assert(result.length == 2*x.length,
2477         "result must be twice the length of x");
2478     if (x.length <= KARATSUBASQUARELIMIT)
2479     {
2480         return squareSimple(result, x);
2481     }
2482     // half length, round up.
2483     auto half = (x.length >> 1) + (x.length & 1);
2484 
2485     const(BigDigit)[] x0 = x[0 .. half];
2486     const(BigDigit)[] x1 = x[half .. $];
2487     BigDigit [] mid = scratchbuff[0 .. half*2];
2488     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2489      // initially use result to store temporaries
2490     BigDigit [] xdiff= result[0 .. half];
2491     const BigDigit [] ydiff = result[half .. half*2];
2492 
2493     // First, we calculate mid. We don't need its sign
2494     inplaceSub(xdiff, x0, x1);
2495     squareKaratsuba(mid, xdiff, newscratchbuff);
2496 
2497     // Set result = x0x0 + (N*N)*x1x1
2498     squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff);
2499     squareKaratsuba(result[2*half .. $], x1, newscratchbuff);
2500 
2501     /* result += N * (x0x0 + x1x1)
2502        Do this with three half-length additions. With a = x0x0, b = x1x1:
2503         R1 = aHI + bLO + aLO
2504         R2 = aHI + bLO + aHI + carry_from_R1
2505         R3 = bHi + carry_from_R2
2506     */
2507     BigDigit[] R1 = result[half .. half*2];
2508     BigDigit[] R2 = result[half*2 .. half*3];
2509     BigDigit[] R3 = result[half*3..$];
2510     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2511     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2512     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2513     if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2514     if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3);
2515 
2516     // And finally we subtract mid, which is always positive
2517     subAssignSimple(result[half..$], mid);
2518 }
2519 
2520 /* Knuth's Algorithm D, as presented in
2521  * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002).
2522  * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18.
2523  * Given u and v, calculates  quotient  = u / v, u = u % v.
2524  * v must be normalized (ie, the MSB of v must be 1).
2525  * The most significant words of quotient and u may be zero.
2526  * u[0 .. v.length] holds the remainder.
2527  */
2528 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2529     pure nothrow @safe
2530 {
2531     assert(quotient.length == u.length - v.length,
2532         "quotient has wrong length");
2533     assert(v.length > 1, "v must not be empty");
2534     assert(u.length >= v.length, "u must be larger or equal to v");
2535     assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2536     assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]");
2537     // BUG: This code only works if BigDigit is uint.
2538     uint vhi = v[$-1];
2539     uint vlo = v[$-2];
2540 
2541     for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--)
2542     {
2543         // Compute estimate of quotient[j],
2544         // qhat = (three most significant words of u)/(two most sig words of v).
2545         uint qhat;
2546         if (u[j + v.length] == vhi)
2547         {
2548             // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001)
2549             qhat = uint.max;
2550         }
2551         else
2552         {
2553             uint ulo = u[j + v.length - 2];
2554             version (D_InlineAsm_X86)
2555             {
2556                 // Note: On DMD, this is only ~10% faster than the non-asm code.
2557                 uint *p = &u[j + v.length - 1];
2558                 asm pure nothrow @trusted
2559                 {
2560                     mov EAX, p;
2561                     mov EDX, [EAX+4];
2562                     mov EAX, [EAX];
2563                     div dword ptr [vhi];
2564                     mov qhat, EAX;
2565                     mov ECX, EDX;
2566 div3by2correction:
2567                     mul dword ptr [vlo]; // EDX:EAX = qhat * vlo
2568                     sub EAX, ulo;
2569                     sbb EDX, ECX;
2570                     jbe div3by2done;
2571                     mov EAX, qhat;
2572                     dec EAX;
2573                     mov qhat, EAX;
2574                     add ECX, dword ptr [vhi];
2575                     jnc div3by2correction;
2576 div3by2done:    ;
2577                 }
2578             }
2579             else
2580             { // version (InlineAsm)
2581                 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1];
2582                 immutable bigqhat = uu / vhi;
2583                 ulong rhat =  uu - bigqhat * vhi;
2584                 qhat = cast(uint) bigqhat;
2585 again:
2586                 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo))
2587                 {
2588                     --qhat;
2589                     rhat += vhi;
2590                     if (!(rhat & 0xFFFF_FFFF_0000_0000L))
2591                         goto again;
2592                 }
2593             } // version (InlineAsm)
2594         }
2595         // Multiply and subtract.
2596         uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0);
2597 
2598         if (u[j+v.length] < carry)
2599         {
2600             // If we subtracted too much, add back
2601             --qhat;
2602             carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0);
2603         }
2604         quotient[j] = qhat;
2605         u[j + v.length] = u[j + v.length] - carry;
2606     }
2607 }
2608 
2609 // TODO: Replace with a library call
2610 void itoaZeroPadded(char[] output, uint value)
2611     pure nothrow @safe @nogc
2612 {
2613     for (auto i = output.length; i--;)
2614     {
2615         if (value < 10)
2616         {
2617             output[i] = cast(char)(value + '0');
2618             value = 0;
2619         }
2620         else
2621         {
2622             output[i] = cast(char)(value % 10 + '0');
2623             value /= 10;
2624         }
2625     }
2626 }
2627 
2628 void toHexZeroPadded(char[] output, uint value,
2629         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
2630 {
2631     ptrdiff_t x = output.length - 1;
2632     static immutable string upperHexDigits = "0123456789ABCDEF";
2633     static immutable string lowerHexDigits = "0123456789abcdef";
2634     for ( ; x >= 0; --x)
2635     {
2636         if (letterCase == LetterCase.upper)
2637         {
2638             output[x] = upperHexDigits[value & 0xF];
2639         }
2640         else
2641         {
2642             output[x] = lowerHexDigits[value & 0xF];
2643         }
2644         value >>= 4;
2645     }
2646 }
2647 
2648 // Returns the highest value of i for which left[i]!=right[i],
2649 // or 0 if left[] == right[]
2650 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right)
2651 pure nothrow @nogc @safe
2652 {
2653     assert(left.length == right.length,
2654         "left have a length equal to that of right");
2655     for (ptrdiff_t i = left.length - 1; i>0; --i)
2656     {
2657         if (left[i] != right[i])
2658             return i;
2659     }
2660     return 0;
2661 }
2662 
2663 // Returns the lowest value of i for which x[i]!=0.
2664 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe
2665 {
2666     int k = 0;
2667     while (x[k]==0)
2668     {
2669         ++k;
2670         assert(k < x.length, "k must be less than x.length");
2671     }
2672     return k;
2673 }
2674 
2675 /*
2676     Calculate quotient and remainder of u / v using fast recursive division.
2677     v must be normalised, and must be at least half as long as u.
2678     Given u and v, v normalised, calculates  quotient  = u/v, u = u%v.
2679     scratch is temporary storage space, length must be >= quotient + 1.
2680 
2681 Returns:
2682     u[0 .. v.length] is the remainder. u[v.length..$] is corrupted.
2683 
2684     Implements algorithm 1.8 from MCA.
2685     This algorithm has an annoying special case. After the first recursion, the
2686     highest bit of the quotient may be set. This means that in the second
2687     recursive call, the 'in' contract would be violated. (This happens only
2688     when the top quarter of u is equal to the top half of v. A base 10
2689     equivalent example of this situation is 5517/56; the first step gives
2690     55/5 = 11). To maintain the in contract, we pad a zero to the top of both
2691     u and the quotient. 'mayOverflow' indicates that that the special case
2692     has occurred.
2693     (In MCA, a different strategy is used: the in contract is weakened, and
2694     schoolbookDivMod is more general: it allows the high bit of u to be set).
2695     See also:
2696     - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
2697       Max-Planck Institute fuer Informatik, (Oct 1998).
2698 */
2699 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v,
2700                      BigDigit[] scratch, bool mayOverflow = false)
2701                      pure nothrow @safe
2702 in
2703 {
2704     // v must be normalized
2705     assert(v.length > 1, "v must not be empty");
2706     assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2707     assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]");
2708     assert(quotient.length == u.length - v.length,
2709         "quotient must be of equal length of u - v");
2710     if (mayOverflow)
2711     {
2712         assert(u[$-1] == 0, "Invalid value at u[$ - 1]");
2713         assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]");
2714     }
2715 
2716     // Must be symmetric. Use block schoolbook division if not.
2717     assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length,
2718         "Invalid length of u");
2719     assert((mayOverflow ? u.length-1 : u.length) >= v.length,
2720         "Invalid length of u");
2721     assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1),
2722         "Invalid quotient length");
2723 }
2724 do
2725 {
2726     if (quotient.length < FASTDIVLIMIT)
2727     {
2728         return schoolbookDivMod(quotient, u, v);
2729     }
2730 
2731     // Split quotient into two halves, but keep padding in the top half
2732     auto k = (mayOverflow ?  quotient.length - 1 : quotient.length) >> 1;
2733 
2734     // RECURSION 1: Calculate the high half of the quotient
2735 
2736     // Note that if u and quotient were padded, they remain padded during
2737     // this call, so in contract is satisfied.
2738     recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $],
2739         scratch, mayOverflow);
2740 
2741     // quotient[k..$] is our guess at the high quotient.
2742     // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the
2743     // first remainder. u[0 .. 2*k] is the low part.
2744 
2745     // Calculate the full first remainder to be
2746     //    remainder - highQuotient * lowDivisor
2747     // reducing highQuotient until the remainder is positive.
2748     // The low part of the remainder, u[0 .. k], cannot be altered by this.
2749 
2750     adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k,
2751             scratch[0 .. quotient.length], mayOverflow);
2752 
2753     // RECURSION 2: Calculate the low half of the quotient
2754     // The full first remainder is now in u[0 .. k + v.length].
2755 
2756     if (u[k + v.length - 1] & 0x8000_0000)
2757     {
2758         // Special case. The high quotient is 0x1_00...000 or 0x1_00...001.
2759         // This means we need an extra quotient word for the next recursion.
2760         // We need to restore the invariant for the recursive calls.
2761         // We do this by padding both u and quotient. Extending u is trivial,
2762         // because the higher words will not be used again. But for the
2763         // quotient, we're clobbering the low word of the high quotient,
2764         // so we need save it, and add it back in after the recursive call.
2765 
2766         auto clobberedQuotient = quotient[k];
2767         u[k+v.length] = 0;
2768 
2769         recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1],
2770             v[k .. $], scratch, true);
2771         adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k,
2772             scratch[0 .. 2 * k+1], true);
2773 
2774         // Now add the quotient word that got clobbered earlier.
2775         multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient);
2776     }
2777     else
2778     {
2779         // The special case has NOT happened.
2780         recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $],
2781             scratch, false);
2782 
2783         // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length]
2784 
2785         adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k,
2786             scratch[0 .. 2 * k]);
2787     }
2788 }
2789 
2790 // rem -= quot * v[0 .. k].
2791 // If would make rem negative, decrease quot until rem is >= 0.
2792 // Needs (quot.length * k) scratch space to store the result of the multiply.
2793 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v,
2794         ptrdiff_t k,
2795         BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe
2796 {
2797     assert(rem.length == v.length, "rem must be as long as v");
2798     mulInternal(scratch, quot, v[0 .. k]);
2799     uint carry = 0;
2800     if (mayOverflow)
2801         carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]);
2802     else
2803         carry = subAssignSimple(rem, scratch);
2804     while (carry)
2805     {
2806         multibyteIncrementAssign!('-')(quot, 1); // quot--
2807         carry -= multibyteAdd(rem, rem, v, 0);
2808     }
2809 }
2810 
2811 // Cope with unbalanced division by performing block schoolbook division.
2812 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2813 pure nothrow @safe
2814 {
2815     import core.memory : GC;
2816     assert(quotient.length == u.length - v.length,
2817         "quotient must be of equal length of u - v");
2818     assert(v.length > 1, "v must not be empty");
2819     assert(u.length >= v.length, "u must be longer or of equal length as v");
2820     assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]");
2821     assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]");
2822     BigDigit [] scratch = new BigDigit[v.length + 1];
2823 
2824     // Perform block schoolbook division, with 'v.length' blocks.
2825     auto m = u.length - v.length;
2826     while (m > v.length)
2827     {
2828         immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0;
2829         BigDigit saveq;
2830         if (mayOverflow)
2831         {
2832             u[m + v.length] = 0;
2833             saveq = quotient[m];
2834         }
2835         recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)],
2836             u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow);
2837         if (mayOverflow)
2838         {
2839             assert(quotient[m] == 0, "quotient must not be 0");
2840             quotient[m] = saveq;
2841         }
2842         m -= v.length;
2843     }
2844     recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch);
2845     () @trusted { GC.free(scratch.ptr); } ();
2846 }
2847 
2848 @system unittest
2849 {
2850     version (none)
2851     {
2852         import core.stdc.stdio;
2853 
2854         void printBiguint(const uint [] data)
2855         {
2856             char [] buff = biguintToHex(new char[data.length*9], data, '_');
2857             printf("%.*s\n", cast(int) buff.length, buff.ptr);
2858         }
2859 
2860         void printDecimalBigUint(BigUint data)
2861         {
2862             auto str = data.toDecimalString(0);
2863             printf("%.*s\n", cast(int) str.length, str.ptr);
2864         }
2865     }
2866 
2867     uint [] a, b;
2868     a = new uint[43];
2869     b = new uint[179];
2870     for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i;
2871     for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546;
2872 
2873     a[$-1] |= 0x8000_0000;
2874     uint [] r = new uint[a.length];
2875     uint [] q = new uint[b.length-a.length+1];
2876 
2877     divModInternal(q, r, b, a);
2878     q = q[0..$-1];
2879     uint [] r1 = r.dup;
2880     uint [] q1 = q.dup;
2881     blockDivMod(q, b, a);
2882     r = b[0 .. a.length];
2883     assert(r[] == r1[]);
2884     assert(q[] == q1[]);
2885 }
2886 
2887 // biguintToOctal
2888 @safe unittest
2889 {
2890     enum bufSize = 5 * BigDigitBits / 3 + 1;
2891     auto buf = new char[bufSize];
2892     size_t i;
2893     BigDigit[] data = [ 342391 ];
2894 
2895     // Basic functionality with single word
2896     i = biguintToOctal(buf, data);
2897     assert(i == bufSize - 7 && buf[i .. $] == "1234567");
2898 
2899     // Test carrying bits between words
2900     data = [ 0x77053977, 0x39770539, 0x00000005 ];
2901     i = biguintToOctal(buf, data);
2902     assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567");
2903 
2904     // Test carried bits in the last word
2905     data = [ 0x80000000 ];
2906     i = biguintToOctal(buf, data);
2907     assert(buf[i .. $] == "20000000000");
2908 
2909     // Test boundary between 3rd and 4th word where the number of bits is
2910     // divisible by 3 and no bits should be carried.
2911     //
2912     // The 0xC0000000's are for "poisoning" the carry to be non-zero when the
2913     // rollover happens, so that if any bugs happen in wrongly adding the carry
2914     // to the next word, non-zero bits will show up in the output.
2915     data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ];
2916     i = biguintToOctal(buf, data);
2917     assert(buf[i .. $] == "2060000000001400000000030000000000");
2918 
2919     // Boundary case: 0
2920     data = [ 0 ];
2921     i = biguintToOctal(buf, data);
2922     assert(buf[i .. $] == "0");
2923 }