1 /** Arbitrary precision arithmetic ('bignum') for processors with no asm support
2  *
3  * All functions operate on arrays of uints, stored LSB first.
4  * If there is a destination array, it will be the first parameter.
5  * Currently, all of these functions are subject to change, and are
6  * intended for internal use only.
7  * This module is intended only to assist development of high-speed routines
8  * on currently unsupported processors.
9  * The X86 asm version is about 30 times faster than the D version (DMD).
10  */
11 
12 /*          Copyright Don Clugston 2008 - 2010.
13  * Distributed under the Boost Software License, Version 1.0.
14  *    (See accompanying file LICENSE_1_0.txt or copy at
15  *          http://www.boost.org/LICENSE_1_0.txt)
16  */
17 
18 module std.internal.math.biguintnoasm;
19 
20 nothrow:
21 @safe:
22 
23 public:
24 alias BigDigit = uint; // A Bignum is an array of BigDigits.
25 
26     // Limits for when to switch between multiplication algorithms.
27 enum int KARATSUBALIMIT = 10; // Minimum value for which Karatsuba is worthwhile.
28 enum int KARATSUBASQUARELIMIT = 12; // Minimum value for which square Karatsuba is worthwhile
29 
30 
31 /** Multi-byte addition or subtraction
32  *    dest[] = src1[] + src2[] + carry (0 or 1).
33  * or dest[] = src1[] - src2[] - carry (0 or 1).
34  * Returns carry or borrow (0 or 1).
35  * Set op == '+' for addition, '-' for subtraction.
36  */
37 uint multibyteAddSub(char op)(uint[] dest, const(uint) [] src1,
38     const (uint) [] src2, uint carry) pure @nogc @safe
39 {
40     ulong c = carry;
41     for (size_t i = 0; i < src2.length; ++i)
42     {
43         static if (op=='+') c = c  + src1[i] + src2[i];
44              else           c = cast(ulong) src1[i] - src2[i] - c;
45         dest[i] = cast(uint) c;
46         c = (c > 0xFFFF_FFFF);
47     }
48     return cast(uint) c;
49 }
50 
51 @safe unittest
52 {
53     uint [] a = new uint[40];
54     uint [] b = new uint[40];
55     uint [] c = new uint[40];
56     for (size_t i = 0; i < a.length; ++i)
57     {
58         if (i&1) a[i]=cast(uint)(0x8000_0000 + i);
59         else a[i]=cast(uint) i;
60         b[i]= 0x8000_0003;
61     }
62     c[19]=0x3333_3333;
63     uint carry = multibyteAddSub!('+')(c[0 .. 18], b[0 .. 18], a[0 .. 18], 0);
64     assert(c[0]==0x8000_0003, "c[0] has invalid value");
65     assert(c[1]==4, "c[1] must be for");
66     assert(c[19]==0x3333_3333, "c[19] has invalid value"); // check for overrun
67     assert(carry == 1, "carry must be 1");
68     for (size_t i = 0; i < a.length; ++i)
69     {
70         a[i] = b[i] = c[i] = 0;
71     }
72     a[8]=0x048D159E;
73     b[8]=0x048D159E;
74     a[10]=0x1D950C84;
75     b[10]=0x1D950C84;
76     a[5] =0x44444444;
77     carry = multibyteAddSub!('-')(a[0 .. 12], a[0 .. 12], b[0 .. 12], 0);
78     assert(a[11] == 0, "a[11] must be 0");
79     for (size_t i = 0; i < 10; ++i)
80         if (i != 5)
81             assert(a[i] == 0, "a[1] must be 0");
82 
83     for (size_t q = 3; q < 36; ++q)
84     {
85         for (size_t i = 0; i< a.length; ++i)
86         {
87             a[i] = b[i] = c[i] = 0;
88         }
89         a[q-2]=0x040000;
90         b[q-2]=0x040000;
91        carry = multibyteAddSub!('-')(a[0 .. q], a[0 .. q], b[0 .. q], 0);
92        assert(a[q-2]==0, "a[q-2] must be 0");
93     }
94 }
95 
96 
97 
98 /** dest[] += carry, or dest[] -= carry.
99  *  op must be '+' or '-'
100  *  Returns final carry or borrow (0 or 1)
101  */
102 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry)
103     pure @nogc @safe
104 {
105     static if (op=='+')
106     {
107         ulong c = carry;
108         c += dest[0];
109         dest[0] = cast(uint) c;
110         if (c <= 0xFFFF_FFFF)
111             return 0;
112 
113         for (size_t i = 1; i < dest.length; ++i)
114         {
115             ++dest[i];
116             if (dest[i] != 0)
117                 return 0;
118         }
119         return 1;
120     }
121     else
122     {
123         ulong c = carry;
124         c = dest[0] - c;
125         dest[0] = cast(uint) c;
126         if (c <= 0xFFFF_FFFF)
127             return 0;
128         for (size_t i = 1; i < dest.length; ++i)
129         {
130             --dest[i];
131             if (dest[i] != 0xFFFF_FFFF)
132                 return 0;
133         }
134         return 1;
135     }
136 }
137 
138 /** dest[] = src[] << numbits
139  *  numbits must be in the range 1 .. 31
140  */
141 uint multibyteShl(uint [] dest, const(uint) [] src, uint numbits)
142     pure @nogc @safe
143 {
144     ulong c = 0;
145     for (size_t i = 0; i < dest.length; ++i)
146     {
147         c += (cast(ulong)(src[i]) << numbits);
148         dest[i] = cast(uint) c;
149         c >>>= 32;
150    }
151    return cast(uint) c;
152 }
153 
154 
155 /** dest[] = src[] >> numbits
156  *  numbits must be in the range 1 .. 31
157  */
158 void multibyteShr(uint [] dest, const(uint) [] src, uint numbits)
159     pure @nogc @safe
160 {
161     ulong c = 0;
162     for (ptrdiff_t i = dest.length; i != 0; --i)
163     {
164         c += (src[i-1] >>numbits) + (cast(ulong)(src[i-1]) << (64 - numbits));
165         dest[i-1] = cast(uint) c;
166         c >>>= 32;
167    }
168 }
169 
170 @safe unittest
171 {
172 
173     uint [] aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
174     multibyteShr(aa[0..$-2], aa, 4);
175     assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555 && aa[2] == 0x0899_9999);
176     assert(aa[3] == 0xBCCC_CCCD);
177 
178     aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
179     multibyteShr(aa[0..$-1], aa, 4);
180     assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555
181         && aa[2] == 0xD899_9999 && aa[3] == 0x0BCC_CCCC);
182 
183     aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD,
184         0xEEEE_EEEE];
185     multibyteShl(aa[1 .. 4], aa[1..$], 4);
186     assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230
187         && aa[2]==0x5555_5561 && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
188 }
189 
190 /** dest[] = src[] * multiplier + carry.
191  * Returns carry.
192  */
193 uint multibyteMul(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
194     pure @nogc @safe
195 {
196     assert(dest.length == src.length, "dest and src must have the same length");
197     ulong c = carry;
198     for (size_t i = 0; i < src.length; ++i)
199     {
200         c += cast(ulong)(src[i]) * multiplier;
201         dest[i] = cast(uint) c;
202         c>>=32;
203     }
204     return cast(uint) c;
205 }
206 
207 @safe unittest
208 {
209     uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
210         0xBCCC_CCCD, 0xEEEE_EEEE];
211     multibyteMul(aa[1 .. 4], aa[1 .. 4], 16, 0);
212     assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230 && aa[2]==0x5555_5561
213         && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
214 }
215 
216 /**
217  * dest[] += src[] * multiplier + carry(0 .. FFFF_FFFF).
218  * Returns carry out of MSB (0 .. FFFF_FFFF).
219  */
220 uint multibyteMulAdd(char op)(uint [] dest, const(uint)[] src,
221     uint multiplier, uint carry) pure @nogc @safe
222 {
223     assert(dest.length == src.length, "dest and src must have the same length");
224     ulong c = carry;
225     for (size_t i = 0; i < src.length; ++i)
226     {
227         static if (op=='+')
228         {
229             c += cast(ulong)(multiplier) * src[i]  + dest[i];
230             dest[i] = cast(uint) c;
231             c >>= 32;
232         }
233         else
234         {
235             c += cast(ulong) multiplier * src[i];
236             ulong t = cast(ulong) dest[i] - cast(uint) c;
237             dest[i] = cast(uint) t;
238             c = cast(uint)((c >> 32) - (t >> 32));
239         }
240     }
241     return cast(uint) c;
242 }
243 
244 @safe unittest
245 {
246 
247     uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
248         0xBCCC_CCCD, 0xEEEE_EEEE];
249     uint [] bb = [0x1234_1234, 0xF0F0_F0F0, 0x00C0_C0C0, 0xF0F0_F0F0,
250         0xC0C0_C0C0];
251     multibyteMulAdd!('+')(bb[1..$-1], aa[1..$-2], 16, 5);
252     assert(bb[0] == 0x1234_1234 && bb[4] == 0xC0C0_C0C0);
253     assert(bb[1] == 0x2222_2230 + 0xF0F0_F0F0 + 5
254         && bb[2] == 0x5555_5561 + 0x00C0_C0C0 + 1
255         && bb[3] == 0x9999_99A4 + 0xF0F0_F0F0 );
256 }
257 
258 
259 /**
260    Sets result = result[0 .. left.length] + left * right
261 
262    It is defined in this way to allow cache-efficient multiplication.
263    This function is equivalent to:
264     ----
265     for (size_t i = 0; i< right.length; ++i)
266     {
267         dest[left.length + i] = multibyteMulAdd(dest[i .. left.length+i],
268                 left, right[i], 0);
269     }
270     ----
271  */
272 void multibyteMultiplyAccumulate(uint [] dest, const(uint)[] left, const(uint)
273         [] right) pure @nogc @safe
274 {
275     for (size_t i = 0; i < right.length; ++i)
276     {
277         dest[left.length + i] = multibyteMulAdd!('+')(dest[i .. left.length+i],
278                 left, right[i], 0);
279     }
280 }
281 
282 /**  dest[] /= divisor.
283  * overflow is the initial remainder, and must be in the range 0 .. divisor-1.
284  */
285 uint multibyteDivAssign(uint [] dest, uint divisor, uint overflow)
286     pure @nogc @safe
287 {
288     ulong c = cast(ulong) overflow;
289     for (ptrdiff_t i = dest.length-1; i >= 0; --i)
290     {
291         c = (c << 32) + cast(ulong)(dest[i]);
292         uint q = cast(uint)(c/divisor);
293         c -= divisor * q;
294         dest[i] = q;
295     }
296     return cast(uint) c;
297 }
298 
299 @safe unittest
300 {
301     uint [] aa = new uint[101];
302     for (uint i = 0; i < aa.length; ++i)
303         aa[i] = 0x8765_4321 * (i+3);
304     uint overflow = multibyteMul(aa, aa, 0x8EFD_FCFB, 0x33FF_7461);
305     uint r = multibyteDivAssign(aa, 0x8EFD_FCFB, overflow);
306     for (uint i=0; i<aa.length; ++i)
307     {
308         assert(aa[i] == 0x8765_4321 * (i+3));
309     }
310     assert(r == 0x33FF_7461);
311 
312 }
313 // Set dest[2*i .. 2*i+1]+=src[i]*src[i]
314 void multibyteAddDiagonalSquares(uint[] dest, const(uint)[] src)
315     pure @nogc @safe
316 {
317     ulong c = 0;
318     for (size_t i = 0; i < src.length; ++i)
319     {
320         // At this point, c is 0 or 1, since FFFF*FFFF+FFFF_FFFF = 1_0000_0000.
321         c += cast(ulong)(src[i]) * src[i] + dest[2*i];
322         dest[2*i] = cast(uint) c;
323         c = (c>>=32) + dest[2*i+1];
324         dest[2*i+1] = cast(uint) c;
325         c >>= 32;
326     }
327 }
328 
329 // Does half a square multiply. (square = diagonal + 2*triangle)
330 void multibyteTriangleAccumulate(uint[] dest, const(uint)[] x)
331     pure @nogc @safe
332 {
333     // x[0]*x[1...$] + x[1]*x[2..$] + ... + x[$-2]x[$-1..$]
334     dest[x.length] = multibyteMul(dest[1 .. x.length], x[1..$], x[0], 0);
335     if (x.length < 4)
336     {
337         if (x.length == 3)
338         {
339             ulong c = cast(ulong)(x[$-1]) * x[$-2]  + dest[2*x.length-3];
340             dest[2*x.length - 3] = cast(uint) c;
341             c >>= 32;
342             dest[2*x.length - 2] = cast(uint) c;
343         }
344         return;
345     }
346     for (size_t i = 2; i < x.length - 2; ++i)
347     {
348         dest[i-1+ x.length] = multibyteMulAdd!('+')(
349              dest[i+i-1 .. i+x.length-1], x[i..$], x[i-1], 0);
350     }
351         // Unroll the last two entries, to reduce loop overhead:
352     ulong  c = cast(ulong)(x[$-3]) * x[$-2] + dest[2*x.length-5];
353     dest[2*x.length-5] = cast(uint) c;
354     c >>= 32;
355     c += cast(ulong)(x[$-3]) * x[$-1] + dest[2*x.length-4];
356     dest[2*x.length-4] = cast(uint) c;
357     c >>= 32;
358     c += cast(ulong)(x[$-1]) * x[$-2];
359     dest[2*x.length-3] = cast(uint) c;
360     c >>= 32;
361     dest[2*x.length-2] = cast(uint) c;
362 }
363 
364 void multibyteSquare(BigDigit[] result, const(BigDigit) [] x) pure @nogc @safe
365 {
366     multibyteTriangleAccumulate(result, x);
367     result[$-1] = multibyteShl(result[1..$-1], result[1..$-1], 1); // mul by 2
368     result[0] = 0;
369     multibyteAddDiagonalSquares(result, x);
370 }