1 /** Arbitrary precision arithmetic ('bignum') for processors with no asm support 2 * 3 * All functions operate on arrays of uints, stored LSB first. 4 * If there is a destination array, it will be the first parameter. 5 * Currently, all of these functions are subject to change, and are 6 * intended for internal use only. 7 * This module is intended only to assist development of high-speed routines 8 * on currently unsupported processors. 9 * The X86 asm version is about 30 times faster than the D version (DMD). 10 */ 11 12 /* Copyright Don Clugston 2008 - 2010. 13 * Distributed under the Boost Software License, Version 1.0. 14 * (See accompanying file LICENSE_1_0.txt or copy at 15 * http://www.boost.org/LICENSE_1_0.txt) 16 */ 17 18 module std.internal.math.biguintnoasm; 19 20 nothrow: 21 @safe: 22 23 public: 24 alias BigDigit = uint; // A Bignum is an array of BigDigits. 25 26 // Limits for when to switch between multiplication algorithms. 27 enum int KARATSUBALIMIT = 10; // Minimum value for which Karatsuba is worthwhile. 28 enum int KARATSUBASQUARELIMIT = 12; // Minimum value for which square Karatsuba is worthwhile 29 30 31 /** Multi-byte addition or subtraction 32 * dest[] = src1[] + src2[] + carry (0 or 1). 33 * or dest[] = src1[] - src2[] - carry (0 or 1). 34 * Returns carry or borrow (0 or 1). 35 * Set op == '+' for addition, '-' for subtraction. 36 */ 37 uint multibyteAddSub(char op)(uint[] dest, const(uint) [] src1, 38 const (uint) [] src2, uint carry) pure @nogc @safe 39 { 40 ulong c = carry; 41 for (size_t i = 0; i < src2.length; ++i) 42 { 43 static if (op=='+') c = c + src1[i] + src2[i]; 44 else c = cast(ulong) src1[i] - src2[i] - c; 45 dest[i] = cast(uint) c; 46 c = (c > 0xFFFF_FFFF); 47 } 48 return cast(uint) c; 49 } 50 51 @safe unittest 52 { 53 uint [] a = new uint[40]; 54 uint [] b = new uint[40]; 55 uint [] c = new uint[40]; 56 for (size_t i = 0; i < a.length; ++i) 57 { 58 if (i&1) a[i]=cast(uint)(0x8000_0000 + i); 59 else a[i]=cast(uint) i; 60 b[i]= 0x8000_0003; 61 } 62 c[19]=0x3333_3333; 63 uint carry = multibyteAddSub!('+')(c[0 .. 18], b[0 .. 18], a[0 .. 18], 0); 64 assert(c[0]==0x8000_0003, "c[0] has invalid value"); 65 assert(c[1]==4, "c[1] must be for"); 66 assert(c[19]==0x3333_3333, "c[19] has invalid value"); // check for overrun 67 assert(carry == 1, "carry must be 1"); 68 for (size_t i = 0; i < a.length; ++i) 69 { 70 a[i] = b[i] = c[i] = 0; 71 } 72 a[8]=0x048D159E; 73 b[8]=0x048D159E; 74 a[10]=0x1D950C84; 75 b[10]=0x1D950C84; 76 a[5] =0x44444444; 77 carry = multibyteAddSub!('-')(a[0 .. 12], a[0 .. 12], b[0 .. 12], 0); 78 assert(a[11] == 0, "a[11] must be 0"); 79 for (size_t i = 0; i < 10; ++i) 80 if (i != 5) 81 assert(a[i] == 0, "a[1] must be 0"); 82 83 for (size_t q = 3; q < 36; ++q) 84 { 85 for (size_t i = 0; i< a.length; ++i) 86 { 87 a[i] = b[i] = c[i] = 0; 88 } 89 a[q-2]=0x040000; 90 b[q-2]=0x040000; 91 carry = multibyteAddSub!('-')(a[0 .. q], a[0 .. q], b[0 .. q], 0); 92 assert(a[q-2]==0, "a[q-2] must be 0"); 93 } 94 } 95 96 97 98 /** dest[] += carry, or dest[] -= carry. 99 * op must be '+' or '-' 100 * Returns final carry or borrow (0 or 1) 101 */ 102 uint multibyteIncrementAssign(char op)(uint[] dest, uint carry) 103 pure @nogc @safe 104 { 105 static if (op=='+') 106 { 107 ulong c = carry; 108 c += dest[0]; 109 dest[0] = cast(uint) c; 110 if (c <= 0xFFFF_FFFF) 111 return 0; 112 113 for (size_t i = 1; i < dest.length; ++i) 114 { 115 ++dest[i]; 116 if (dest[i] != 0) 117 return 0; 118 } 119 return 1; 120 } 121 else 122 { 123 ulong c = carry; 124 c = dest[0] - c; 125 dest[0] = cast(uint) c; 126 if (c <= 0xFFFF_FFFF) 127 return 0; 128 for (size_t i = 1; i < dest.length; ++i) 129 { 130 --dest[i]; 131 if (dest[i] != 0xFFFF_FFFF) 132 return 0; 133 } 134 return 1; 135 } 136 } 137 138 /** dest[] = src[] << numbits 139 * numbits must be in the range 1 .. 31 140 */ 141 uint multibyteShl(uint [] dest, const(uint) [] src, uint numbits) 142 pure @nogc @safe 143 { 144 ulong c = 0; 145 for (size_t i = 0; i < dest.length; ++i) 146 { 147 c += (cast(ulong)(src[i]) << numbits); 148 dest[i] = cast(uint) c; 149 c >>>= 32; 150 } 151 return cast(uint) c; 152 } 153 154 155 /** dest[] = src[] >> numbits 156 * numbits must be in the range 1 .. 31 157 */ 158 void multibyteShr(uint [] dest, const(uint) [] src, uint numbits) 159 pure @nogc @safe 160 { 161 ulong c = 0; 162 for (ptrdiff_t i = dest.length; i != 0; --i) 163 { 164 c += (src[i-1] >>numbits) + (cast(ulong)(src[i-1]) << (64 - numbits)); 165 dest[i-1] = cast(uint) c; 166 c >>>= 32; 167 } 168 } 169 170 @safe unittest 171 { 172 173 uint [] aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE]; 174 multibyteShr(aa[0..$-2], aa, 4); 175 assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555 && aa[2] == 0x0899_9999); 176 assert(aa[3] == 0xBCCC_CCCD); 177 178 aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE]; 179 multibyteShr(aa[0..$-1], aa, 4); 180 assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555 181 && aa[2] == 0xD899_9999 && aa[3] == 0x0BCC_CCCC); 182 183 aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 184 0xEEEE_EEEE]; 185 multibyteShl(aa[1 .. 4], aa[1..$], 4); 186 assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230 187 && aa[2]==0x5555_5561 && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD); 188 } 189 190 /** dest[] = src[] * multiplier + carry. 191 * Returns carry. 192 */ 193 uint multibyteMul(uint[] dest, const(uint)[] src, uint multiplier, uint carry) 194 pure @nogc @safe 195 { 196 assert(dest.length == src.length, "dest and src must have the same length"); 197 ulong c = carry; 198 for (size_t i = 0; i < src.length; ++i) 199 { 200 c += cast(ulong)(src[i]) * multiplier; 201 dest[i] = cast(uint) c; 202 c>>=32; 203 } 204 return cast(uint) c; 205 } 206 207 @safe unittest 208 { 209 uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 210 0xBCCC_CCCD, 0xEEEE_EEEE]; 211 multibyteMul(aa[1 .. 4], aa[1 .. 4], 16, 0); 212 assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230 && aa[2]==0x5555_5561 213 && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD); 214 } 215 216 /** 217 * dest[] += src[] * multiplier + carry(0 .. FFFF_FFFF). 218 * Returns carry out of MSB (0 .. FFFF_FFFF). 219 */ 220 uint multibyteMulAdd(char op)(uint [] dest, const(uint)[] src, 221 uint multiplier, uint carry) pure @nogc @safe 222 { 223 assert(dest.length == src.length, "dest and src must have the same length"); 224 ulong c = carry; 225 for (size_t i = 0; i < src.length; ++i) 226 { 227 static if (op=='+') 228 { 229 c += cast(ulong)(multiplier) * src[i] + dest[i]; 230 dest[i] = cast(uint) c; 231 c >>= 32; 232 } 233 else 234 { 235 c += cast(ulong) multiplier * src[i]; 236 ulong t = cast(ulong) dest[i] - cast(uint) c; 237 dest[i] = cast(uint) t; 238 c = cast(uint)((c >> 32) - (t >> 32)); 239 } 240 } 241 return cast(uint) c; 242 } 243 244 @safe unittest 245 { 246 247 uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 248 0xBCCC_CCCD, 0xEEEE_EEEE]; 249 uint [] bb = [0x1234_1234, 0xF0F0_F0F0, 0x00C0_C0C0, 0xF0F0_F0F0, 250 0xC0C0_C0C0]; 251 multibyteMulAdd!('+')(bb[1..$-1], aa[1..$-2], 16, 5); 252 assert(bb[0] == 0x1234_1234 && bb[4] == 0xC0C0_C0C0); 253 assert(bb[1] == 0x2222_2230 + 0xF0F0_F0F0 + 5 254 && bb[2] == 0x5555_5561 + 0x00C0_C0C0 + 1 255 && bb[3] == 0x9999_99A4 + 0xF0F0_F0F0 ); 256 } 257 258 259 /** 260 Sets result = result[0 .. left.length] + left * right 261 262 It is defined in this way to allow cache-efficient multiplication. 263 This function is equivalent to: 264 ---- 265 for (size_t i = 0; i< right.length; ++i) 266 { 267 dest[left.length + i] = multibyteMulAdd(dest[i .. left.length+i], 268 left, right[i], 0); 269 } 270 ---- 271 */ 272 void multibyteMultiplyAccumulate(uint [] dest, const(uint)[] left, const(uint) 273 [] right) pure @nogc @safe 274 { 275 for (size_t i = 0; i < right.length; ++i) 276 { 277 dest[left.length + i] = multibyteMulAdd!('+')(dest[i .. left.length+i], 278 left, right[i], 0); 279 } 280 } 281 282 /** dest[] /= divisor. 283 * overflow is the initial remainder, and must be in the range 0 .. divisor-1. 284 */ 285 uint multibyteDivAssign(uint [] dest, uint divisor, uint overflow) 286 pure @nogc @safe 287 { 288 ulong c = cast(ulong) overflow; 289 for (ptrdiff_t i = dest.length-1; i >= 0; --i) 290 { 291 c = (c << 32) + cast(ulong)(dest[i]); 292 uint q = cast(uint)(c/divisor); 293 c -= divisor * q; 294 dest[i] = q; 295 } 296 return cast(uint) c; 297 } 298 299 @safe unittest 300 { 301 uint [] aa = new uint[101]; 302 for (uint i = 0; i < aa.length; ++i) 303 aa[i] = 0x8765_4321 * (i+3); 304 uint overflow = multibyteMul(aa, aa, 0x8EFD_FCFB, 0x33FF_7461); 305 uint r = multibyteDivAssign(aa, 0x8EFD_FCFB, overflow); 306 for (uint i=0; i<aa.length; ++i) 307 { 308 assert(aa[i] == 0x8765_4321 * (i+3)); 309 } 310 assert(r == 0x33FF_7461); 311 312 } 313 // Set dest[2*i .. 2*i+1]+=src[i]*src[i] 314 void multibyteAddDiagonalSquares(uint[] dest, const(uint)[] src) 315 pure @nogc @safe 316 { 317 ulong c = 0; 318 for (size_t i = 0; i < src.length; ++i) 319 { 320 // At this point, c is 0 or 1, since FFFF*FFFF+FFFF_FFFF = 1_0000_0000. 321 c += cast(ulong)(src[i]) * src[i] + dest[2*i]; 322 dest[2*i] = cast(uint) c; 323 c = (c>>=32) + dest[2*i+1]; 324 dest[2*i+1] = cast(uint) c; 325 c >>= 32; 326 } 327 } 328 329 // Does half a square multiply. (square = diagonal + 2*triangle) 330 void multibyteTriangleAccumulate(uint[] dest, const(uint)[] x) 331 pure @nogc @safe 332 { 333 // x[0]*x[1...$] + x[1]*x[2..$] + ... + x[$-2]x[$-1..$] 334 dest[x.length] = multibyteMul(dest[1 .. x.length], x[1..$], x[0], 0); 335 if (x.length < 4) 336 { 337 if (x.length == 3) 338 { 339 ulong c = cast(ulong)(x[$-1]) * x[$-2] + dest[2*x.length-3]; 340 dest[2*x.length - 3] = cast(uint) c; 341 c >>= 32; 342 dest[2*x.length - 2] = cast(uint) c; 343 } 344 return; 345 } 346 for (size_t i = 2; i < x.length - 2; ++i) 347 { 348 dest[i-1+ x.length] = multibyteMulAdd!('+')( 349 dest[i+i-1 .. i+x.length-1], x[i..$], x[i-1], 0); 350 } 351 // Unroll the last two entries, to reduce loop overhead: 352 ulong c = cast(ulong)(x[$-3]) * x[$-2] + dest[2*x.length-5]; 353 dest[2*x.length-5] = cast(uint) c; 354 c >>= 32; 355 c += cast(ulong)(x[$-3]) * x[$-1] + dest[2*x.length-4]; 356 dest[2*x.length-4] = cast(uint) c; 357 c >>= 32; 358 c += cast(ulong)(x[$-1]) * x[$-2]; 359 dest[2*x.length-3] = cast(uint) c; 360 c >>= 32; 361 dest[2*x.length-2] = cast(uint) c; 362 } 363 364 void multibyteSquare(BigDigit[] result, const(BigDigit) [] x) pure @nogc @safe 365 { 366 multibyteTriangleAccumulate(result, x); 367 result[$-1] = multibyteShl(result[1..$-1], result[1..$-1], 1); // mul by 2 368 result[0] = 0; 369 multibyteAddDiagonalSquares(result, x); 370 }