1 /**
2  * Computes MD5 hashes of arbitrary data. MD5 hashes are 16 byte quantities that are like a
3  * checksum or CRC, but are more robust.
4  *
5 $(SCRIPT inhibitQuickIndex = 1;)
6 
7 $(DIVC quickindex,
8 $(BOOKTABLE ,
9 $(TR $(TH Category) $(TH Functions)
10 )
11 $(TR $(TDNW Template API) $(TD $(MYREF MD5)
12 )
13 )
14 $(TR $(TDNW OOP API) $(TD $(MYREF MD5Digest))
15 )
16 $(TR $(TDNW Helpers) $(TD $(MYREF md5Of))
17 )
18 )
19 )
20 
21  * This module conforms to the APIs defined in `std.digest`. To understand the
22  * differences between the template and the OOP API, see $(MREF std, digest).
23  *
24  * This module publicly imports $(MREF std, digest) and can be used as a stand-alone
25  * module.
26  *
27  * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
28  *
29  * CTFE:
30  * Digests do not work in CTFE
31  *
32  * Authors:
33  * Piotr Szturmaj, Kai Nacke, Johannes Pfau $(BR)
34  * The routines and algorithms are derived from the $(I RSA Data Security, Inc. MD5 Message-Digest Algorithm).
35  *
36  * References:
37  *      $(LINK2 http://en.wikipedia.org/wiki/Md5, Wikipedia on MD5)
38  *
39  * Source: $(PHOBOSSRC std/digest/md.d)
40  *
41  */
42 
43 /* md5.d - RSA Data Security, Inc., MD5 message-digest algorithm
44  * Derived from the RSA Data Security, Inc. MD5 Message-Digest Algorithm.
45  */
46 module std.digest.md;
47 
48 public import std.digest;
49 
50 ///
51 @safe unittest
52 {
53     //Template API
54     import std.digest.md;
55 
56     //Feeding data
57     ubyte[1024] data;
58     MD5 md5;
59     md5.start();
60     md5.put(data[]);
61     md5.start(); //Start again
62     md5.put(data[]);
63     auto hash = md5.finish();
64 }
65 
66 ///
67 @safe unittest
68 {
69     //OOP API
70     import std.digest.md;
71 
72     auto md5 = new MD5Digest();
73     ubyte[] hash = md5.digest("abc");
74     assert(toHexString(hash) == "900150983CD24FB0D6963F7D28E17F72");
75 
76     //Feeding data
77     ubyte[1024] data;
78     md5.put(data[]);
79     md5.reset(); //Start again
80     md5.put(data[]);
81     hash = md5.finish();
82 }
83 
84 /**
85  * Template API MD5 implementation.
86  * See `std.digest` for differences between template and OOP API.
87  */
88 struct MD5
89 {
90     import core.bitop : rol;
91     private:
92         // magic initialization constants
93         uint[4] _state = [0x67452301,0xefcdab89,0x98badcfe,0x10325476]; // state (ABCD)
94         ulong _count; //number of bits, modulo 2^64
95         ubyte[64] _buffer; // input buffer
96 
97         static immutable ubyte[64] _padding =
98         [
99           0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
101           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
102         ];
103 
104         // F, G, H and I are basic MD5 functions
105         static @safe pure nothrow @nogc
106         {
107             uint F(uint x, uint y, uint z) { return (x & y) | (~x & z); }
108             uint G(uint x, uint y, uint z) { return (x & z) | (y & ~z); }
109             uint H(uint x, uint y, uint z) { return x ^ y ^ z; }
110             uint I(uint x, uint y, uint z) { return y ^ (x | ~z); }
111         }
112 
113 
114         /*
115          * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
116          * Rotation is separate from addition to prevent recomputation.
117          */
118         static void FF(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
119             @safe pure nothrow @nogc
120         {
121             a += F (b, c, d) + x + ac;
122             a = rol(a, s);
123             a += b;
124         }
125 
126         static void GG(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
127             @safe pure nothrow @nogc
128         {
129             a += G (b, c, d) + x + ac;
130             a = rol(a, s);
131             a += b;
132         }
133 
134         static void HH(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
135             @safe pure nothrow @nogc
136         {
137             a += H (b, c, d) + x + ac;
138             a = rol(a, s);
139             a += b;
140         }
141 
142         static void II(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
143             @safe pure nothrow @nogc
144         {
145             a += I (b, c, d) + x + ac;
146             a = rol(a, s);
147             a += b;
148         }
149 
150         /*
151          * MD5 basic transformation. Transforms state based on block.
152          */
153 
154         //Constants for MD5Transform routine.
155         enum
156         {
157             S11 = 7,
158             S12 = 12,
159             S13 = 17,
160             S14 = 22,
161             S21 = 5,
162             S22 = 9,
163             S23 = 14,
164             S24 = 20,
165             S31 = 4,
166             S32 = 11,
167             S33 = 16,
168             S34 = 23,
169             S41 = 6,
170             S42 = 10,
171             S43 = 15,
172             S44 = 21,
173         }
174 
175         private void transform(const(ubyte[64])* block) pure nothrow @nogc
176         {
177             uint a = _state[0],
178                  b = _state[1],
179                  c = _state[2],
180                  d = _state[3];
181 
182             uint[16] x = void;
183 
184             version (BigEndian)
185             {
186                 import std.bitmanip : littleEndianToNative;
187 
188                 for (size_t i = 0; i < 16; i++)
189                 {
190                     x[i] = littleEndianToNative!uint(*cast(ubyte[4]*)&(*block)[i*4]);
191                 }
192             }
193             else
194             {
195                 (cast(ubyte*) x.ptr)[0 .. 64] = (cast(ubyte*) block)[0 .. 64];
196             }
197 
198             //Round 1
199             FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
200             FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
201             FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
202             FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
203             FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
204             FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
205             FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
206             FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
207             FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
208             FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
209             FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
210             FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
211             FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
212             FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
213             FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
214             FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
215 
216             //Round 2
217             GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
218             GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
219             GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
220             GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
221             GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
222             GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
223             GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
224             GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
225             GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
226             GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
227             GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
228             GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
229             GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
230             GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
231             GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
232             GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
233 
234             //Round 3
235             HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
236             HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
237             HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
238             HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
239             HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
240             HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
241             HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
242             HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
243             HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
244             HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
245             HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
246             HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
247             HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
248             HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
249             HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
250             HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
251 
252             //Round 4
253             II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
254             II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
255             II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
256             II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
257             II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
258             II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
259             II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
260             II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
261             II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
262             II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
263             II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
264             II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
265             II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
266             II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
267             II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
268             II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
269 
270             _state[0] += a;
271             _state[1] += b;
272             _state[2] += c;
273             _state[3] += d;
274 
275             //Zeroize sensitive information.
276             x[] = 0;
277         }
278 
279     public:
280         enum blockSize = 512;
281 
282         /**
283          * Use this to feed the digest with data.
284          * Also implements the $(REF isOutputRange, std,range,primitives)
285          * interface for `ubyte` and `const(ubyte)[]`.
286          *
287          * Example:
288          * ----
289          * MD5 dig;
290          * dig.put(cast(ubyte) 0); //single ubyte
291          * dig.put(cast(ubyte) 0, cast(ubyte) 0); //variadic
292          * ubyte[10] buf;
293          * dig.put(buf); //buffer
294          * ----
295          */
296         void put(scope const(ubyte)[] data...) @trusted pure nothrow @nogc
297         {
298             size_t i;
299             uint index, partLen;
300             auto inputLen = data.length;
301 
302             //Compute number of bytes mod 64
303             index = (cast(uint)_count >> 3) & (64 - 1);
304 
305             //Update number of bits
306             _count += inputLen * 8;
307 
308             partLen = 64 - index;
309 
310             //Transform as many times as possible
311             if (inputLen >= partLen)
312             {
313                 (&_buffer[index])[0 .. partLen] = data.ptr[0 .. partLen];
314                 transform(&_buffer);
315 
316                 for (i = partLen; i + 63 < inputLen; i += 64)
317                 {
318                     transform(cast(const(ubyte[64])*)(data[i .. i + 64].ptr));
319                 }
320 
321                 index = 0;
322             }
323             else
324             {
325                 i = 0;
326             }
327 
328             /* Buffer remaining input */
329             if (inputLen - i)
330                 (&_buffer[index])[0 .. inputLen-i] = (&data[i])[0 .. inputLen-i];
331         }
332 
333         /**
334          * Used to (re)initialize the MD5 digest.
335          *
336          * Note:
337          * For this MD5 Digest implementation calling start after default construction
338          * is not necessary. Calling start is only necessary to reset the Digest.
339          *
340          * Generic code which deals with different Digest types should always call start though.
341          *
342          * Example:
343          * --------
344          * MD5 digest;
345          * //digest.start(); //Not necessary
346          * digest.put(0);
347          * --------
348          */
349         void start() @safe pure nothrow @nogc
350         {
351             this = MD5.init;
352         }
353 
354         /**
355          * Returns the finished MD5 hash. This also calls $(LREF start) to
356          * reset the internal state.
357           */
358         ubyte[16] finish() @trusted pure nothrow @nogc
359         {
360             import std.bitmanip : nativeToLittleEndian;
361 
362             ubyte[16] data = void;
363             ubyte[8] bits = void;
364             uint index, padLen;
365 
366             //Save number of bits
367             bits[0 .. 8] = nativeToLittleEndian(_count)[];
368 
369             //Pad out to 56 mod 64
370             index = (cast(uint)_count >> 3) & (64 - 1);
371             padLen = (index < 56) ? (56 - index) : (120 - index);
372             put(_padding[0 .. padLen]);
373 
374             //Append length (before padding)
375             put(bits);
376 
377             //Store state in digest
378             data[0 .. 4]   = nativeToLittleEndian(_state[0])[];
379             data[4 .. 8]   = nativeToLittleEndian(_state[1])[];
380             data[8 .. 12]  = nativeToLittleEndian(_state[2])[];
381             data[12 .. 16] = nativeToLittleEndian(_state[3])[];
382 
383             /* Zeroize sensitive information. */
384             start();
385             return data;
386         }
387         ///
388         @safe unittest
389         {
390             //Simple example
391             MD5 hash;
392             hash.start();
393             hash.put(cast(ubyte) 0);
394             ubyte[16] result = hash.finish();
395         }
396 }
397 
398 ///
399 @safe unittest
400 {
401     //Simple example, hashing a string using md5Of helper function
402     ubyte[16] hash = md5Of("abc");
403     //Let's get a hash string
404     assert(toHexString(hash) == "900150983CD24FB0D6963F7D28E17F72");
405 }
406 
407 ///
408 @safe unittest
409 {
410     //Using the basic API
411     MD5 hash;
412     hash.start();
413     ubyte[1024] data;
414     //Initialize data here...
415     hash.put(data);
416     ubyte[16] result = hash.finish();
417 }
418 
419 ///
420 @safe unittest
421 {
422     //Let's use the template features:
423     void doSomething(T)(ref T hash)
424     if (isDigest!T)
425     {
426         hash.put(cast(ubyte) 0);
427     }
428     MD5 md5;
429     md5.start();
430     doSomething(md5);
431     assert(toHexString(md5.finish()) == "93B885ADFE0DA089CDF634904FD59F71");
432 }
433 
434 @safe unittest
435 {
436     assert(isDigest!MD5);
437 }
438 
439 @system unittest
440 {
441     import std.range;
442     import std.conv : hexString;
443 
444     ubyte[16] digest;
445 
446     MD5 md5;
447     md5.put(cast(ubyte[])"abcdef");
448     md5.start();
449     md5.put(cast(ubyte[])"");
450     assert(md5.finish() == cast(ubyte[]) hexString!"d41d8cd98f00b204e9800998ecf8427e");
451 
452     digest = md5Of("");
453     assert(digest == cast(ubyte[]) hexString!"d41d8cd98f00b204e9800998ecf8427e");
454 
455     digest = md5Of("a");
456     assert(digest == cast(ubyte[]) hexString!"0cc175b9c0f1b6a831c399e269772661");
457 
458     digest = md5Of("abc");
459     assert(digest == cast(ubyte[]) hexString!"900150983cd24fb0d6963f7d28e17f72");
460 
461     digest = md5Of("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq");
462     assert(digest == cast(ubyte[]) hexString!"8215ef0796a20bcaaae116d3876c664a");
463 
464     digest = md5Of("message digest");
465     assert(digest == cast(ubyte[]) hexString!"f96b697d7cb7938d525a2f31aaf161d0");
466 
467     digest = md5Of("abcdefghijklmnopqrstuvwxyz");
468     assert(digest == cast(ubyte[]) hexString!"c3fcd3d76192e4007dfb496cca67e13b");
469 
470     digest = md5Of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789");
471     assert(digest == cast(ubyte[]) hexString!"d174ab98d277d9f5a5611c2c9f419d9f");
472 
473     digest = md5Of("1234567890123456789012345678901234567890"~
474                     "1234567890123456789012345678901234567890");
475     assert(digest == cast(ubyte[]) hexString!"57edf4a22be3c955ac49da2e2107b67a");
476 
477     enum ubyte[16] input = cast(ubyte[16]) hexString!"c3fcd3d76192e4007dfb496cca67e13b";
478     assert(toHexString(input)
479         == "C3FCD3D76192E4007DFB496CCA67E13B");
480 
481     ubyte[] onemilliona = new ubyte[1000000];
482     onemilliona[] = 'a';
483     digest = md5Of(onemilliona);
484     assert(digest == cast(ubyte[]) hexString!"7707D6AE4E027C70EEA2A935C2296F21");
485 
486     auto oneMillionRange = repeat!ubyte(cast(ubyte)'a', 1000000);
487     digest = md5Of(oneMillionRange);
488     assert(digest == cast(ubyte[]) hexString!"7707D6AE4E027C70EEA2A935C2296F21");
489 }
490 
491 /**
492  * This is a convenience alias for $(REF digest, std,digest) using the
493  * MD5 implementation.
494  */
495 //simple alias doesn't work here, hope this gets inlined...
496 auto md5Of(T...)(T data)
497 {
498     return digest!(MD5, T)(data);
499 }
500 
501 ///
502 @safe unittest
503 {
504     ubyte[16] hash = md5Of("abc");
505     assert(hash == digest!MD5("abc"));
506 }
507 
508 /**
509  * OOP API MD5 implementation.
510  * See `std.digest` for differences between template and OOP API.
511  *
512  * This is an alias for $(D $(REF WrapperDigest, std,digest)!MD5), see
513  * there for more information.
514  */
515 alias MD5Digest = WrapperDigest!MD5;
516 
517 ///
518 @safe unittest
519 {
520     //Simple example, hashing a string using Digest.digest helper function
521     auto md5 = new MD5Digest();
522     ubyte[] hash = md5.digest("abc");
523     //Let's get a hash string
524     assert(toHexString(hash) == "900150983CD24FB0D6963F7D28E17F72");
525 }
526 
527 ///
528 @system unittest
529 {
530      //Let's use the OOP features:
531     void test(Digest dig)
532     {
533       dig.put(cast(ubyte) 0);
534     }
535     auto md5 = new MD5Digest();
536     test(md5);
537 
538     //Let's use a custom buffer:
539     ubyte[16] buf;
540     ubyte[] result = md5.finish(buf[]);
541     assert(toHexString(result) == "93B885ADFE0DA089CDF634904FD59F71");
542 }
543 
544 @system unittest
545 {
546     import std.conv : hexString;
547     auto md5 = new MD5Digest();
548 
549     md5.put(cast(ubyte[])"abcdef");
550     md5.reset();
551     md5.put(cast(ubyte[])"");
552     assert(md5.finish() == cast(ubyte[]) hexString!"d41d8cd98f00b204e9800998ecf8427e");
553 
554     md5.put(cast(ubyte[])"abcdefghijklmnopqrstuvwxyz");
555     ubyte[20] result;
556     auto result2 = md5.finish(result[]);
557     assert(result[0 .. 16] == result2 && result2 == cast(ubyte[]) hexString!"c3fcd3d76192e4007dfb496cca67e13b");
558 
559     debug
560     {
561         import std.exception;
562         assertThrown!Error(md5.finish(result[0 .. 15]));
563     }
564 
565     assert(md5.length == 16);
566 
567     assert(md5.digest("") == cast(ubyte[]) hexString!"d41d8cd98f00b204e9800998ecf8427e");
568 
569     assert(md5.digest("a") == cast(ubyte[]) hexString!"0cc175b9c0f1b6a831c399e269772661");
570 
571     assert(md5.digest("abc") == cast(ubyte[]) hexString!"900150983cd24fb0d6963f7d28e17f72");
572 
573     assert(md5.digest("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq")
574            == cast(ubyte[]) hexString!"8215ef0796a20bcaaae116d3876c664a");
575 
576     assert(md5.digest("message digest") == cast(ubyte[]) hexString!"f96b697d7cb7938d525a2f31aaf161d0");
577 
578     assert(md5.digest("abcdefghijklmnopqrstuvwxyz")
579            == cast(ubyte[]) hexString!"c3fcd3d76192e4007dfb496cca67e13b");
580 
581     assert(md5.digest("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
582            == cast(ubyte[]) hexString!"d174ab98d277d9f5a5611c2c9f419d9f");
583 
584     assert(md5.digest("1234567890123456789012345678901234567890",
585                                    "1234567890123456789012345678901234567890")
586            == cast(ubyte[]) hexString!"57edf4a22be3c955ac49da2e2107b67a");
587 }