symphonia_core/checksum/
md5.rs

1// Symphonia
2// Copyright (c) 2019-2022 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
8use std::cmp;
9
10use crate::io::Monitor;
11
12fn transform(state: &mut [u32; 4], buf: &[u8]) {
13    // Assert to hopefully force the compiler to elide bounds checks on buf.
14    assert!(buf.len() == 64);
15
16    let mut input = [0u32; 16];
17
18    // Collect 4 bytes from an input buffer and store as a u32 in the output buffer. Note: input
19    // bytes are considered little-endian for MD5.
20    macro_rules! collect {
21        ($output:ident, $input:ident, $idx:expr) => {
22            $output[$idx] = u32::from_le_bytes([
23                $input[$idx * 4 + 0],
24                $input[$idx * 4 + 1],
25                $input[$idx * 4 + 2],
26                $input[$idx * 4 + 3],
27            ]);
28        };
29    }
30
31    collect!(input, buf, 0);
32    collect!(input, buf, 1);
33    collect!(input, buf, 2);
34    collect!(input, buf, 3);
35    collect!(input, buf, 4);
36    collect!(input, buf, 5);
37    collect!(input, buf, 6);
38    collect!(input, buf, 7);
39    collect!(input, buf, 8);
40    collect!(input, buf, 9);
41    collect!(input, buf, 10);
42    collect!(input, buf, 11);
43    collect!(input, buf, 12);
44    collect!(input, buf, 13);
45    collect!(input, buf, 14);
46    collect!(input, buf, 15);
47
48    // The transformation for a single step of a round: A = B + ROTL32(F + A + K[i] + M[g], S).
49    macro_rules! round_step {
50        ($a:ident, $b:ident, $f:expr, $m:expr, $s:expr, $k:expr) => {
51            $a = $f.wrapping_add($a).wrapping_add($k).wrapping_add($m);
52            $a = $b.wrapping_add($a.rotate_left($s));
53        };
54    }
55
56    let mut a = state[0];
57    let mut b = state[1];
58    let mut c = state[2];
59    let mut d = state[3];
60
61    // Round 1: F(B, C, D) = D xor (B and (C xor D))
62    {
63        macro_rules! T {
64            ($a:ident, $b:ident, $c:ident, $d:ident, $m:expr, $s:expr, $k:expr) => {
65                round_step!($a, $b, $d ^ ($b & ($c ^ $d)), $m, $s, $k);
66            };
67        }
68
69        T!(a, b, c, d, input[0], 7, 0xd76aa478);
70        T!(d, a, b, c, input[1], 12, 0xe8c7b756);
71        T!(c, d, a, b, input[2], 17, 0x242070db);
72        T!(b, c, d, a, input[3], 22, 0xc1bdceee);
73        T!(a, b, c, d, input[4], 7, 0xf57c0faf);
74        T!(d, a, b, c, input[5], 12, 0x4787c62a);
75        T!(c, d, a, b, input[6], 17, 0xa8304613);
76        T!(b, c, d, a, input[7], 22, 0xfd469501);
77        T!(a, b, c, d, input[8], 7, 0x698098d8);
78        T!(d, a, b, c, input[9], 12, 0x8b44f7af);
79        T!(c, d, a, b, input[10], 17, 0xffff5bb1);
80        T!(b, c, d, a, input[11], 22, 0x895cd7be);
81        T!(a, b, c, d, input[12], 7, 0x6b901122);
82        T!(d, a, b, c, input[13], 12, 0xfd987193);
83        T!(c, d, a, b, input[14], 17, 0xa679438e);
84        T!(b, c, d, a, input[15], 22, 0x49b40821);
85    }
86
87    // Round 2: G(B, C, D) = C xor (D and (B xor C))
88    {
89        macro_rules! T {
90            ($a:ident, $b:ident, $c:ident, $d:ident, $m:expr, $s:expr, $k:expr) => {
91                round_step!($a, $b, $c ^ ($d & ($b ^ $c)), $m, $s, $k);
92            };
93        }
94
95        T!(a, b, c, d, input[1], 5, 0xf61e2562);
96        T!(d, a, b, c, input[6], 9, 0xc040b340);
97        T!(c, d, a, b, input[11], 14, 0x265e5a51);
98        T!(b, c, d, a, input[0], 20, 0xe9b6c7aa);
99        T!(a, b, c, d, input[5], 5, 0xd62f105d);
100        T!(d, a, b, c, input[10], 9, 0x02441453);
101        T!(c, d, a, b, input[15], 14, 0xd8a1e681);
102        T!(b, c, d, a, input[4], 20, 0xe7d3fbc8);
103        T!(a, b, c, d, input[9], 5, 0x21e1cde6);
104        T!(d, a, b, c, input[14], 9, 0xc33707d6);
105        T!(c, d, a, b, input[3], 14, 0xf4d50d87);
106        T!(b, c, d, a, input[8], 20, 0x455a14ed);
107        T!(a, b, c, d, input[13], 5, 0xa9e3e905);
108        T!(d, a, b, c, input[2], 9, 0xfcefa3f8);
109        T!(c, d, a, b, input[7], 14, 0x676f02d9);
110        T!(b, c, d, a, input[12], 20, 0x8d2a4c8a);
111    }
112
113    // Round 3: H(B, C, D) = B xor C xor D
114    {
115        macro_rules! T {
116            ($a:ident, $b:ident, $c:ident, $d:ident, $m:expr, $s:expr, $k:expr) => {
117                round_step!($a, $b, $b ^ $c ^ $d, $m, $s, $k);
118            };
119        }
120
121        T!(a, b, c, d, input[5], 4, 0xfffa3942);
122        T!(d, a, b, c, input[8], 11, 0x8771f681);
123        T!(c, d, a, b, input[11], 16, 0x6d9d6122);
124        T!(b, c, d, a, input[14], 23, 0xfde5380c);
125        T!(a, b, c, d, input[1], 4, 0xa4beea44);
126        T!(d, a, b, c, input[4], 11, 0x4bdecfa9);
127        T!(c, d, a, b, input[7], 16, 0xf6bb4b60);
128        T!(b, c, d, a, input[10], 23, 0xbebfbc70);
129        T!(a, b, c, d, input[13], 4, 0x289b7ec6);
130        T!(d, a, b, c, input[0], 11, 0xeaa127fa);
131        T!(c, d, a, b, input[3], 16, 0xd4ef3085);
132        T!(b, c, d, a, input[6], 23, 0x04881d05);
133        T!(a, b, c, d, input[9], 4, 0xd9d4d039);
134        T!(d, a, b, c, input[12], 11, 0xe6db99e5);
135        T!(c, d, a, b, input[15], 16, 0x1fa27cf8);
136        T!(b, c, d, a, input[2], 23, 0xc4ac5665);
137    }
138
139    // Round 4: I(B,C,D) = C xor (B or (not D))
140    {
141        macro_rules! T {
142            ($a:ident, $b:ident, $c:ident, $d:ident, $m:expr, $s:expr, $k:expr) => {
143                round_step!($a, $b, $c ^ ($b | !$d), $m, $s, $k);
144            };
145        }
146
147        T!(a, b, c, d, input[0], 6, 0xf4292244);
148        T!(d, a, b, c, input[7], 10, 0x432aff97);
149        T!(c, d, a, b, input[14], 15, 0xab9423a7);
150        T!(b, c, d, a, input[5], 21, 0xfc93a039);
151        T!(a, b, c, d, input[12], 6, 0x655b59c3);
152        T!(d, a, b, c, input[3], 10, 0x8f0ccc92);
153        T!(c, d, a, b, input[10], 15, 0xffeff47d);
154        T!(b, c, d, a, input[1], 21, 0x85845dd1);
155        T!(a, b, c, d, input[8], 6, 0x6fa87e4f);
156        T!(d, a, b, c, input[15], 10, 0xfe2ce6e0);
157        T!(c, d, a, b, input[6], 15, 0xa3014314);
158        T!(b, c, d, a, input[13], 21, 0x4e0811a1);
159        T!(a, b, c, d, input[4], 6, 0xf7537e82);
160        T!(d, a, b, c, input[11], 10, 0xbd3af235);
161        T!(c, d, a, b, input[2], 15, 0x2ad7d2bb);
162        T!(b, c, d, a, input[9], 21, 0xeb86d391);
163    }
164
165    state[0] = state[0].wrapping_add(a);
166    state[1] = state[1].wrapping_add(b);
167    state[2] = state[2].wrapping_add(c);
168    state[3] = state[3].wrapping_add(d);
169}
170
171/// `Md5` implements the MD5 hashing algorithm.
172pub struct Md5 {
173    state: [u32; 4],
174    block: [u8; Md5::BLOCK_LEN],
175    len: u64,
176}
177
178impl Default for Md5 {
179    fn default() -> Self {
180        Md5 {
181            state: [0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476],
182            block: [0; Md5::BLOCK_LEN],
183            len: 0,
184        }
185    }
186}
187
188impl Md5 {
189    const BLOCK_LEN: usize = 64;
190    const BLOCK_LEN_MASK: u64 = 0x3f;
191
192    /// Finalizes and returns the computed MD5 hash.
193    pub fn md5(&self) -> [u8; 16] {
194        // Finalize locally.
195        let mut block = [0; Md5::BLOCK_LEN];
196        let mut state = self.state;
197
198        // The block length is the amount of data buffered for the current block.
199        let block_len = (self.len & Md5::BLOCK_LEN_MASK) as usize;
200
201        // The block length should *always* be less than the MD5 block length if the process_*
202        // functions transform the block when it's full.
203        assert!(block_len < Md5::BLOCK_LEN);
204
205        // Copy the buffered block data locally for finalization.
206        block[..block_len].copy_from_slice(&self.block[..block_len]);
207
208        // Append the message terminator to the block.
209        block[block_len] = 0x80;
210
211        // If the message length can not be appended to the block, transform the block, and start
212        // a new block.
213        if Md5::BLOCK_LEN - block_len - 1 < 8 {
214            transform(&mut state, &block);
215            block = [0; Md5::BLOCK_LEN];
216        }
217
218        // The final 8 bytes of the final block contain the message length in bits mod 2^64.
219        block[Md5::BLOCK_LEN - 8..Md5::BLOCK_LEN].copy_from_slice(&(self.len << 3).to_le_bytes());
220        transform(&mut state, &block);
221
222        // The message digest is in big-endian.
223        let mut hash = [0; 16];
224        hash[0..4].copy_from_slice(&state[0].to_le_bytes());
225        hash[4..8].copy_from_slice(&state[1].to_le_bytes());
226        hash[8..12].copy_from_slice(&state[2].to_le_bytes());
227        hash[12..16].copy_from_slice(&state[3].to_le_bytes());
228        hash
229    }
230}
231
232impl Monitor for Md5 {
233    #[inline(always)]
234    fn process_byte(&mut self, byte: u8) {
235        self.block[(self.len & Md5::BLOCK_LEN_MASK) as usize] = byte;
236        self.len += 1;
237
238        // Atleast 1 bytes has been written (see above) and the length is a multiple of the MD5
239        // block length, therefore the current block is full. Perform a MD5 transformation on the
240        // current block.
241        if self.len & Md5::BLOCK_LEN_MASK == 0 {
242            transform(&mut self.state, &self.block);
243        }
244    }
245
246    #[inline(always)]
247    fn process_buf_bytes(&mut self, buf: &[u8]) {
248        let mut rem = buf;
249
250        while !rem.is_empty() {
251            let block_len = (self.len & Md5::BLOCK_LEN_MASK) as usize;
252
253            let copy_len = cmp::min(rem.len(), Md5::BLOCK_LEN - block_len);
254
255            self.len += copy_len as u64;
256
257            // If the copy length is a whole block then perform the transformation directly from the
258            // source buffer.
259            if copy_len == Md5::BLOCK_LEN {
260                transform(&mut self.state, &rem[..copy_len]);
261            }
262            else {
263                // If the copy length is less than a whole block, buffer it into the current block.
264                self.block[block_len..block_len + copy_len].copy_from_slice(&rem[..copy_len]);
265
266                if self.len & Md5::BLOCK_LEN_MASK == 0 {
267                    transform(&mut self.state, &self.block);
268                }
269            }
270
271            rem = &rem[copy_len..];
272        }
273    }
274}
275
276#[cfg(test)]
277mod tests {
278    use super::Md5;
279    use super::Monitor;
280
281    #[test]
282    fn verify_md5() {
283        const STRINGS: [&[u8]; 8] = [
284            b"",
285            b"a",
286            b"abc",
287            b"The quick brown fox jumps over the lazy dog",
288            b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
289            b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!",
290            b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!?",
291            b".s)cyIl?XKs}wDnLEUeZj'72=A/0!w;B[e*QUh)0{&XcGvf'xMx5Chhx_'ahg{GP|_R(0=Xe`lXQN_@MK9::",
292        ];
293
294        #[rustfmt::skip]
295        const HASHES: [[u8; 16]; 8] = [
296            [
297                0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
298                0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
299            ],
300            [
301                0x0c, 0xc1, 0x75, 0xb9, 0xc0, 0xf1, 0xb6, 0xa8,
302                0x31, 0xc3, 0x99, 0xe2, 0x69, 0x77, 0x26, 0x61,
303            ],
304            [
305                0x90, 0x01, 0x50, 0x98, 0x3c, 0xd2, 0x4f, 0xb0,
306                0xd6, 0x96, 0x3f, 0x7d, 0x28, 0xe1, 0x7f, 0x72,
307            ],
308            [
309                0x9e, 0x10, 0x7d, 0x9d, 0x37, 0x2b, 0xb6, 0x82,
310                0x6b, 0xd8, 0x1d, 0x35, 0x42, 0xa4, 0x19, 0xd6,
311            ],
312            [
313                0xd1, 0x74, 0xab, 0x98, 0xd2, 0x77, 0xd9, 0xf5,
314                0xa5, 0x61, 0x1c, 0x2c, 0x9f, 0x41, 0x9d, 0x9f,
315            ],
316            [
317                0x64, 0x1b, 0xa6, 0x02, 0x88, 0xc1, 0x7a, 0x2d,
318                0xa5, 0x09, 0x00, 0x77, 0xeb, 0x89, 0x58, 0xad,
319            ],
320            [
321                0x0a, 0x71, 0xdb, 0x4d, 0xf3, 0x50, 0x92, 0x73,
322                0x62, 0x42, 0x3a, 0x42, 0xdc, 0xf8, 0x14, 0x57,
323            ],
324            [
325                0x0b, 0x76, 0x74, 0x7e, 0xfd, 0xcd, 0xb9, 0x33,
326                0x67, 0xfe, 0x2d, 0xa3, 0x21, 0x1b, 0x5d, 0x41,
327            ],
328        ];
329
330        // As a buffer.
331        for (string, hash) in STRINGS.iter().zip(&HASHES) {
332            let mut md5: Md5 = Default::default();
333
334            md5.process_buf_bytes(string);
335
336            assert_eq!(*hash, md5.md5());
337        }
338
339        // As partial buffers.
340        for (string, hash) in STRINGS.iter().zip(&HASHES) {
341            let mut md5: Md5 = Default::default();
342
343            for bytes in string.chunks(21) {
344                md5.process_buf_bytes(bytes);
345            }
346
347            assert_eq!(*hash, md5.md5());
348        }
349
350        // Byte-by-byte
351        for (string, hash) in STRINGS.iter().zip(&HASHES) {
352            let mut md5: Md5 = Default::default();
353
354            for byte in string.iter() {
355                md5.process_byte(*byte);
356            }
357
358            assert_eq!(*hash, md5.md5());
359        }
360    }
361}