Fix sequence type derivation in the presence of negation types.
[sbcl.git] / contrib / sb-md5 / md5.lisp
blobf8dc3c6d8bcf21fe808f46177a1cd4860be28978
1 ;;;; This file implements The MD5 Message-Digest Algorithm, as defined in
2 ;;;; RFC 1321 by R. Rivest, published April 1992.
3 ;;;;
4 ;;;; It was written by Pierre R. Mai, with copious input from the
5 ;;;; cmucl-help mailing-list hosted at cons.org, in November 2001 and
6 ;;;; has been placed into the public domain.
7 ;;;;
8 ;;;; $Id$
9 ;;;;
10 ;;;; While the implementation should work on all conforming Common
11 ;;;; Lisp implementations, it has only been optimized for CMU CL,
12 ;;;; where it achieved comparable performance to the standard md5sum
13 ;;;; utility (within a factor of 1.5 or less on iA32 and UltraSparc
14 ;;;; hardware).
15 ;;;;
16 ;;;; Since the implementation makes heavy use of arithmetic on
17 ;;;; (unsigned-byte 32) numbers, acceptable performance is likely only
18 ;;;; on CL implementations that support unboxed arithmetic on such
19 ;;;; numbers in some form. For other CL implementations a 16bit
20 ;;;; implementation of MD5 is probably more suitable.
21 ;;;;
22 ;;;; The code implements correct operation for files of unbounded size
23 ;;;; as is, at the cost of having to do a single generic integer
24 ;;;; addition for each call to update-md5-state. If you call
25 ;;;; update-md5-state frequently with little data, this can pose a
26 ;;;; performance problem. If you can live with a size restriction of
27 ;;;; 512 MB, then you can enable fast fixnum arithmetic by putting
28 ;;;; :md5-small-length onto *features* prior to compiling this file.
29 ;;;;
30 ;;;; This software is "as is", and has no warranty of any kind. The
31 ;;;; authors assume no responsibility for the consequences of any use
32 ;;;; of this software.
;;; Package definition.  Only the COMMON-LISP package is used; the
;;; exported symbols are grouped by abstraction level.
(defpackage :sb-md5
  (:use :cl)
  (:export
   ;; Low-level types and functions
   #:md5-regs
   #:initial-md5-regs
   #:md5regs-digest
   #:update-md5-block
   #:fill-block
   #:fill-block-ub8
   #:fill-block-char
   ;; Mid-level types and functions
   #:md5-state
   #:md5-state-p
   #:make-md5-state
   #:update-md5-state
   #:finalize-md5-state
   ;; High-level functions on sequences, streams and files
   #:md5sum-sequence
   #:md5sum-string
   #:md5sum-stream
   #:md5sum-file))

(in-package :sb-md5)
;;; Section 2: Basic Datatypes

(deftype ub32 ()
  "A 32-bit unsigned integer, i.e. the `word' quantity of the MD5
specification."
  '(unsigned-byte 32))
(defmacro assemble-ub32 (a b c d)
  "Expand into code that combines four (unsigned-byte 8) values into one
ub32.  A ends up in bits 0-7 (low-order byte), B in bits 8-15, C in
bits 16-23 and D in bits 24-31 (high-order byte)."
  ;; The argument forms appear in the expansion in the order D, C, B, A.
  (list 'the 'ub32
        (list 'logior
              (list 'ash d 24)
              (list 'ash c 16)
              (list 'ash b 8)
              a)))
;;; Section 3.4: Auxiliary functions
;; The four auxiliary functions are small enough to always be inlined.
(declaim (inline f g h i))
;; Each one maps three 32-bit words to one 32-bit word.
(declaim (ftype (function (ub32 ub32 ub32) ub32) f g h i))
(defun f (x y z)
  "Bitwise selection: for every bit position, take the bit of Y where
X has a 1 and the bit of Z where X has a 0."
  (declare (type ub32 x y z))
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  ;; (logand (lognot x) z) is the same value as (logandc1 x z).
  (logior (logand x y)
          (logand (lognot x) z)))
(defun g (x y z)
  "Bitwise selection: for every bit position, take the bit of X where
Z has a 1 and the bit of Y where Z has a 0."
  (declare (type ub32 x y z))
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  ;; (logand y (lognot z)) is the same value as (logandc2 y z).
  (logior (logand x z)
          (logand y (lognot z))))
(defun h (x y z)
  "Bitwise exclusive-or of the three words X, Y and Z."
  (declare (type ub32 x y z))
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  (logxor x (logxor y z)))
(defun i (x y z)
  "Return Y xor (X or (not Z)), truncated to the low 32 bits."
  (declare (type ub32 x y z))
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  ;; (lognot z) is negative for a ub32 Z, so the intermediate result is
  ;; negative too; the mask brings it back into ub32 range.
  (logand #xffffffff
          (logxor y (logior x (lognot z)))))
(declaim (inline mod32+))
(declaim (ftype (function (ub32 ub32) ub32) mod32+))
(defun mod32+ (a b)
  "Add the two 32-bit words A and B modulo 2^32."
  (declare (type ub32 a b))
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  ;; Keeping only the low 32 bits of the sum discards the carry.
  (logand #xffffffff (+ a b)))
;;; It is unclear why this is needed, but without it MOD32+ was not
;;; being inlined. -- CSR, 2003-09-14
(define-compiler-macro mod32+ (a b)
  ;; Rewrite every call into the open-coded 32-bit truncated addition.
  (list 'ldb '(byte 32 0) (list '+ a b)))
(declaim (inline rol32))
(declaim (ftype (function (ub32 (unsigned-byte 5)) ub32) rol32))
(defun rol32 (a s)
  "Rotate the 32-bit word A by S bit positions within its low 32 bits,
delegating to SB-ROTATE-BYTE:ROTATE-BYTE."
  (declare (type ub32 a))
  (declare (type (unsigned-byte 5) s))
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  (sb-rotate-byte:rotate-byte s (byte 32 0) a))
;;; Section 3.4: Table T

;;; The table must exist at compile time as well, because
;;; WITH-MD5-ROUND reads it during macroexpansion.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (defparameter *t*
    (let ((table (make-array 64 :element-type 'ub32)))
      ;; Entry number N (counting from 1) is the integer part of
      ;; 4294967296 * abs(sin(N)); it is stored at index N-1.
      (dotimes (index 64 table)
        (setf (aref table index)
              (truncate
               (* 4294967296
                  (abs (sin (float (1+ index) 0.0d0))))))))))
;;; Section 3.4: Helper Macro for single round definitions

(defmacro with-md5-round ((op block) &rest clauses)
  "Expand into a PROGN containing one assignment per clause.
Each clause has the shape (A B C D K S I): A, B, C and D name the
register variables, K is the index of the word read from BLOCK, S is
the rotation amount handed to ROL32, and I is the 1-based index of the
constant taken from *T* at macroexpansion time.  OP names the auxiliary
function applied to B, C and D."
  `(progn
     ,@(loop for (a b c d k s i) in clauses
             collect
             `(setq ,a (mod32+ ,b (rol32 (mod32+ (mod32+ ,a (,op ,b ,c ,d))
                                                 (mod32+ (aref ,block ,k)
                                                         ,(aref *t* (1- i))))
                                         ,s))))))
;;; Section 3.3: (Initial) MD5 Working Set

(deftype md5-regs ()
  "Working state of the MD5 algorithm: a simple vector of four 32-bit
words holding the registers A, B, C and D, in that order."
  '(simple-array (unsigned-byte 32) (4)))
;; Register accessors.  Each expands into a plain AREF, so the same
;; form works as a reader and as a SETF place.
(defmacro md5-regs-a (regs)
  "Access register A (element 0) of REGS."
  (list 'aref regs 0))

(defmacro md5-regs-b (regs)
  "Access register B (element 1) of REGS."
  (list 'aref regs 1))

(defmacro md5-regs-c (regs)
  "Access register C (element 2) of REGS."
  (list 'aref regs 2))

(defmacro md5-regs-d (regs)
  "Access register D (element 3) of REGS."
  (list 'aref regs 3))
;; Initial register values, written out as the 32-bit words obtained by
;; assembling the listed bytes with the first byte as the low-order one.
(defconstant +md5-magic-a+ #x67452301   ; bytes 01 23 45 67
  "Initial value of Register A of the MD5 working state.")
(defconstant +md5-magic-b+ #xefcdab89   ; bytes 89 ab cd ef
  "Initial value of Register B of the MD5 working state.")
(defconstant +md5-magic-c+ #x98badcfe   ; bytes fe dc ba 98
  "Initial value of Register C of the MD5 working state.")
(defconstant +md5-magic-d+ #x10325476   ; bytes 76 54 32 10
  "Initial value of Register D of the MD5 working state.")
(declaim (inline initial-md5-regs))
(defun initial-md5-regs ()
  "Return a freshly allocated MD5 working state whose registers A, B, C
and D hold the initial magic values."
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  (let ((regs (make-array 4 :element-type '(unsigned-byte 32)
                            :initial-contents (list +md5-magic-a+
                                                    +md5-magic-b+
                                                    +md5-magic-c+
                                                    +md5-magic-d+))))
    (declare (type md5-regs regs))
    regs))
;;; Section 3.4: Operation on 16-Word Blocks

(defun update-md5-block (regs block)
  "Core of the MD5 algorithm: mix one complete 16-word BLOCK of input
into the working state REGS (registers A, B, C and D).  REGS is
modified in place and returned."
  (declare (type md5-regs regs))
  (declare (type (simple-array ub32 (16)) block))
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  ;; Work on local copies of the registers; the results are added back
  ;; into REGS once all four rounds are done.
  (let ((a (md5-regs-a regs))
        (b (md5-regs-b regs))
        (c (md5-regs-c regs))
        (d (md5-regs-d regs)))
    (declare (type ub32 a b c d))
    ;; Every clause below reads (a b c d k s i); see WITH-MD5-ROUND.
    ;; Round 1
    (with-md5-round (f block)
      (A B C D  0  7  1) (D A B C  1 12  2) (C D A B  2 17  3) (B C D A  3 22  4)
      (A B C D  4  7  5) (D A B C  5 12  6) (C D A B  6 17  7) (B C D A  7 22  8)
      (A B C D  8  7  9) (D A B C  9 12 10) (C D A B 10 17 11) (B C D A 11 22 12)
      (A B C D 12  7 13) (D A B C 13 12 14) (C D A B 14 17 15) (B C D A 15 22 16))
    ;; Round 2
    (with-md5-round (g block)
      (A B C D  1  5 17) (D A B C  6  9 18) (C D A B 11 14 19) (B C D A  0 20 20)
      (A B C D  5  5 21) (D A B C 10  9 22) (C D A B 15 14 23) (B C D A  4 20 24)
      (A B C D  9  5 25) (D A B C 14  9 26) (C D A B  3 14 27) (B C D A  8 20 28)
      (A B C D 13  5 29) (D A B C  2  9 30) (C D A B  7 14 31) (B C D A 12 20 32))
    ;; Round 3
    (with-md5-round (h block)
      (A B C D  5  4 33) (D A B C  8 11 34) (C D A B 11 16 35) (B C D A 14 23 36)
      (A B C D  1  4 37) (D A B C  4 11 38) (C D A B  7 16 39) (B C D A 10 23 40)
      (A B C D 13  4 41) (D A B C  0 11 42) (C D A B  3 16 43) (B C D A  6 23 44)
      (A B C D  9  4 45) (D A B C 12 11 46) (C D A B 15 16 47) (B C D A  2 23 48))
    ;; Round 4
    (with-md5-round (i block)
      (A B C D  0  6 49) (D A B C  7 10 50) (C D A B 14 15 51) (B C D A  5 21 52)
      (A B C D 12  6 53) (D A B C  3 10 54) (C D A B 10 15 55) (B C D A  1 21 56)
      (A B C D  8  6 57) (D A B C 15 10 58) (C D A B  6 15 59) (B C D A 13 21 60)
      (A B C D  4  6 61) (D A B C 11 10 62) (C D A B  2 15 63) (B C D A  9 21 64))
    ;; Add the round results onto the previous register contents.
    (setf (md5-regs-a regs) (mod32+ (md5-regs-a regs) a))
    (setf (md5-regs-b regs) (mod32+ (md5-regs-b regs) b))
    (setf (md5-regs-c regs) (mod32+ (md5-regs-c regs) c))
    (setf (md5-regs-d regs) (mod32+ (md5-regs-d regs) d))
    regs))
;;; Section 3.4: Converting 8bit-vectors into 16-Word Blocks

(declaim (inline fill-block fill-block-ub8 fill-block-char))

(defun fill-block-ub8 (block buffer offset)
  "Convert a complete 64 (unsigned-byte 8) input vector segment
starting from offset into the given 16 word MD5 block.

BLOCK is overwritten with sixteen 32-bit words, each assembled from four
consecutive octets of BUFFER with the first octet as the low-order
byte."
  (declare (type (integer 0 #.(- most-positive-fixnum 64)) offset)
           (type (simple-array ub32 (16)) block)
           (type (simple-array (unsigned-byte 8) (*)) buffer)
           (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  ;; On little-endian targets the 64 octets are copied into BLOCK in
  ;; one go.
  #+little-endian
  (sb-kernel:ub8-bash-copy buffer offset block 0 64)
  ;; On big-endian targets every word is assembled explicitly.  The DO
  ;; keyword is required here: without it the SETF form is not a valid
  ;; LOOP clause.
  #+big-endian
  (loop for i of-type (integer 0 16) from 0
        for j of-type (integer 0 #.most-positive-fixnum)
        from offset to (+ offset 63) by 4
        do
        (setf (aref block i)
              (assemble-ub32 (aref buffer j)
                             (aref buffer (+ j 1))
                             (aref buffer (+ j 2))
                             (aref buffer (+ j 3))))))
(defun fill-block-char (block buffer offset)
  "Convert a complete 64 character input string segment starting from
offset into the given 16 word MD5 block.

BLOCK is overwritten with sixteen 32-bit words, each assembled from the
character codes of four consecutive characters of BUFFER with the first
character as the low-order byte."
  (declare (type (integer 0 #.(- most-positive-fixnum 64)) offset)
           (type (simple-array ub32 (16)) block)
           (type simple-string buffer)
           (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  ;; NOTE(review): the raw copy presumably relies on BUFFER storing one
  ;; octet per character -- confirm against the callers.
  #+little-endian
  (sb-kernel:ub8-bash-copy buffer offset block 0 64)
  ;; The DO keyword is required here: without it the SETF form is not a
  ;; valid LOOP clause.
  #+big-endian
  (loop for i of-type (integer 0 16) from 0
        for j of-type (integer 0 #.most-positive-fixnum)
        from offset to (+ offset 63) by 4
        do
        (setf (aref block i)
              (assemble-ub32 (char-code (schar buffer j))
                             (char-code (schar buffer (+ j 1)))
                             (char-code (schar buffer (+ j 2)))
                             (char-code (schar buffer (+ j 3)))))))
(defun fill-block (block buffer offset)
  "Fill the 16 word MD5 BLOCK from the 64 element segment of BUFFER that
starts at OFFSET.  BUFFER must be a simple string, which is handed to
`fill-block-char', or a simple (unsigned-byte 8) vector, which is
handed to `fill-block-ub8'; any other simple vector signals the
ETYPECASE error."
  (declare (type (integer 0 #.(- most-positive-fixnum 64)) offset))
  (declare (type (simple-array ub32 (16)) block))
  (declare (type (simple-array * (*)) buffer))
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  ;; The two clause types are disjoint, so their order does not matter.
  (etypecase buffer
    (simple-string
     (fill-block-char block buffer offset))
    ((simple-array (unsigned-byte 8) (*))
     (fill-block-ub8 block buffer offset))))
;;; Section 3.5: Message Digest Output

(declaim (inline md5regs-digest))
(defun md5regs-digest (regs)
  "Build the final 16 byte message-digest from the MD5 working state in
REGS.  Returns a fresh (simple-array (unsigned-byte 8) (16)) holding
registers A, B, C and D in that order, each written low-order byte
first."
  (declare (optimize (speed 3) (safety 0) (space 0) (debug 0))
           (type md5-regs regs))
  (let ((result (make-array 16 :element-type '(unsigned-byte 8))))
    (declare (type (simple-array (unsigned-byte 8) (16)) result))
    ;; Register N occupies result bytes 4N .. 4N+3.
    (dotimes (reg-index 4)
      (let ((word (aref regs reg-index))
            (base (* 4 reg-index)))
        (declare (type ub32 word)
                 (type (integer 0 12) base))
        (dotimes (byte-index 4)
          (setf (aref result (+ base byte-index))
                (ldb (byte 8 (* 8 byte-index)) word)))))
    result))
;;; Mid-Level Drivers

(defstruct (md5-state (:constructor make-md5-state ())
                      (:copier))
  "State of an incremental MD5 computation.
REGS is the register working state, AMOUNT the running total maintained
by `update-md5-state', BLOCK a 16 word scratch block, BUFFER a 64 octet
holding area for input that does not yet fill a block, BUFFER-INDEX the
number of octets currently held in BUFFER, and FINALIZED-P either NIL
or the digest stored by `finalize-md5-state'."
  (regs (initial-md5-regs) :read-only t :type md5-regs)
  ;; With :md5-small-length on *features* the total is limited to 29
  ;; bits; otherwise it is an unbounded non-negative integer.
  (amount 0
   :type #-md5-small-length (integer 0 *)
         #+md5-small-length (unsigned-byte 29))
  (block (make-array 16 :element-type '(unsigned-byte 32))
   :type (simple-array (unsigned-byte 32) (16))
   :read-only t)
  (buffer (make-array 64 :element-type '(unsigned-byte 8))
   :type (simple-array (unsigned-byte 8) (64))
   :read-only t)
  (buffer-index 0 :type (integer 0 63))
  (finalized-p nil))
(declaim (inline copy-to-buffer))
(defun copy-to-buffer (from from-offset count buffer buffer-offset)
  "Copy COUNT elements of the input vector FROM, beginning at
FROM-OFFSET, into the 64 byte BUFFER beginning at BUFFER-OFFSET."
  (declare (type (simple-array * (*)) from)
           (type (unsigned-byte 29) from-offset)
           (type (integer 0 63) count buffer-offset)
           (type (simple-array (unsigned-byte 8) (64)) buffer)
           (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  (sb-kernel:ub8-bash-copy from from-offset
                           buffer buffer-offset
                           count))
(defun update-md5-state (state sequence &key (start 0) (end (length sequence)))
  "Update the given md5-state from sequence, which is either a
simple-string or a simple-array with element-type (unsigned-byte 8),
bounded by start and end, which must be numeric bounding-indices.

Complete 64 element blocks are digested immediately; a trailing partial
block is kept in the state's buffer for the next call.  Returns STATE."
  (declare (type md5-state state)
           (type (simple-array * (*)) sequence)
           (type fixnum start end)
           (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  (let ((regs (md5-state-regs state))
        (block (md5-state-block state))
        (buffer (md5-state-buffer state))
        (buffer-index (md5-state-buffer-index state))
        (length (- end start)))
    (declare (type md5-regs regs) (type fixnum length)
             (type (integer 0 63) buffer-index)
             (type (simple-array (unsigned-byte 32) (16)) block)
             (type (simple-array (unsigned-byte 8) (64)) buffer))
    ;; Handle old rest: top up the partially filled buffer left over
    ;; from a previous call.
    (unless (zerop buffer-index)
      (let ((amount (min (- 64 buffer-index) length)))
        (declare (type (integer 0 63) amount))
        (copy-to-buffer sequence start amount buffer buffer-index)
        (setq start (the fixnum (+ start amount)))
        (let ((new-index (mod (+ buffer-index amount) 64)))
          ;; A new index of zero means the buffer is now full.
          (when (zerop new-index)
            (fill-block-ub8 block buffer 0)
            (update-md5-block regs block))
          ;; All input consumed: record the progress and return early.
          (when (>= start end)
            (setf (md5-state-buffer-index state) new-index)
            (incf (md5-state-amount state) length)
            (return-from update-md5-state state)))))
    ;; Handle main-part and new-rest
    (etypecase sequence
      ((simple-array (unsigned-byte 8) (*))
       (locally
           (declare (type (simple-array (unsigned-byte 8) (*)) sequence))
         (loop for offset of-type (unsigned-byte 29) from start below end by 64
               until (< (- end offset) 64)
               do
               (fill-block-ub8 block sequence offset)
               (update-md5-block regs block)
               finally
               ;; Stash what is left (fewer than 64 elements).
               (let ((amount (- end offset)))
                 (unless (zerop amount)
                   (copy-to-buffer sequence offset amount buffer 0))
                 (setf (md5-state-buffer-index state) amount)))))
      (simple-string
       (locally
           (declare (type simple-string sequence))
         (loop for offset of-type (unsigned-byte 29) from start below end by 64
               until (< (- end offset) 64)
               do
               (fill-block-char block sequence offset)
               (update-md5-block regs block)
               finally
               ;; Stash what is left (fewer than 64 elements).
               (let ((amount (- end offset)))
                 (unless (zerop amount)
                   (copy-to-buffer sequence offset amount buffer 0))
                 (setf (md5-state-buffer-index state) amount))))))
    (setf (md5-state-amount state)
          #-md5-small-length (+ (md5-state-amount state) length)
          #+md5-small-length (the (unsigned-byte 29)
                               (+ (md5-state-amount state) length)))
    state))
(defun finalize-md5-state (state)
  "Finalize STATE unless that has already happened: the input still
held in its buffer is processed together with the padding and the
appended bit-length.

The MD5 message-digest is returned as an array of sixteen
(unsigned-byte 8) values; it is also stored in STATE, so later calls
return the stored digest.  Calling `update-md5-state' after a call to
`finalize-md5-state' results in unspecified behaviour."
  (declare (type md5-state state)
           (optimize (speed 3) (safety 0) (space 0) (debug 0)))
  (cond
    ;; Already finalized: the stored digest is the value of this clause.
    ((md5-state-finalized-p state))
    (t
     (let ((regs (md5-state-regs state))
           (block (md5-state-block state))
           (buffer (md5-state-buffer state))
           (buffer-index (md5-state-buffer-index state))
           ;; Total length in bits.
           (total-length (* 8 (md5-state-amount state))))
       (declare (type md5-regs regs)
                (type (integer 0 63) buffer-index)
                (type (simple-array ub32 (16)) block)
                (type (simple-array (unsigned-byte 8) (*)) buffer))
       ;; Mandatory single 1 bit, followed by 0 bits up to the end of
       ;; the 64 byte buffer.
       (setf (aref buffer buffer-index) #x80)
       (fill buffer #x00 :start (1+ buffer-index) :end 64)
       (fill-block-ub8 block buffer 0)
       ;; If the length field would not fit, flush this block first and
       ;; continue with a completely zeroed one.
       (when (>= buffer-index 56)
         (update-md5-block regs block)
         (fill block #x00000000))
       ;; Append the 64 bit message bit length, low word first.
       (setf (aref block 14) (ldb (byte 32 0) total-length))
       #-md5-small-length
       (setf (aref block 15) (ldb (byte 32 32) total-length))
       ;; Flush the last block.
       (update-md5-block regs block)
       ;; Remember the digest for later calls.
       (setf (md5-state-finalized-p state)
             (md5regs-digest regs))))))
;;; High-Level Drivers

(defun md5sum-sequence (sequence &key (start 0) end)
  "Return the MD5 message-digest of the elements of SEQUENCE between
START and END.  SEQUENCE must be a vector with element-type
(UNSIGNED-BYTE 8); END defaults to the length of SEQUENCE."
  (declare (optimize (speed 3) (safety 3) (space 0) (debug 1))
           (type (vector (unsigned-byte 8)) sequence)
           (type fixnum start))
  (locally
      (declare (optimize (safety 1) (debug 0)))
    (let ((state (make-md5-state))
          ;; LENGTH respects the fill pointer.
          (end (or end (length sequence))))
      (declare (type md5-state state))
      ;; DATA is the underlying simple vector; REAL-START is START
      ;; translated into an index into DATA.
      (sb-kernel:with-array-data ((data sequence)
                                  (real-start start)
                                  (real-end end)
                                  :check-fill-pointer t)
        (declare (ignore real-end))
        (update-md5-state state data
                          :start real-start
                          :end (+ real-start (- end start))))
      (finalize-md5-state state))))
(defun md5sum-string (string &key (external-format :default) (start 0) end)
  "Return the MD5 message-digest of STRING after encoding it to octets
in EXTERNAL-FORMAT.  START and END are character positions in STRING,
not octet positions in the encoded representation."
  (declare (optimize (speed 3) (safety 3) (space 0) (debug 1))
           (type string string)
           (type fixnum start))
  (locally
      (declare (optimize (safety 1) (debug 0)))
    ;; Encode first, then digest the resulting octet vector.
    (let ((octets (sb-ext:string-to-octets string
                                           :external-format external-format
                                           :start start
                                           :end end)))
      (md5sum-sequence octets))))
(defconstant +buffer-size+ (* 128 1024)
  "Size of the internal buffer used by md5sum-stream and md5sum-file.
This should be a multiple of 64, the MD5 block size.")

(deftype buffer-index ()
  ;; Valid element counts for the buffer, from 0 up to and including
  ;; +BUFFER-SIZE+.
  (list 'integer 0 +buffer-size+))
(defun md5sum-stream (stream)
  "Calculate an MD5 message-digest of the contents of STREAM, whose
element-type has to be (UNSIGNED-BYTE 8).  Signals an error for any
other stream element-type."
  (declare (optimize (speed 3) (safety 3) (space 0) (debug 1)))
  (declare (type stream stream))
  (locally
      (declare (optimize (safety 1) (debug 0)))
    (let ((state (make-md5-state)))
      (declare (type md5-state state))
      (cond
        ((equal (stream-element-type stream) '(unsigned-byte 8))
         (let ((buffer (make-array +buffer-size+
                                   :element-type '(unsigned-byte 8))))
           (declare (type (simple-array (unsigned-byte 8) (#.+buffer-size+))
                          buffer))
           ;; A read that returns fewer than +BUFFER-SIZE+ elements ends
           ;; the loop.
           (loop for bytes of-type buffer-index = (read-sequence buffer stream)
                 do (update-md5-state state buffer :end bytes)
                 until (< bytes +buffer-size+)
                 finally
                 (return (finalize-md5-state state)))))
        ;; Character streams: this clause is disabled by the #+(or)
        ;; reader conditional.
        #+(or)
        ((equal (stream-element-type stream) 'character)
         (let ((buffer (make-string +buffer-size+)))
           (declare (type (simple-string #.+buffer-size+) buffer))
           (loop for bytes of-type buffer-index = (read-sequence buffer stream)
                 do (update-md5-state state buffer :end bytes)
                 until (< bytes +buffer-size+)
                 finally
                 (return (finalize-md5-state state)))))
        ;; Fallback clause: its T test is required, otherwise the ERROR
        ;; form would itself be read as a COND clause.
        (t
         (error "Unsupported stream element-type ~S for stream ~S."
                (stream-element-type stream) stream))))))
(defun md5sum-file (pathname)
  "Return the MD5 message-digest of the contents of the file designated
by PATHNAME.  The file is opened as an (unsigned-byte 8) stream and
digested with `md5sum-stream'."
  (declare (optimize (speed 3) (safety 3) (space 0) (debug 1)))
  (locally
      (declare (optimize (safety 1) (debug 0)))
    (with-open-file (input pathname :element-type '(unsigned-byte 8))
      (md5sum-stream input))))