2 * SHA transform optimized for ARM
4 * Copyright: (C) 2005 by Nicolas Pitre <nico@cam.org>
5 * Created: September 17, 2005
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
13 .globl arm_sha_transform
16 * void arm_sha_transform(uint32_t *hash, const unsigned char *data, uint32_t *W);
18 * note: the "data" pointer may be unaligned.
/* Function entry and the visible part of the input-word assembly loop.
 * The byte loads and the store of the merged word are on lines not shown
 * here, so the notes below describe only what each visible insn does. */
23 stmfd sp!, {r4 - r8, lr}	/* push r4-r8 and lr (full-descending stack, sp written back) */
25 @ for (i = 0; i < 16; i++)
26 @ W[i] = ntohl(((uint32_t *)data)[i]);
43 orr r5, r5, r4, lsl #8	/* r5 |= r4 << 8 */
44 orr r6, r6, r5, lsl #8	/* r6 |= r5 << 8 (r5 already includes r4) */
45 orr r7, r7, r6, lsl #8	/* r7 |= r6 << 8; presumably r7 now holds one big-endian word built from four separately loaded bytes, since "data" may be unaligned -- confirm against the loads */
50 @ for (i = 0; i < 64; i++)
51 @ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31);
68 * The SHA functions are:
70 * f1(B,C,D) = (D ^ (B & (C ^ D)))
71 * f2(B,C,D) = (B ^ C ^ D)
72 * f3(B,C,D) = ((B & C) | (D & (B | C)))
74 * Then the sub-blocks are processed as follows:
76 * A' = ror(A, 27) + f(B,C,D) + E + K + *W++
82 * We therefore unroll each loop 5 times to avoid register shuffling.
83 * Also the ror for C (and also D and E which are successively derived
84 * from it) is applied in place to cut on an additional mov insn for
/* sha_f1: one round step using f1(B,C,D) = (D ^ (B & (C ^ D))).
 * Arguments A..E are register names; the result accumulates in E.
 * Uses ip as scratch. Some lines of this macro are not visible here. */
88 .macro sha_f1, A, B, C, D, E
91 add \E, r1, \E, ror #2	/* E = r1 + ror(E, 2); r1 is loaded from .L_sha_K elsewhere, so presumably the round constant K */
92 and ip, \B, ip, ror #2	/* ip = B & ror(ip, 2); ip's previous value is set on a line not shown */
93 add \E, \E, \A, ror #27	/* E += ror(A, 27) */
94 eor ip, ip, \D, ror #2	/* ip ^= ror(D, 2) */
/* sha_f2: one round step using f2(B,C,D) = (B ^ C ^ D).
 * Arguments A..E are register names; the result accumulates in E.
 * Uses ip as scratch. Some lines of this macro are not visible here. */
99 .macro sha_f2, A, B, C, D, E
101 add \E, r1, \E, ror #2	/* E = r1 + ror(E, 2); r1 is loaded from .L_sha_K elsewhere, so presumably the round constant K */
102 eor ip, \B, \C, ror #2	/* ip = B ^ ror(C, 2) */
103 add \E, \E, \A, ror #27	/* E += ror(A, 27) */
104 eor ip, ip, \D, ror #2	/* ip = B ^ ror(C, 2) ^ ror(D, 2), i.e. f2 with the rotation of C and D applied in place */
/* sha_f3: one round step using f3(B,C,D) = ((B & C) | (D & (B | C))).
 * Arguments A..E are register names; the result accumulates in E.
 * Uses ip and r3 as scratch. Some lines of this macro are not visible here. */
109 .macro sha_f3, A, B, C, D, E
111 add \E, r1, \E, ror #2	/* E = r1 + ror(E, 2); r1 is loaded from .L_sha_K elsewhere, so presumably the round constant K */
112 orr ip, \B, \C, ror #2	/* ip = B | ror(C, 2) */
113 add \E, \E, \A, ror #27	/* E += ror(A, 27) */
114 and ip, ip, \D, ror #2	/* ip = ror(D, 2) & (B | ror(C, 2)): the (D & (B | C)) term */
116 and r3, \B, \C, ror #2	/* r3 = B & ror(C, 2): the (B & C) term */
126 /* adjust initial values */
/* Unrolled round groups, final accumulation and return.
 * The working values live in r4-r8. Each macro call names them in the order
 * A, B, C, D, E and accumulates into E, and successive calls rotate the roles
 * by one register so no values have to be moved between rounds.
 * Loop labels, counters and most constant loads are on lines not shown here. */
132 sha_f1 r4, r5, r6, r7, r8	/* A=r4 B=r5 C=r6 D=r7 E=r8 */
133 sha_f1 r8, r4, r5, r6, r7	/* previous E (r8) now plays A */
134 sha_f1 r7, r8, r4, r5, r6
135 sha_f1 r6, r7, r8, r4, r5
136 sha_f1 r5, r6, r7, r8, r4	/* after five calls the roles are back to the starting assignment */
143 sha_f2 r4, r5, r6, r7, r8	/* same five-way role rotation, using f2 */
144 sha_f2 r8, r4, r5, r6, r7
145 sha_f2 r7, r8, r4, r5, r6
146 sha_f2 r6, r7, r8, r4, r5
147 sha_f2 r5, r6, r7, r8, r4
154 sha_f3 r4, r5, r6, r7, r8	/* same five-way role rotation, using f3 */
155 sha_f3 r8, r4, r5, r6, r7
156 sha_f3 r7, r8, r4, r5, r6
157 sha_f3 r6, r7, r8, r4, r5
158 sha_f3 r5, r6, r7, r8, r4
161 ldr r1, .L_sha_K + 12	/* r1 = word at byte offset 12 from .L_sha_K (the fourth word there) */
165 sha_f2 r4, r5, r6, r7, r8	/* f2 is used again for this group, with the r1 value loaded just above */
166 sha_f2 r8, r4, r5, r6, r7
167 sha_f2 r7, r8, r4, r5, r6
168 sha_f2 r6, r7, r8, r4, r5
169 sha_f2 r5, r6, r7, r8, r4
172 ldmia r0, {r1, r2, r3, ip, lr}	/* load five consecutive words from [r0] (r0 not written back); presumably the incoming hash[0..4] -- confirm r0 still holds the first argument */
175 add r6, r3, r6, ror #2	/* r6 = r3 + ror(r6, 2): third loaded word plus r6 with its pending rotation applied */
176 add r7, ip, r7, ror #2	/* r7 = ip + ror(r7, 2) */
177 add r8, lr, r8, ror #2	/* r8 = lr + ror(r8, 2) */
180 ldmfd sp!, {r4 - r8, pc}	/* pop r4-r8 and load the lr value pushed at entry into pc, returning to the caller */
183 .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6