/*
 * Merge branch 'jc/grep'
 * [git/jnareb-git.git] / ppc / sha1ppc.S
 * blob e85611a4ef0598f45911357d0d2f1fc354039de4
 */
/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */
/*
 * Size in bytes of the stack frame allocated by sha1_core.  The saved
 * copies of r15 - r31 occupy its top 68 bytes (offsets FS-68 .. FS-4).
 */
#define FS 80
/*
 * The six values T, A, B, C, D, E live in registers 7 - 12 and are
 * never copied between rounds.  Instead the name -> register mapping
 * shifts by one register per round: the register called T in round t
 * is the one called A in round t+1, A becomes B, and so on, with the
 * old E register being reused for the next T.
 *
 * ROLLREG(t, slot) picks the register for a given slot in round t.
 */
#define ROLLREG(t, slot)        ((((t)+(slot))%6)+7)

#define RT(t)   ROLLREG(t, 5)
#define RA(t)   ROLLREG(t, 4)
#define RB(t)   ROLLREG(t, 3)
#define RC(t)   ROLLREG(t, 2)
#define RD(t)   ROLLREG(t, 1)
#define RE(t)   ROLLREG(t, 0)
/*
 * The message schedule is kept as a 16-entry ring in registers
 * 16 - 31: W(t) and W(t+16) name the same register.
 */
#define W(t)    (16+((t)%16))
/*
 * One SHA-1 round with the "choose" function
 *      f = (B & C) | (D & ~B)
 * Leaves T = rotl(A,5) + f + E + K + W(t) in RT(t), where K is read
 * from %r15, and rotates B left by 30 bits in place.
 * %r0 and %r6 are scratch.
 */
#define STEPD0(t)                                                       \
        rotlwi  RT(t),RA(t),5;          /* T  = rotl(A,5)       */      \
        and     %r6,RB(t),RC(t);        /* r6 = B & C           */      \
        andc    %r0,RD(t),RB(t);        /* r0 = D & ~B          */      \
        or      %r6,%r6,%r0;            /* r6 = f               */      \
        rotlwi  RB(t),RB(t),30;         /* B  = rotl(B,30)      */      \
        add     %r0,RE(t),%r15;         /* r0 = E + K           */      \
        add     %r0,%r0,W(t);           /* r0 = E + K + W(t)    */      \
        add     RT(t),RT(t),%r6;        /* T += f               */      \
        add     RT(t),RT(t),%r0         /* T += E + K + W(t)    */
/*
 * One SHA-1 round with the "parity" function
 *      f = B ^ C ^ D
 * Leaves T = rotl(A,5) + f + E + K + W(t) in RT(t), where K is read
 * from %r15, and rotates B left by 30 bits in place.
 * %r0 and %r6 are scratch.
 */
#define STEPD1(t)                                                       \
        rotlwi  RT(t),RA(t),5;          /* T  = rotl(A,5)       */      \
        xor     %r6,RB(t),RC(t);        /* r6 = B ^ C           */      \
        xor     %r6,%r6,RD(t);          /* r6 = f               */      \
        rotlwi  RB(t),RB(t),30;         /* B  = rotl(B,30)      */      \
        add     %r0,RE(t),%r15;         /* r0 = E + K           */      \
        add     %r0,%r0,W(t);           /* r0 = E + K + W(t)    */      \
        add     RT(t),RT(t),%r6;        /* T += f               */      \
        add     RT(t),RT(t),%r0         /* T += E + K + W(t)    */
/*
 * One SHA-1 round with the "majority" function
 *      f = (B & C) | (B & D) | (C & D)
 * Leaves T = rotl(A,5) + f + E + K + W(t) in RT(t), where K is read
 * from %r15, and rotates B left by 30 bits in place.
 * %r0 and %r6 are scratch.
 */
#define STEPD2(t)                                                       \
        rotlwi  RT(t),RA(t),5;          /* T  = rotl(A,5)       */      \
        and     %r6,RB(t),RC(t);        /* r6 = B & C           */      \
        and     %r0,RB(t),RD(t);        /* r0 = B & D           */      \
        or      %r6,%r6,%r0;            /* r6 = B&C | B&D       */      \
        and     %r0,RC(t),RD(t);        /* r0 = C & D           */      \
        or      %r6,%r6,%r0;            /* r6 = f               */      \
        rotlwi  RB(t),RB(t),30;         /* B  = rotl(B,30)      */      \
        add     %r0,RE(t),%r15;         /* r0 = E + K           */      \
        add     %r0,%r0,W(t);           /* r0 = E + K + W(t)    */      \
        add     RT(t),RT(t),%r6;        /* T += f               */      \
        add     RT(t),RT(t),%r0         /* T += E + K + W(t)    */
/* Fetch 32-bit message word t of the current block (based at %r4) into W(t). */
#define LOADW(t)        lwz     W(t),(t)*4(%r4)
/*
 * Message schedule expansion:
 *      W(t) = rotl(W(t-3) ^ W(t-8) ^ W(t-14) ^ W(t-16), 1)
 * W(t-16) is the register being overwritten (same ring slot as W(t)).
 * %r0 is scratch.
 */
#define UPDATEW(t)                                                      \
        xor     W(t),W((t)-16),W((t)-14);                               \
        xor     %r0,W((t)-8),W((t)-3);                                  \
        xor     W(t),W(t),%r0;                                          \
        rotlwi  W(t),W(t),1
/* Round t with the STEPD0 function, then fetch the message word needed 4 rounds later. */
#define STEP0LD1(t)     STEPD0(t); LOADW((t)+4)

/* Four consecutive STEPD0 rounds t .. t+3, each paired with a lookahead load. */
#define STEP0LD4(t)                                                     \
        STEP0LD1(t);                                                    \
        STEP0LD1((t)+1);                                                \
        STEP0LD1((t)+2);                                                \
        STEP0LD1((t)+3)
/*
 * Round t with round macro STEP<fn> (fn is D0, D1 or D2), then compute
 * the schedule word needed 4 rounds later.
 */
#define STEPUP1(t, fn)  STEP##fn(t); UPDATEW((t)+4)

/* Four consecutive rounds t .. t+3, each paired with a schedule update. */
#define STEPUP4(t, fn)                                                  \
        STEPUP1(t, fn);                                                 \
        STEPUP1((t)+1, fn);                                             \
        STEPUP1((t)+2, fn);                                             \
        STEPUP1((t)+3, fn)
/* Eight consecutive rounds t .. t+7 built from two STEPUP4 groups. */
#define STEPUP8(t, fn)                                                  \
        STEPUP4(t, fn);                                                 \
        STEPUP4((t)+4, fn)

/* Twenty consecutive rounds t .. t+19, all using round macro STEP<fn>. */
#define STEPUP20(t, fn)                                                 \
        STEPUP8(t, fn);                                                 \
        STEPUP8((t)+8, fn);                                             \
        STEPUP4((t)+16, fn)
/*
 * sha1_core -- run the 80-round SHA-1 block function over consecutive
 * 64-byte blocks.
 *
 * In:
 *   %r3   pointer to the five 32-bit state words A..E; they are read
 *         on entry and written back after every block
 *   %r4   pointer to the message data, 64 bytes consumed per block
 *   %r5   number of 64-byte blocks to process (0 returns immediately)
 *
 * Register use:
 *   %r0, %r6    scratch (STEP and UPDATEW macros)
 *   %r7-%r12    rolling T, A, B, C, D, E -- see RT() .. RE()
 *   %r15        round constant K for the current 20 rounds
 *   %r16-%r31   message schedule ring W(); %r16-%r20 are reused as
 *               temporaries for the old state once round 79 is done
 *   CTR         blocks remaining
 *   CR0         clobbered by the zero-count check
 *
 * %r15-%r31 are saved in an FS-byte stack frame and restored on exit.
 * Message words are fetched with plain lwz, without any byte swapping
 * (presumably the routine is meant for big-endian operation -- confirm
 * before using it elsewhere).
 */
        .globl  sha1_core
sha1_core:
        /*
         * The block loop tests its counter at the bottom (bdnz), so a
         * count of zero would wrap CTR around and keep hashing far
         * past the end of the input.  Bail out before building a frame.
         */
        cmplwi  %r5,0
        beqlr

        stwu    %r1,-FS(%r1)
        stmw    %r15,FS-68(%r1)         /* r15 - r31 -> FS-68 .. FS-4 */

        /* Load up A - E */
        lwz     RA(0),0(%r3)    /* A */
        lwz     RB(0),4(%r3)    /* B */
        lwz     RC(0),8(%r3)    /* C */
        lwz     RD(0),12(%r3)   /* D */
        lwz     RE(0),16(%r3)   /* E */

        mtctr   %r5             /* CTR = number of blocks */

        /* Per-block loop: W(0..3) up front, the rest is loaded on the fly */
1:      LOADW(0)
        LOADW(1)
        LOADW(2)
        LOADW(3)

        lis     %r15,0x5a82     /* K0-19 */
        ori     %r15,%r15,0x7999
        STEP0LD4(0)             /* rounds 0-11 also load W(4..15) */
        STEP0LD4(4)
        STEP0LD4(8)
        STEPUP4(12, D0)         /* from here on W is computed, not loaded */
        STEPUP4(16, D0)

        lis     %r15,0x6ed9     /* K20-39 */
        ori     %r15,%r15,0xeba1
        STEPUP20(20, D1)

        lis     %r15,0x8f1b     /* K40-59 */
        ori     %r15,%r15,0xbcdc
        STEPUP20(40, D2)

        lis     %r15,0xca62     /* K60-79 */
        ori     %r15,%r15,0xc1d6
        STEPUP4(60, D1)
        STEPUP4(64, D1)
        STEPUP4(68, D1)
        STEPUP4(72, D1)
        STEPD1(76)              /* W(76..79) are ready; no more updates */
        STEPD1(77)
        STEPD1(78)
        STEPD1(79)

        /*
         * Add the previous state (reloaded from memory) to the round-80
         * working values and move the sums into the round-0 registers
         * for the next block.  Each add overwrites a register whose
         * round-80 value has already been consumed.  E is staged in
         * %r20 because RE(0) is the same register as RA(80), which is
         * only read by the last add.
         */
        lwz     %r20,16(%r3)
        lwz     %r19,12(%r3)
        lwz     %r18,8(%r3)
        lwz     %r17,4(%r3)
        lwz     %r16,0(%r3)
        add     %r20,RE(80),%r20
        add     RD(0),RD(80),%r19
        add     RC(0),RC(80),%r18
        add     RB(0),RB(80),%r17
        add     RA(0),RA(80),%r16
        mr      RE(0),%r20
        stw     RA(0),0(%r3)
        stw     RB(0),4(%r3)
        stw     RC(0),8(%r3)
        stw     RD(0),12(%r3)
        stw     RE(0),16(%r3)

        addi    %r4,%r4,64      /* advance to the next 64-byte block */
        bdnz    1b

        lmw     %r15,FS-68(%r1)         /* restore r15 - r31 */
        addi    %r1,%r1,FS
        blr