#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for Alpha.

# On 21264 performance is 33% better than code generated by vendor
# compiler, and 75% better than GCC [3.4], and in absolute terms is
# 8.7 cycles per processed byte. Implementation features vectorized
# byte swap, but not Xupdate.
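#
# In other words (summarizing the code below): input is fetched 8 bytes
# at a time with ldq_u/extql/extqh, so unaligned input cannot fault, and
# each pair of 32-bit words is converted to big-endian in one pass with
# srl/sll shifts merged through zapnot byte masks. The message schedule
# itself stays scalar and is interleaved into the following round
# ("forward Xupdate") rather than vectorized.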
@X=(	"\$0",	"\$1",	"\$2",	"\$3",	"\$4",	"\$5",	"\$6",	"\$7",
	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15");
$ctx="a0";	# $16
$inp="a1";
$num="a2";
$A="a3";
$B="a4";	# 20
$C="a5";
$D="t8";
$E="t9";	@V=($A,$B,$C,$D,$E);
$t0="t10";	# 24
$t1="t11";
$t2="ra";
$t3="t12";
$K="AT";	# 28
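
# BODY_00_19 emits one of rounds 0..19: F(b,c,d) = (b AND c) OR (NOT b AND d),
# computed here as (b&c)|(d&~b) via and/bic/or. Even-numbered rounds also
# prefetch and byte-swap upcoming input words; from round 15 on, the schedule
# update for the next round is folded in ("forward Xupdate").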
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i==0);
	ldq_u	@X[0],0+0($inp)
	ldq_u	@X[1],0+7($inp)
___
$code.=<<___ if (!($i&1) && $i<14);
	ldq_u	@X[$i+2],($i+2)*4+0($inp)
	ldq_u	@X[$i+3],($i+2)*4+7($inp)
___
$code.=<<___ if (!($i&1) && $i<15);
	extql	@X[$i],$inp,@X[$i]
	extqh	@X[$i+1],$inp,@X[$i+1]

	or	@X[$i+1],@X[$i],@X[$i]	# pair of 32-bit values are fetched

	srl	@X[$i],24,$t0		# vectorized byte swap
	srl	@X[$i],8,$t2

	sll	@X[$i],8,$t3
	sll	@X[$i],24,@X[$i]
	zapnot	$t0,0x11,$t0
	zapnot	$t2,0x22,$t2

	zapnot	@X[$i],0x88,@X[$i]
	or	$t0,$t2,$t0
	zapnot	$t3,0x44,$t3
	sll	$a,5,$t1

	or	@X[$i],$t0,@X[$i]
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	or	@X[$i],$t3,@X[$i]
	srl	$a,27,$t0
	bic	$d,$b,$t3
	sll	$b,30,$b

	extll	@X[$i],4,@X[$i+1]	# extract upper half
	or	$t2,$t3,$t2
	addl	@X[$i],$e,$e

	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if (($i&1) && $i<15);
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	srl	$a,27,$t0
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	sll	$b,30,$b

	or	$t2,$t3,$t2
	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if ($i>=15);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	zapnot	$a,0xf,$a
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	addl	$t1,$e,$e
	or	$t2,$t3,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t0,$e,$e
	srl	@X[$j%16],31,$t1

	addl	$t2,$e,$e
	srl	$b,32,$t3
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	zapnot	@X[$i%16],0xf,@X[$i%16]
	or	$t1,@X[$j%16],@X[$j%16]
___
}
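
# BODY_20_39 emits one of rounds 20..39 (and, reused below, 60..79):
# F(b,c,d) = b XOR c XOR d, again with the forward Xupdate interleaved.
# Round 79 instead reloads the five context words, so the final additions
# overlap the last round.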
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t2,$e,$e
	srl	$a,27,$t0
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t0,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
___
$code.=<<___ if ($i<77);
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
$code.=<<___ if ($i==79);	# with context fetch
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	ldl	@X[0],0($ctx)

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	ldl	@X[1],4($ctx)

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	ldl	@X[2],8($ctx)

	srl	$a,27,$t0
	addl	$t2,$e,$e
	ldl	@X[3],12($ctx)

	or	$t3,$b,$b
	addl	$t0,$e,$e
	ldl	@X[4],16($ctx)
___
}
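
# BODY_40_59 emits one of rounds 40..59: F(b,c,d) = majority(b,c,d),
# computed as (b&c)|(b&d)|(c&d), with the forward Xupdate folded in.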
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___;	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	and	$b,$c,$t2
	and	$b,$d,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t1,$e,$e
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t0,$e,$e
	or	$t2,$t3,$t2
	and	$c,$d,$t3

	or	$t2,$t3,$t2
	srl	$b,32,$t3
	addl	@X[$i%16],$e,$e
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t2,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
}
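
# Top level: the prologue saves the callee-saved registers named in .mask,
# loads the five 32-bit state words from $ctx, and turns $num (block count)
# into an end pointer ($inp + 64*$num). Each stage's round constant is
# materialized with an ldah/lda pair, e.g. 23170<<16 + 31129 = 0x5a827999
# for rounds 0..19.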
$code=<<___;
#include <machine/asm.h>

.text

.set	noat
.set	noreorder
.globl	sha1_block_data_order
.align	5
.ent	sha1_block_data_order
sha1_block_data_order:
	lda	sp,-64(sp)
	stq	ra,0(sp)
	stq	s0,8(sp)
	stq	s1,16(sp)
	stq	s2,24(sp)
	stq	s3,32(sp)
	stq	s4,40(sp)
	stq	s5,48(sp)
	stq	fp,56(sp)
	.mask	0x0400fe00,-64
	.frame	sp,64,ra
	.prologue 0

	ldl	$A,0($ctx)
	ldl	$B,4($ctx)
	sll	$num,6,$num
	ldl	$C,8($ctx)
	ldl	$D,12($ctx)
	ldl	$E,16($ctx)
	addq	$inp,$num,$num

.Lloop:
	.set	noreorder
	ldah	$K,23170(zero)
	zapnot	$B,0xf,$B
	lda	$K,31129($K)	# K_00_19
___
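# The Perl loops below emit all 80 unrolled rounds; unshift(@V,pop(@V))
# rotates the working-variable list (A,B,C,D,E) by one position per round,
# which plays the role of the usual a..e renaming.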
for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldah	$K,28378(zero)
	lda	$K,-5215($K)	# K_20_39
___
for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-28900(zero)
	lda	$K,-17188($K)	# K_40_59
___
for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-13725(zero)
	lda	$K,-15914($K)	# K_60_79
___
for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stl	$A,0($ctx)
	stl	$B,4($ctx)
	addq	$inp,64,$inp
	stl	$C,8($ctx)
	stl	$D,12($ctx)
	stl	$E,16($ctx)
	cmpult	$inp,$num,$t1
	bne	$t1,.Lloop

	.set	noreorder
	ldq	ra,0(sp)
	ldq	s0,8(sp)
	ldq	s1,16(sp)
	ldq	s2,24(sp)
	ldq	s3,32(sp)
	ldq	s4,40(sp)
	ldq	s5,48(sp)
	ldq	fp,56(sp)
	lda	sp,64(sp)
	ret	(ra)
.end	sha1_block_data_order
.ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
$output=shift and open STDOUT,">$output";
print $code;
close STDOUT;