OpenSSL: update to 1.0.2a
[tomato.git] / release / src / router / openssl / crypto / sha / asm / sha1-ppc.pl
blobdf5989610c4c70571e30499ce3bc7b69d02e59bc
#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# I let hardware handle unaligned input(*), except on page boundaries
# (see below for details). Otherwise straightforward implementation
# with X vector in register bank.
#
# (*) this means that this module is inappropriate for PPC403? Does
#     anybody know if pre-POWER3 can sustain unaligned load?

#                       -m64    -m32
# ----------------------------------
# PPC970,gcc-4.0.0      +76%    +59%
# Power6,xlc-7          +68%    +33%
# The first command-line argument names the target "flavour" (for example
# linux64le).  It selects the pointer size and the load/store/compare
# mnemonics that the generated prologue, epilogue and loop control use.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;		# bytes per saved register / pointer
	$LRSAVE	=2*$SIZE_T;	# LR save slot, relative to the caller's sp
	$UCMP	="cmpld";	# compare used on the block counter
	$STU	="stdu";	# store-with-update, allocates the frame
	$POP	="ld";		# restores a saved register
	$PUSH	="std";		# saves a register
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$UCMP	="cmplw";
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Define endianness based on flavour
# i.e.: linux64le
# The value is $SIZE_T (non-zero) when the flavour name ends in "le" and
# 0 otherwise; loadbe only tests it for truth.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
# Locate ppc-xlate.pl relative to this script: first in the script's own
# directory, then in ../../perlasm/ below it.  When $0 carries no directory
# part, $dir is empty and the paths are relative to the current directory.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# From here on everything printed to STDOUT is piped through the
# translator, which is given the flavour and the next command-line
# argument (presumably the output file name -- confirm against
# ppc-xlate.pl).
#
# The low-precedence "or" is essential: with "||" the die clause bound to
# the command string, which is always true, so a failed open was never
# reported.
open(STDOUT,"| $^X $xlate $flavour ".shift(@ARGV))
	or die "can't call $xlate: $!";
# Stack frame geometry, in bytes.
$FRAME=24*$SIZE_T+64;	# total size of the frame allocated in the prologue
$LOCALS=6*$SIZE_T;	# offset of the 64-byte buffer that receives a copy
			# of an input block straddling a page boundary

# Fixed register assignments.
$K  ="r0";	# current round constant
$sp ="r1";	# stack pointer
$toc="r2";	# not referenced by the code visible in this file
$ctx="r3";	# 1st argument: address of the five 32-bit state words
$inp="r4";	# 2nd argument: input pointer
$num="r5";	# 3rd argument: number of 64-byte blocks
$t0 ="r15";	# scratch
$t1 ="r6";	# scratch

# Working variables.  $T is the sixth, spare register; each round writes
# its result there and @V is then rotated by one position.
$A  ="r7";
$B  ="r8";
$C  ="r9";
$D  ="r10";
$E  ="r11";
$T  ="r12";

@V=($A,$B,$C,$D,$E,$T);
# Sixteen-word message schedule, kept entirely in registers.
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
    "r24","r25","r26","r27","r28","r29","r30","r31");
# Append to $code the instructions that load one big-endian 32-bit word.
#
#   $dst      - name of the destination register
#   $src      - memory operand text, e.g. "0(r4)"
#   $temp_reg - scratch register; only used for little-endian flavours
#
# For big-endian flavours a single lwz is emitted.  For little-endian
# flavours the word is loaded into $temp_reg and its byte order is swapped
# into $dst with one rotate and two rotate-and-insert instructions.
sub loadbe {
my ($dst, $src, $temp_reg) = @_;
$code.=<<___ if (!$LITTLE_ENDIAN);
	lwz	$dst,$src
___
$code.=<<___ if ($LITTLE_ENDIAN);
	lwz	$temp_reg,$src
	rotlwi	$dst,$temp_reg,8
	rlwimi	$dst,$temp_reg,24,0,7
	rlwimi	$dst,$temp_reg,24,16,23
___
}
# Append the code for one of rounds 0..19 to $code.
#
#   $i         - round number
#   $a .. $e   - registers currently holding the five working variables
#   $f         - spare register; receives the value computed by this round
#
# The emitted code computes
#     $f = K + $e + rol($a,5) + X[$i] + (($c & $b) | ($d & ~$b))
# and rotates $b left by 30.  $e is reused as a temporary and holds
# rol($a,5) afterwards.
#
# While $i < 15 the next schedule word X[$i+1] is loaded from the input
# (X[0] as well when $i == 0).  From $i == 15 on, the next word is derived
# in place: X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), all indices
# taken modulo 16, with j = $i+1.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;

	# Since the last value of $f is discarded, we can use
	# it as a temp reg to swap byte-order when needed.
	loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
	loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
$code.=<<___ if ($i<15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	add	$f,$f,@X[$i]
	and	$t0,$c,$b
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	add	$f,$f,$t0
___
$code.=<<___ if ($i>=15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$c,$b
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$t0
	rotlwi	@X[$j%16],@X[$j%16],1
___
}
# Append the code for one round that uses the three-way XOR function.
# The driver calls this for rounds 20..39 and again for rounds 60..79.
#
#   $i         - round number
#   $a .. $e   - registers currently holding the five working variables
#   $f         - spare register; receives the value computed by this round
#
# The emitted code computes
#     $f = K + $e + rol($a,5) + X[$i] + ($b ^ $c ^ $d)
# and rotates $b left by 30.  $e is reused as a temporary.
#
# For every round but the last, the next schedule word is derived in
# place: X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo
# 16, j = $i+1.  Round 79 needs no further schedule word; its slots are
# used instead to load the five state words from $ctx into r16..r20,
# ready to be added to the result.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	xor	$t0,$t0,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$e
	rotlwi	@X[$j%16],@X[$j%16],1
___
$code.=<<___ if ($i==79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	lwz	r16,0($ctx)
	add	$f,$f,@X[$i%16]
	xor	$t0,$t0,$c
	lwz	r17,4($ctx)
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	lwz	r18,8($ctx)
	lwz	r19,12($ctx)
	add	$f,$f,$e
	lwz	r20,16($ctx)
___
}
# Append the code for one of rounds 40..59 to $code.
#
#   $i         - round number
#   $a .. $e   - registers currently holding the five working variables
#   $f         - spare register; receives the value computed by this round
#
# The emitted code computes
#     $f = K + $e + rol($a,5) + X[$i] + (($b & $c) | (($b | $c) & $d))
# and rotates $b left by 30.  $e is reused as a temporary.  The next
# schedule word is derived in place:
#     X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1)
# with indices modulo 16 and j = $i+1.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$b,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	or	$t1,$b,$c
	rotlwi	$b,$b,30
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	and	$t1,$t1,$d
	or	$t0,$t0,$t1
	rotlwi	@X[$j%16],@X[$j%16],1
	add	$f,$f,$t0
___
}
# Public entry point .sha1_block_data_order.  It receives the state
# pointer in $ctx, the input pointer in $inp and the block count in $num.
#
# Prologue: allocate $FRAME bytes, save r15..r31 and the link register,
# and load the five state words from $ctx into $A..$E.
#
# Dispatch:
#   * Input aligned to 4 bytes: all $num blocks are handed to
#     Lsha1_block_private through the count register.
#   * Otherwise (Lunaligned): work out how many whole 64-byte blocks fit
#     before the next 4096-byte page boundary.  Those are hashed in place.
#     The block that straddles the boundary is copied byte by byte into
#     the frame at $LOCALS and hashed from there, with $inp parked in the
#     frame meanwhile.  This repeats until $num reaches zero.
#
# Epilogue (Ldone): restore the saved registers and link register, release
# the frame and return.  The blr after the frame release is mandatory:
# without it execution would continue into the .long/.byte data words
# emitted right after the function.
$code=<<___;
.machine	"any"
.text

.globl	.sha1_block_data_order
.align	4
.sha1_block_data_order:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
	lwz	$A,0($ctx)
	lwz	$B,4($ctx)
	lwz	$C,8($ctx)
	lwz	$D,12($ctx)
	lwz	$E,16($ctx)
	andi.	r0,$inp,3
	bne	Lunaligned
Laligned:
	mtctr	$num
	bl	Lsha1_block_private
	b	Ldone

; PowerPC specification allows an implementation to be ill-behaved
; upon unaligned access which crosses page boundary. "Better safe
; than sorry" principle makes me treat it specially. But I don't
; look for particular offending word, but rather for 64-byte input
; block which crosses the boundary. Once found that block is aligned
; and hashed separately...
.align	4
Lunaligned:
	subfic	$t1,$inp,4096
	andi.	$t1,$t1,4095	; distance to closest page boundary
	srwi.	$t1,$t1,6	; t1/=64
	beq	Lcross_page
	$UCMP	$num,$t1
	ble-	Laligned	; didn't cross the page boundary
	mtctr	$t1
	subfc	$num,$t1,$num
	bl	Lsha1_block_private
Lcross_page:
	li	$t1,16
	mtctr	$t1
	addi	r20,$sp,$LOCALS	; spot within the frame
Lmemcpy:
	lbz	r16,0($inp)
	lbz	r17,1($inp)
	lbz	r18,2($inp)
	lbz	r19,3($inp)
	addi	$inp,$inp,4
	stb	r16,0(r20)
	stb	r17,1(r20)
	stb	r18,2(r20)
	stb	r19,3(r20)
	addi	r20,r20,4
	bdnz	Lmemcpy

	$PUSH	$inp,`$FRAME-$SIZE_T*18`($sp)
	li	$t1,1
	addi	$inp,$sp,$LOCALS
	mtctr	$t1
	bl	Lsha1_block_private
	$POP	$inp,`$FRAME-$SIZE_T*18`($sp)
	addic.	$num,$num,-1
	bne-	Lunaligned

Ldone:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
___
# This is private block function, which uses tailored calling
# interface, namely upon entry SHA_CTX is pre-loaded to given
# registers and counter register contains amount of chunks to
# digest...
#
# The 80 rounds are generated in four groups of 20, each preceded by the
# load of its round constant into $K.  After every round @V is rotated by
# one position, so the register that just received the round result
# becomes the first working variable of the next round.
#
# After 80 rotations of the six-entry @V the roles have shifted by two
# (80 mod 6), which is why the final values are found in $E,$T,$A,$B,$C.
# They are added to the state words that round 79 loaded into r16..r20,
# stored back to $ctx and copied into $A..$E for the next block.  $inp is
# advanced by 64 bytes and the loop repeats while the count register is
# non-zero; the function then returns with blr.  That blr is mandatory,
# otherwise execution would run into the data words that follow.
$code.=<<___;
.align	4
Lsha1_block_private:
___
$code.=<<___;	# load K_00_19
	lis	$K,0x5a82
	ori	$K,$K,0x7999
___
for($i=0;$i<20;$i++)	{ BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_20_39
	lis	$K,0x6ed9
	ori	$K,$K,0xeba1
___
for(;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_40_59
	lis	$K,0x8f1b
	ori	$K,$K,0xbcdc
___
for(;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_60_79
	lis	$K,0xca62
	ori	$K,$K,0xc1d6
___
for(;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	add	r16,r16,$E
	add	r17,r17,$T
	add	r18,r18,$A
	add	r19,r19,$B
	add	r20,r20,$C
	stw	r16,0($ctx)
	mr	$A,r16
	stw	r17,4($ctx)
	mr	$B,r17
	stw	r18,8($ctx)
	mr	$C,r18
	stw	r19,12($ctx)
	mr	$D,r19
	stw	r20,16($ctx)
	mr	$E,r20
	addi	$inp,$inp,`16*4`
	bdnz-	Lsha1_block_private
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.size	.sha1_block_data_order,.-.sha1_block_data_order
___
# Identification string embedded in the generated object.
$code.=<<___;
.asciz	"SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
___

# Replace every `expr` in the generated text by the result of evaluating
# expr as Perl.  This resolves the frame-offset arithmetic and the input
# offsets that were left symbolic above.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# STDOUT is a pipe to the translator.  close reports a write error or a
# failing translator, so its result must be checked; otherwise a broken
# or truncated output would pass silently.
close STDOUT or die "error closing STDOUT: $! (status $?)";