#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page
# boundaries. Initially alignment was handled in a pure AltiVec/VMX way
# [with data aligned programmatically, which in turn guarantees
# exception-free execution], but that turned out to hamper performance
# when vcipher instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower than
# the additional overhead of the pure AltiVec approach.
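#
# The C prototypes below are what the OpenSSL callers (cf. the
# declarations in crypto/evp/e_aes.c) assume for the entry points
# generated by this module; they are reproduced here for reference only:
#
#	int  aes_p8_set_encrypt_key(const unsigned char *userKey,
#				const int bits, AES_KEY *key);
#	int  aes_p8_set_decrypt_key(const unsigned char *userKey,
#				const int bits, AES_KEY *key);
#	void aes_p8_encrypt(const unsigned char *in, unsigned char *out,
#				const AES_KEY *key);
#	void aes_p8_decrypt(const unsigned char *in, unsigned char *out,
#				const AES_KEY *key);
#	void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#				size_t length, const AES_KEY *key,
#				unsigned char *ivec, const int enc);
#	void aes_p8_ctr32_encrypt_blocks(const unsigned char *in,
#				unsigned char *out, size_t len,
#				const AES_KEY *key,
#				const unsigned char ivec[16]);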

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
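
# Typical invocation (a sketch; the flavour must be one understood by
# ppc-xlate.pl, e.g. linux32, linux64 or linux64le):
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s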

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256
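
# In FIPS-197 terms, each Loop128 iteration below derives the next four
# round-key words: the vperm "rotate-n-splat" replicates RotWord(w[i-1])
# into all four lanes, vcipherlast against rcon reduces that to
# SubWord(RotWord(w[i-1])) xor rcon [ShiftRows is a no-op on a splatted
# value], and the three vsldoi/vxor pairs propagate the chained
# w[i] ^= w[i-1] xors across the vector.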

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds
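
# the decryption schedule for vncipher is just the encryption schedule
# traversed in reverse, so the loop below swaps round key i with round
# key rounds-i, 16 bytes at a time, using scalar word loads and stores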
Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}

#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec
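
# The usual CBC recurrences are handled below: encryption computes
# C[i] = E(P[i] xor C[i-1]) with C[-1] = IV and is inherently serial,
# while decryption computes P[i] = D(C[i]) xor C[i-1] and is
# parallelizable [see _aesp8_cbc_decrypt8x for blocks of 8]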

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
	$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
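# CBC decryption has no inter-block dependency, so eight blocks are
# processed with their vncipher chains interleaved to hide instruction
# latency; lvx_u/stvx_u [VSX] loads and stores absorb misalignment, as
# noted in the header.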

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not typo
	subi	$inp,$inp,15		# undo "caller"

	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such a
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1
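
# one now holds {0,0,0,1}, so the vadduwm-s below bump only the last
# 32-bit word of the counter block: a ctr32 increment, with no carry
# into the upper 96 bits of the IV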

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1

	${UCMP}i	$len,8
	bge	_aesp8_ctr32_encrypt8x

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	lvx	$rndkey0,0,$key
	mtctr	$rounds
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	b	Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_ctr32_enc

	vadduwm	$ivec,$ivec,$one
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
	$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);
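# CTR is likewise embarrassingly parallel: eight consecutive counter
# values are derived from the IV with vadduwm and pre-xored with round
# key 0, then eight vcipher chains run interleaved.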

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduwm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4

	vadduwm	$out1,$ivec,$one	# counter values ...
	vadduwm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduwm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduwm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduwm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduwm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduwm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduwm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0

	mtctr	$rounds
	b	Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x

	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such a
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduwm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduwm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduwm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduwm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduwm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduwm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduwm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduwm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10

Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}
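
# post-process $code: expand the backquoted `...` arithmetic, emit the
# constants table in an endian-agnostic .byte form, and rewrite the
# '?'-prefixed endian-sensitive instructions for the requested flavour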
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
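	# [e.g., on little-endian "?vperm v1,v2,v3,v5" is emitted with its
	# middle operands swapped, as "vperm v1,v3,v2,v5"]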
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;