3 # ====================================================================
4 # Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
5 # <appro@openssl.org>. The module is licensed under 2-clause BSD
6 # license. March 2013. All rights reserved.
7 # ====================================================================
9 ######################################################################
12 # As with other hardware-assisted ciphers CBC encrypt results [for
13 # aligned data] are virtually identical to critical path lengths:
16 # CBC encrypt 4.14/4.15(*) 11.7/11.7
17 # CBC decrypt 1.77/4.11(**) 6.42/7.47
19 # (*) numbers after slash are for misaligned data;
21 # (**) this is result for largest
22 # block size, unlike all other
23 # cases smaller blocks results are virtually same.
# Derive the directory this script lives in from $0 (everything up to
# and including the last / or \), add that directory and its
# ../../perlasm sibling to @INC, then load the shared SPARCv9 helpers.
26 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
27 push(@INC,"${dir}","${dir}../../perlasm");
28 require "sparcv9_modes.pl";
# Only when generating for the 64-bit ABI ($::abibits==64): append
# .register directives that declare %g2 and %g3 as scratch registers.
32 $code.=<<___
if ($::abibits
==64);
33 .register
%g2,#scratch
34 .register
%g3,#scratch
# des_t4_key_expand
#   $inp (%o0) - address the key is read from; it is passed through
#                alignaddr, read with two ldd loads (+0x00, +0x08) and
#                merged by faligndata into %f0, so it may be unaligned.
#   $out (%o1) - address the result is written to.
# A chain of des_kexpand instructions derives further values from %f0;
# sixteen doublewords are stored at $out+0x00 .. $out+0x78. The std
# stores are interleaved with the des_kexpand instructions and are
# therefore not issued in ascending offset order.
26 { my ($inp,$out)=("%o0","%o1");
45 .globl des_t4_key_expand
46 .type des_t4_key_expand
,#function
49 alignaddr
$inp, %g0, $inp
51 ldd
[$inp + 0x00], %f0
52 ldd
[$inp + 0x08], %f2
53 faligndata
%f0, %f2, %f0
54 1: des_kexpand
%f0, 0, %f0
55 des_kexpand
%f0, 1, %f2
56 std
%f0, [$out + 0x00]
57 des_kexpand
%f2, 3, %f6
58 std
%f2, [$out + 0x08]
59 des_kexpand
%f2, 2, %f4
60 des_kexpand
%f6, 3, %f10
61 std
%f6, [$out + 0x18]
62 des_kexpand
%f6, 2, %f8
63 std
%f4, [$out + 0x10]
64 des_kexpand
%f10, 3, %f14
65 std
%f10, [$out + 0x28]
66 des_kexpand
%f10, 2, %f12
67 std
%f8, [$out + 0x20]
68 des_kexpand
%f14, 1, %f16
69 std
%f14, [$out + 0x38]
70 des_kexpand
%f16, 3, %f20
71 std
%f12, [$out + 0x30]
72 des_kexpand
%f16, 2, %f18
73 std
%f16, [$out + 0x40]
74 des_kexpand
%f20, 3, %f24
75 std
%f20, [$out + 0x50]
76 des_kexpand
%f20, 2, %f22
77 std
%f18, [$out + 0x48]
78 des_kexpand
%f24, 3, %f28
79 std
%f24, [$out + 0x60]
80 des_kexpand
%f24, 2, %f26
81 std
%f22, [$out + 0x58]
82 des_kexpand
%f28, 1, %f30
83 std
%f28, [$out + 0x70]
84 std
%f26, [$out + 0x68]
86 std
%f30, [$out + 0x78]
87 .size des_t4_key_expand
,.-des_t4_key_expand
# Single-DES CBC entry points: des_t4_cbc_encrypt and des_t4_cbc_decrypt.
#   ($inp,$out,$len,$key,$ivec) are %o0..%o4.
#   ($ileft,$iright,$omask) are %g1..%g3: $ileft/$iright are the shift
#   counts applied to %g4/%g5 with sllx/srlx, and $omask is the mask used
#   by the stda partial stores on the unaligned-output path.
# Both routines branch to .Lcbc_abort early (be,pn on $::size_t_cc) and
# keep a 16-entry key schedule in %f4..%f34:
#   encrypt loads it from $key+0x00 upwards and performs the ivec fxor
#           before the eight des_round steps;
#   decrypt loads the same entries in reverse ($key+0x78 down to 0x00)
#           and performs the ivec fxor after the eight des_round steps.
# Unaligned output is rotated with faligndata and written with two stda
# partial stores, the second one using the mask inverted by orn; the
# branch back targets <loop label>+4. The ivec is written back starting
# at [$ivec + 0] (from %f0 when encrypting, %f2 when decrypting).
90 { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
91 my ($ileft,$iright,$omask) = map("%g$_",(1..3));
94 .globl des_t4_cbc_encrypt
98 be
,pn
$::size_t_cc
, .Lcbc_abort
100 ld
[$ivec + 0], %f0 ! load ivec
105 sll
$ileft, 3, $ileft
108 prefetch
[$inp + 63], 20
109 sub %g0, $ileft, $iright
111 alignaddrl
$out, %g0, $out
112 srl
$omask, %g4, $omask
117 ldd
[$key + 0x00], %f4 ! load key schedule
118 ldd
[$key + 0x08], %f6
119 ldd
[$key + 0x10], %f8
120 ldd
[$key + 0x18], %f10
121 ldd
[$key + 0x20], %f12
122 ldd
[$key + 0x28], %f14
123 ldd
[$key + 0x30], %f16
124 ldd
[$key + 0x38], %f18
125 ldd
[$key + 0x40], %f20
126 ldd
[$key + 0x48], %f22
127 ldd
[$key + 0x50], %f24
128 ldd
[$key + 0x58], %f26
129 ldd
[$key + 0x60], %f28
130 ldd
[$key + 0x68], %f30
131 ldd
[$key + 0x70], %f32
132 ldd
[$key + 0x78], %f34
140 sllx
%g4, $ileft, %g4
141 srlx
%g5, $iright, %g5
145 prefetch
[$inp + 8+63], 20
147 fxor
%f2, %f0, %f0 ! ^= ivec
148 prefetch
[$out + 63], 22
151 des_round
%f4, %f6, %f0, %f0
152 des_round
%f8, %f10, %f0, %f0
153 des_round
%f12, %f14, %f0, %f0
154 des_round
%f16, %f18, %f0, %f0
155 des_round
%f20, %f22, %f0, %f0
156 des_round
%f24, %f26, %f0, %f0
157 des_round
%f28, %f30, %f0, %f0
158 des_round
%f32, %f34, %f0, %f0
165 brnz
,pt
$len, .Ldes_cbc_enc_loop
168 st
%f0, [$ivec + 0] ! write out ivec
176 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
177 ! and ~4x deterioration
179 faligndata
%f0, %f0, %f2 ! handle unaligned output
181 stda
%f2, [$out + $omask]0xc0 ! partial store
183 orn
%g0, $omask, $omask
184 stda
%f2, [$out + $omask]0xc0 ! partial store
186 brnz
,pt
$len, .Ldes_cbc_enc_loop
+4
187 orn
%g0, $omask, $omask
189 st
%f0, [$ivec + 0] ! write out ivec
192 .type des_t4_cbc_encrypt
,#function
193 .size des_t4_cbc_encrypt
,.-des_t4_cbc_encrypt
195 .globl des_t4_cbc_decrypt
199 be
,pn
$::size_t_cc
, .Lcbc_abort
201 ld
[$ivec + 0], %f2 ! load ivec
206 sll
$ileft, 3, $ileft
209 prefetch
[$inp + 63], 20
210 sub %g0, $ileft, $iright
212 alignaddrl
$out, %g0, $out
213 srl
$omask, %g4, $omask
218 ldd
[$key + 0x78], %f4 ! load key schedule
219 ldd
[$key + 0x70], %f6
220 ldd
[$key + 0x68], %f8
221 ldd
[$key + 0x60], %f10
222 ldd
[$key + 0x58], %f12
223 ldd
[$key + 0x50], %f14
224 ldd
[$key + 0x48], %f16
225 ldd
[$key + 0x40], %f18
226 ldd
[$key + 0x38], %f20
227 ldd
[$key + 0x30], %f22
228 ldd
[$key + 0x28], %f24
229 ldd
[$key + 0x20], %f26
230 ldd
[$key + 0x18], %f28
231 ldd
[$key + 0x10], %f30
232 ldd
[$key + 0x08], %f32
233 ldd
[$key + 0x00], %f34
241 sllx
%g4, $ileft, %g4
242 srlx
%g5, $iright, %g5
246 prefetch
[$inp + 8+63], 20
248 prefetch
[$out + 63], 22
251 des_round
%f4, %f6, %f0, %f0
252 des_round
%f8, %f10, %f0, %f0
253 des_round
%f12, %f14, %f0, %f0
254 des_round
%f16, %f18, %f0, %f0
255 des_round
%f20, %f22, %f0, %f0
256 des_round
%f24, %f26, %f0, %f0
257 des_round
%f28, %f30, %f0, %f0
258 des_round
%f32, %f34, %f0, %f0
261 fxor
%f2, %f0, %f0 ! ^= ivec
268 brnz
,pt
$len, .Ldes_cbc_dec_loop
271 st
%f2, [$ivec + 0] ! write out ivec
276 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
277 ! and ~4x deterioration
279 faligndata
%f0, %f0, %f0 ! handle unaligned output
281 stda
%f0, [$out + $omask]0xc0 ! partial store
283 orn
%g0, $omask, $omask
284 stda
%f0, [$out + $omask]0xc0 ! partial store
286 brnz
,pt
$len, .Ldes_cbc_dec_loop
+4
287 orn
%g0, $omask, $omask
289 st
%f2, [$ivec + 0] ! write out ivec
292 .type des_t4_cbc_decrypt
,#function
293 .size des_t4_cbc_decrypt
,.-des_t4_cbc_decrypt
296 # One might wonder why there are back-to-back des_iip/des_ip
297 # pairs between EDE passes. Indeed, aren't they inverses of each other?
298 # They almost are. The outcome of the pair is the 32-bit words being
299 # swapped in the target register. Consider the des_iip/des_ip pair as a
300 # way to perform the due swap; it is actually the fastest way in this case.
# Triple-DES (EDE3) CBC entry points: des_t4_ede3_cbc_encrypt and
# des_t4_ede3_cbc_decrypt. They use the same register variables as the
# single-DES routines ($inp,$out,$len,$key,$ivec,$ileft,$iright,$omask).
# Each block goes through three passes of eight des_round steps.
# Key schedule usage, as byte offsets from $key:
#   encrypt: pass 1 0x00..0x78 ascending (held in %f4..%f34),
#            pass 2 0xf8 down to 0x80 (written 0x100-0x08 .. 0x100-0x80),
#            pass 3 0x100..0x178 ascending;
#   decrypt: pass 1 0x178 down to 0x100 (held in %f4..%f34),
#            pass 2 0x80..0xf8 ascending,
#            pass 3 0x78 down to 0x00 (written 0x80-0x08 .. 0x80-0x80).
# Only pass 1 stays resident. Passes 2 and 3 are reloaded into %f36..%f62
# inside the loop, with the ldd loads interleaved between des_round steps.
# %f36..%f62 is 14 doubleword registers for a 16-entry pass, so
# %f36/%f38 (and later %f40/%f42) are reloaded right after the des_round
# that consumes their previous contents.
# As in the single-DES routines, encrypt does the ivec fxor before the
# rounds and decrypt after them; unaligned output uses faligndata plus
# two stda partial stores, and the ivec is written back at [$ivec + 0].
303 .globl des_t4_ede3_cbc_encrypt
305 des_t4_ede3_cbc_encrypt
:
307 be
,pn
$::size_t_cc
, .Lcbc_abort
309 ld
[$ivec + 0], %f0 ! load ivec
314 sll
$ileft, 3, $ileft
317 prefetch
[$inp + 63], 20
318 sub %g0, $ileft, $iright
320 alignaddrl
$out, %g0, $out
321 srl
$omask, %g4, $omask
326 ldd
[$key + 0x00], %f4 ! load key schedule
327 ldd
[$key + 0x08], %f6
328 ldd
[$key + 0x10], %f8
329 ldd
[$key + 0x18], %f10
330 ldd
[$key + 0x20], %f12
331 ldd
[$key + 0x28], %f14
332 ldd
[$key + 0x30], %f16
333 ldd
[$key + 0x38], %f18
334 ldd
[$key + 0x40], %f20
335 ldd
[$key + 0x48], %f22
336 ldd
[$key + 0x50], %f24
337 ldd
[$key + 0x58], %f26
338 ldd
[$key + 0x60], %f28
339 ldd
[$key + 0x68], %f30
340 ldd
[$key + 0x70], %f32
341 ldd
[$key + 0x78], %f34
343 .Ldes_ede3_cbc_enc_loop
:
349 sllx
%g4, $ileft, %g4
350 srlx
%g5, $iright, %g5
354 prefetch
[$inp + 8+63], 20
356 fxor
%f2, %f0, %f0 ! ^= ivec
357 prefetch
[$out + 63], 22
360 des_round
%f4, %f6, %f0, %f0
361 des_round
%f8, %f10, %f0, %f0
362 des_round
%f12, %f14, %f0, %f0
363 des_round
%f16, %f18, %f0, %f0
364 ldd
[$key + 0x100-0x08], %f36
365 ldd
[$key + 0x100-0x10], %f38
366 des_round
%f20, %f22, %f0, %f0
367 ldd
[$key + 0x100-0x18], %f40
368 ldd
[$key + 0x100-0x20], %f42
369 des_round
%f24, %f26, %f0, %f0
370 ldd
[$key + 0x100-0x28], %f44
371 ldd
[$key + 0x100-0x30], %f46
372 des_round
%f28, %f30, %f0, %f0
373 ldd
[$key + 0x100-0x38], %f48
374 ldd
[$key + 0x100-0x40], %f50
375 des_round
%f32, %f34, %f0, %f0
376 ldd
[$key + 0x100-0x48], %f52
377 ldd
[$key + 0x100-0x50], %f54
380 ldd
[$key + 0x100-0x58], %f56
381 ldd
[$key + 0x100-0x60], %f58
383 ldd
[$key + 0x100-0x68], %f60
384 ldd
[$key + 0x100-0x70], %f62
385 des_round
%f36, %f38, %f0, %f0
386 ldd
[$key + 0x100-0x78], %f36
387 ldd
[$key + 0x100-0x80], %f38
388 des_round
%f40, %f42, %f0, %f0
389 des_round
%f44, %f46, %f0, %f0
390 des_round
%f48, %f50, %f0, %f0
391 ldd
[$key + 0x100+0x00], %f40
392 ldd
[$key + 0x100+0x08], %f42
393 des_round
%f52, %f54, %f0, %f0
394 ldd
[$key + 0x100+0x10], %f44
395 ldd
[$key + 0x100+0x18], %f46
396 des_round
%f56, %f58, %f0, %f0
397 ldd
[$key + 0x100+0x20], %f48
398 ldd
[$key + 0x100+0x28], %f50
399 des_round
%f60, %f62, %f0, %f0
400 ldd
[$key + 0x100+0x30], %f52
401 ldd
[$key + 0x100+0x38], %f54
402 des_round
%f36, %f38, %f0, %f0
403 ldd
[$key + 0x100+0x40], %f56
404 ldd
[$key + 0x100+0x48], %f58
407 ldd
[$key + 0x100+0x50], %f60
408 ldd
[$key + 0x100+0x58], %f62
410 ldd
[$key + 0x100+0x60], %f36
411 ldd
[$key + 0x100+0x68], %f38
412 des_round
%f40, %f42, %f0, %f0
413 ldd
[$key + 0x100+0x70], %f40
414 ldd
[$key + 0x100+0x78], %f42
415 des_round
%f44, %f46, %f0, %f0
416 des_round
%f48, %f50, %f0, %f0
417 des_round
%f52, %f54, %f0, %f0
418 des_round
%f56, %f58, %f0, %f0
419 des_round
%f60, %f62, %f0, %f0
420 des_round
%f36, %f38, %f0, %f0
421 des_round
%f40, %f42, %f0, %f0
428 brnz
,pt
$len, .Ldes_ede3_cbc_enc_loop
431 st
%f0, [$ivec + 0] ! write out ivec
436 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
437 ! and ~2x deterioration
439 faligndata
%f0, %f0, %f2 ! handle unaligned output
441 stda
%f2, [$out + $omask]0xc0 ! partial store
443 orn
%g0, $omask, $omask
444 stda
%f2, [$out + $omask]0xc0 ! partial store
446 brnz
,pt
$len, .Ldes_ede3_cbc_enc_loop
+4
447 orn
%g0, $omask, $omask
449 st
%f0, [$ivec + 0] ! write out ivec
452 .type des_t4_ede3_cbc_encrypt
,#function
453 .size des_t4_ede3_cbc_encrypt
,.-des_t4_ede3_cbc_encrypt
455 .globl des_t4_ede3_cbc_decrypt
457 des_t4_ede3_cbc_decrypt
:
459 be
,pn
$::size_t_cc
, .Lcbc_abort
461 ld
[$ivec + 0], %f2 ! load ivec
466 sll
$ileft, 3, $ileft
469 prefetch
[$inp + 63], 20
470 sub %g0, $ileft, $iright
472 alignaddrl
$out, %g0, $out
473 srl
$omask, %g4, $omask
478 ldd
[$key + 0x100+0x78], %f4 ! load key schedule
479 ldd
[$key + 0x100+0x70], %f6
480 ldd
[$key + 0x100+0x68], %f8
481 ldd
[$key + 0x100+0x60], %f10
482 ldd
[$key + 0x100+0x58], %f12
483 ldd
[$key + 0x100+0x50], %f14
484 ldd
[$key + 0x100+0x48], %f16
485 ldd
[$key + 0x100+0x40], %f18
486 ldd
[$key + 0x100+0x38], %f20
487 ldd
[$key + 0x100+0x30], %f22
488 ldd
[$key + 0x100+0x28], %f24
489 ldd
[$key + 0x100+0x20], %f26
490 ldd
[$key + 0x100+0x18], %f28
491 ldd
[$key + 0x100+0x10], %f30
492 ldd
[$key + 0x100+0x08], %f32
493 ldd
[$key + 0x100+0x00], %f34
495 .Ldes_ede3_cbc_dec_loop
:
501 sllx
%g4, $ileft, %g4
502 srlx
%g5, $iright, %g5
506 prefetch
[$inp + 8+63], 20
508 prefetch
[$out + 63], 22
511 des_round
%f4, %f6, %f0, %f0
512 des_round
%f8, %f10, %f0, %f0
513 des_round
%f12, %f14, %f0, %f0
514 des_round
%f16, %f18, %f0, %f0
515 ldd
[$key + 0x80+0x00], %f36
516 ldd
[$key + 0x80+0x08], %f38
517 des_round
%f20, %f22, %f0, %f0
518 ldd
[$key + 0x80+0x10], %f40
519 ldd
[$key + 0x80+0x18], %f42
520 des_round
%f24, %f26, %f0, %f0
521 ldd
[$key + 0x80+0x20], %f44
522 ldd
[$key + 0x80+0x28], %f46
523 des_round
%f28, %f30, %f0, %f0
524 ldd
[$key + 0x80+0x30], %f48
525 ldd
[$key + 0x80+0x38], %f50
526 des_round
%f32, %f34, %f0, %f0
527 ldd
[$key + 0x80+0x40], %f52
528 ldd
[$key + 0x80+0x48], %f54
531 ldd
[$key + 0x80+0x50], %f56
532 ldd
[$key + 0x80+0x58], %f58
534 ldd
[$key + 0x80+0x60], %f60
535 ldd
[$key + 0x80+0x68], %f62
536 des_round
%f36, %f38, %f0, %f0
537 ldd
[$key + 0x80+0x70], %f36
538 ldd
[$key + 0x80+0x78], %f38
539 des_round
%f40, %f42, %f0, %f0
540 des_round
%f44, %f46, %f0, %f0
541 des_round
%f48, %f50, %f0, %f0
542 ldd
[$key + 0x80-0x08], %f40
543 ldd
[$key + 0x80-0x10], %f42
544 des_round
%f52, %f54, %f0, %f0
545 ldd
[$key + 0x80-0x18], %f44
546 ldd
[$key + 0x80-0x20], %f46
547 des_round
%f56, %f58, %f0, %f0
548 ldd
[$key + 0x80-0x28], %f48
549 ldd
[$key + 0x80-0x30], %f50
550 des_round
%f60, %f62, %f0, %f0
551 ldd
[$key + 0x80-0x38], %f52
552 ldd
[$key + 0x80-0x40], %f54
553 des_round
%f36, %f38, %f0, %f0
554 ldd
[$key + 0x80-0x48], %f56
555 ldd
[$key + 0x80-0x50], %f58
558 ldd
[$key + 0x80-0x58], %f60
559 ldd
[$key + 0x80-0x60], %f62
561 ldd
[$key + 0x80-0x68], %f36
562 ldd
[$key + 0x80-0x70], %f38
563 des_round
%f40, %f42, %f0, %f0
564 ldd
[$key + 0x80-0x78], %f40
565 ldd
[$key + 0x80-0x80], %f42
566 des_round
%f44, %f46, %f0, %f0
567 des_round
%f48, %f50, %f0, %f0
568 des_round
%f52, %f54, %f0, %f0
569 des_round
%f56, %f58, %f0, %f0
570 des_round
%f60, %f62, %f0, %f0
571 des_round
%f36, %f38, %f0, %f0
572 des_round
%f40, %f42, %f0, %f0
575 fxor
%f2, %f0, %f0 ! ^= ivec
582 brnz
,pt
$len, .Ldes_ede3_cbc_dec_loop
585 st
%f2, [$ivec + 0] ! write out ivec
590 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
591 ! and ~3x deterioration
593 faligndata
%f0, %f0, %f0 ! handle unaligned output
595 stda
%f0, [$out + $omask]0xc0 ! partial store
597 orn
%g0, $omask, $omask
598 stda
%f0, [$out + $omask]0xc0 ! partial store
600 brnz
,pt
$len, .Ldes_ede3_cbc_dec_loop
+4
601 orn
%g0, $omask, $omask
603 st
%f2, [$ivec + 0] ! write out ivec
606 .type des_t4_ede3_cbc_decrypt
,#function
607 .size des_t4_ede3_cbc_decrypt
,.-des_t4_ede3_cbc_decrypt
611 .asciz
"DES for SPARC T4, David S. Miller, Andy Polyakov"