OpenSSL: update to 1.0.2a
[tomato.git] / release / src-rt-6.x.4708 / router / openssl / crypto / des / asm / dest4-sparcv9.pl
blob1dc60243d4fbda91314f2d2dc33ca1eb0aa1b6c3
1 #!/usr/bin/env perl
# ====================================================================
# Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
# <appro@openssl.org>. The module is licensed under 2-clause BSD
# license. March 2013. All rights reserved.
# ====================================================================
######################################################################
# DES for SPARC T4.
#
# As with other hardware-assisted ciphers, CBC encrypt results [for
# aligned data] are virtually identical to critical path lengths:
#
#		DES		Triple-DES
# CBC encrypt	4.14/4.15(*)	11.7/11.7
# CBC decrypt	1.77/4.11(**)	6.42/7.47
#
#  (*)	numbers after the slash are for misaligned data;
#  (**)	this is the result for the largest block size; unlike all
#	other cases, results for smaller blocks are better[?];
# Derive the directory this script lives in from its own path, then make
# both that directory and the shared perlasm directory searchable so the
# SPARCv9 helper module can be loaded.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = $1;
push(@INC, "${dir}", "${dir}../../perlasm");
require "sparcv9_modes.pl";

# Hand the command-line arguments to the helper module.  $::abibits is
# consulted immediately afterwards -- presumably asm_init() sets it;
# confirm in sparcv9_modes.pl.
asm_init(@ARGV);

# Only the 64-bit ABI build declares %g2/%g3 as scratch registers.
if ($::abibits == 64) {
    $code .= <<___;
.register %g2,#scratch
.register %g3,#scratch
___
}

# Everything emitted from here on goes into the text section.
$code .= <<___;
.text
___
# des_t4_key_expand(inp = %o0, out = %o1)
#
# Expands the 8-byte DES key at `inp` into a key schedule written to
# `out` as sixteen doublewords at offsets 0x00..0x78 (0x80 bytes).
#
# `inp` may be misaligned: the first doubleword is always loaded (the
# `ldd [inp + 0x00]` sits in the delay slot of `bz,pt`), and only when
# the low three address bits are non-zero is the following doubleword
# loaded and merged in with faligndata.  `out` is written with plain
# `std`, so no misalignment handling is done for it here.
#
# The register names live in their own lexical scope, which is closed
# again right after the heredoc so they cannot leak into later code.
{ my ($inp,$out)=("%o0","%o1");

$code.=<<___;
.align	32
.globl	des_t4_key_expand
.type	des_t4_key_expand,#function
des_t4_key_expand:
	andcc		$inp, 0x7, %g0
	alignaddr	$inp, %g0, $inp
	bz,pt		%icc, 1f
	ldd		[$inp + 0x00], %f0
	ldd		[$inp + 0x08], %f2
	faligndata	%f0, %f2, %f0
1:	des_kexpand	%f0, 0, %f0
	des_kexpand	%f0, 1, %f2
	std		%f0, [$out + 0x00]
	des_kexpand	%f2, 3, %f6
	std		%f2, [$out + 0x08]
	des_kexpand	%f2, 2, %f4
	des_kexpand	%f6, 3, %f10
	std		%f6, [$out + 0x18]
	des_kexpand	%f6, 2, %f8
	std		%f4, [$out + 0x10]
	des_kexpand	%f10, 3, %f14
	std		%f10, [$out + 0x28]
	des_kexpand	%f10, 2, %f12
	std		%f8, [$out + 0x20]
	des_kexpand	%f14, 1, %f16
	std		%f14, [$out + 0x38]
	des_kexpand	%f16, 3, %f20
	std		%f12, [$out + 0x30]
	des_kexpand	%f16, 2, %f18
	std		%f16, [$out + 0x40]
	des_kexpand	%f20, 3, %f24
	std		%f20, [$out + 0x50]
	des_kexpand	%f20, 2, %f22
	std		%f18, [$out + 0x48]
	des_kexpand	%f24, 3, %f28
	std		%f24, [$out + 0x60]
	des_kexpand	%f24, 2, %f26
	std		%f22, [$out + 0x58]
	des_kexpand	%f28, 1, %f30
	std		%f28, [$out + 0x70]
	std		%f26, [$out + 0x68]
	retl
	std		%f30, [$out + 0x78]
.size	des_t4_key_expand,.-des_t4_key_expand
___
}
# CBC entry points for single DES and Triple-DES (EDE3):
#
#   des_t4_cbc_encrypt, des_t4_cbc_decrypt,
#   des_t4_ede3_cbc_encrypt, des_t4_ede3_cbc_decrypt
#
# All four take the same arguments in %o0..%o4:
#
#   inp   input buffer; any alignment.  The address is rounded down to
#         8 bytes and, when it was misaligned, each block is assembled
#         from two 64-bit loads with a shift/or pair (ileft/iright hold
#         the bit counts).
#   out   output buffer; any alignment.  An aligned buffer is written
#         with a plain `std`; otherwise omask is non-zero and each block
#         goes out as two masked partial stores (the `2:` path).
#   len   byte count, turned into a block count with `srlx len, 3`.
#         Zero returns at once through .Lcbc_abort.
#         NOTE(review): the loops test the count only after processing a
#         block, so a non-zero length below 8 is not guarded against --
#         callers presumably always pass a multiple of 8; confirm.
#   key   expanded key schedule read with `ldd`: 0x80 bytes for DES
#         (offsets 0x00..0x78), 0x180 bytes for EDE3 (up to 0x100+0x78).
#   ivec  8-byte IV, read as two 32-bit words on entry and written back
#         on exit.  Encrypt stores the last ciphertext block produced;
#         decrypt stores the last ciphertext block consumed.
#
# $::size_t_cc is interpolated as the condition-code operand of the
# length test; it is not defined in this file (presumably provided by
# the perlasm helper module -- confirm).
#
# Every branch is followed by an explicit delay-slot instruction.  In
# particular `brz,pt ileft, 4f` must be followed by `nop`, not by the
# second `ldx`, so that aligned input never reads the doubleword past
# the current block.
#
# The `2:` path reloads the next input word with `ldxa` before storing
# and re-enters the loop at `loop+4`, i.e. just past the loop's own
# initial `ldx`.
{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  my ($ileft,$iright,$omask) = map("%g$_",(1..3));

$code.=<<___;
.globl	des_t4_cbc_encrypt
.align	32
des_t4_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	nop
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.Lcbc_abort:
	retl
	nop

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_cbc_encrypt,#function
.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt

.globl	des_t4_cbc_decrypt
.align	32
des_t4_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	nop
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x78], %f4	! load key schedule
	ldd		[$key + 0x70], %f6
	ldd		[$key + 0x68], %f8
	ldd		[$key + 0x60], %f10
	ldd		[$key + 0x58], %f12
	ldd		[$key + 0x50], %f14
	ldd		[$key + 0x48], %f16
	ldd		[$key + 0x40], %f18
	ldd		[$key + 0x38], %f20
	ldd		[$key + 0x30], %f22
	ldd		[$key + 0x28], %f24
	ldd		[$key + 0x20], %f26
	ldd		[$key + 0x18], %f28
	ldd		[$key + 0x10], %f30
	ldd		[$key + 0x08], %f32
	ldd		[$key + 0x00], %f34

.Ldes_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_cbc_decrypt,#function
.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

# One might wonder why there are back-to-back des_iip/des_ip pairs
# between the EDE passes. Aren't they inverses of each other? They
# almost are: the net effect of the pair is that the two 32-bit words
# in the target register are swapped. Consider the des_iip/des_ip pair
# as a way to perform that required swap; it is actually the fastest
# way to do it in this case.
#
# EDE3 key schedule usage, as visible from the load offsets below:
#   encrypt: 0x00..0x78 ascending, then 0xf8..0x80 descending, then
#            0x100..0x178 ascending;
#   decrypt: 0x178..0x100 descending, then 0x80..0xf8 ascending, then
#            0x78..0x00 descending.
# Only the first pass's schedule stays resident in %f4-%f34; the other
# two are reloaded into %f36-%f62 on every iteration, interleaved with
# the rounds.

$code.=<<___;
.globl	des_t4_ede3_cbc_encrypt
.align	32
des_t4_ede3_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	nop
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_ede3_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x100-0x08], %f36
	ldd		[$key + 0x100-0x10], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x100-0x18], %f40
	ldd		[$key + 0x100-0x20], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x100-0x28], %f44
	ldd		[$key + 0x100-0x30], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x100-0x38], %f48
	ldd		[$key + 0x100-0x40], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x100-0x48], %f52
	ldd		[$key + 0x100-0x50], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x100-0x58], %f56
	ldd		[$key + 0x100-0x60], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x100-0x68], %f60
	ldd		[$key + 0x100-0x70], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100-0x78], %f36
	ldd		[$key + 0x100-0x80], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x100+0x00], %f40
	ldd		[$key + 0x100+0x08], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x100+0x10], %f44
	ldd		[$key + 0x100+0x18], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x100+0x20], %f48
	ldd		[$key + 0x100+0x28], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x100+0x30], %f52
	ldd		[$key + 0x100+0x38], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100+0x40], %f56
	ldd		[$key + 0x100+0x48], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x100+0x50], %f60
	ldd		[$key + 0x100+0x58], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x100+0x60], %f36
	ldd		[$key + 0x100+0x68], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x100+0x70], %f40
	ldd		[$key + 0x100+0x78], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~2x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_ede3_cbc_encrypt,#function
.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt

.globl	des_t4_ede3_cbc_decrypt
.align	32
des_t4_ede3_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	nop
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x100+0x78], %f4	! load key schedule
	ldd		[$key + 0x100+0x70], %f6
	ldd		[$key + 0x100+0x68], %f8
	ldd		[$key + 0x100+0x60], %f10
	ldd		[$key + 0x100+0x58], %f12
	ldd		[$key + 0x100+0x50], %f14
	ldd		[$key + 0x100+0x48], %f16
	ldd		[$key + 0x100+0x40], %f18
	ldd		[$key + 0x100+0x38], %f20
	ldd		[$key + 0x100+0x30], %f22
	ldd		[$key + 0x100+0x28], %f24
	ldd		[$key + 0x100+0x20], %f26
	ldd		[$key + 0x100+0x18], %f28
	ldd		[$key + 0x100+0x10], %f30
	ldd		[$key + 0x100+0x08], %f32
	ldd		[$key + 0x100+0x00], %f34

.Ldes_ede3_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x80+0x00], %f36
	ldd		[$key + 0x80+0x08], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x80+0x10], %f40
	ldd		[$key + 0x80+0x18], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x80+0x20], %f44
	ldd		[$key + 0x80+0x28], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x80+0x30], %f48
	ldd		[$key + 0x80+0x38], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x80+0x40], %f52
	ldd		[$key + 0x80+0x48], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x80+0x50], %f56
	ldd		[$key + 0x80+0x58], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x80+0x60], %f60
	ldd		[$key + 0x80+0x68], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80+0x70], %f36
	ldd		[$key + 0x80+0x78], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x80-0x08], %f40
	ldd		[$key + 0x80-0x10], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x80-0x18], %f44
	ldd		[$key + 0x80-0x20], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x80-0x28], %f48
	ldd		[$key + 0x80-0x30], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x80-0x38], %f52
	ldd		[$key + 0x80-0x40], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80-0x48], %f56
	ldd		[$key + 0x80-0x50], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x80-0x58], %f60
	ldd		[$key + 0x80-0x60], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x80-0x68], %f36
	ldd		[$key + 0x80-0x70], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x80-0x78], %f40
	ldd		[$key + 0x80-0x80], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_ede3_cbc_decrypt,#function
.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
}
# Identification string embedded in the generated object, followed by
# realignment to a 4-byte boundary.
$code.=<<___;
.asciz	"DES for SPARC T4, David S. Miller, Andy Polyakov"
.align	4
___

# Hand the accumulated text to the helper module's emitter (defined in
# the required sparcv9_modes.pl, not in this file).
&emit_assembler();

# Buffered write errors (full disk, closed pipe) only surface when the
# handle is closed, so a failed close must abort the build instead of
# silently leaving a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";