3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
12 # The module implements the bn_GF2m_mul_2x2 polynomial multiplication
13 # used in bn_gf2m.c. For the time being it is a fairly mechanical,
14 # low-hanging port from C, except that it has two code paths: one
15 # suitable for all SPARCv9 processors and one for VIS3-capable ones.
16 # The former delivers ~25-45% more performance (more for longer keys)
17 # on the heaviest DH and DSA verify operations on the venerable
18 # UltraSPARC II. On T4 the VIS3 code is ~100-230% faster than
19 # gcc-generated code and ~35-90% faster than the pure SPARCv9 path.
28 ($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
29 ($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;
32 #include <sparc_arch.h>
35 .register
%g2,#scratch
36 .register
%g3,#scratch
43 .globl bn_GF2m_mul_2x2
46 SPARC_LOAD_ADDRESS_LEAF
(OPENSSL_sparcv9cap_P
,%g1,%g5)
47 ld
[%g1+0],%g1 ! OPENSSL_sparcv9cap_P
[0]
49 andcc
%g1, SPARCV9_VIS3
, %g0
57 .word
0x95b262ab ! xmulx
%o1, %o3, %o2
58 .word
0x99b262cb ! xmulxhi
%o1, %o3, %o4
59 srlx
%o2, 32, %o1 ! 13 cycles later
69 save
%sp,-STACK_FRAME
-$locals,%sp
75 srlx
$a12,1,$a48 ! 0x7fff...
77 srlx
$a12,2,$a12 ! 0x3fff...
78 add
%sp,STACK_BIAS
+STACK_FRAME
,$tab
84 srax
$a4,63,@i[1] ! broadcast
61st bit
85 and $a48,$a4,$a4 ! (a
<<2)&0x7fff...
87 srax
$a2,63,@i[0] ! broadcast
62nd bit
88 and $a12,$a2,$a2 ! (a
<<1)&0x3fff...
89 srax
$a1,63,$lo ! broadcast
63rd bit
90 and $a48,$a1,$a1 ! (a
<<0)&0x1fff...
97 stx
%g0,[$tab+0*8] ! tab
[0]=0
99 stx
$a1,[$tab+1*8] ! tab
[1]=a1
100 stx
$a2,[$tab+2*8] ! tab
[2]=a2
102 stx
$a12,[$tab+3*8] ! tab
[3]=a1
^a2
105 stx
$a4,[$tab+4*8] ! tab
[4]=a4
107 stx
$a1,[$tab+5*8] ! tab
[5]=a1
^a4
109 stx
$a2,[$tab+6*8] ! tab
[6]=a2
^a4
111 stx
$a12,[$tab+7*8] ! tab
[7]=a1
^a2
^a4
114 stx
$a8,[$tab+8*8] ! tab
[8]=a8
116 stx
$a1,[$tab+9*8] ! tab
[9]=a1
^a8
118 stx
$a2,[$tab+10*8] ! tab
[10]=a2
^a8
120 stx
$a12,[$tab+11*8] ! tab
[11]=a1
^a2
^a8
123 stx
$a48,[$tab+12*8] ! tab
[12]=a4
^a8
125 stx
$a1,[$tab+13*8] ! tab
[13]=a1
^a4
^a8
127 stx
$a2,[$tab+14*8] ! tab
[14]=a2
^a4
^a8
129 stx
$a12,[$tab+15*8] ! tab
[15]=a1
^a2
^a4
^a8
134 and @i[0],`0xf<<3`,@i[0]
136 ldx
[$tab+@i[0]],@i[0]
139 and @i[1],`0xf<<3`,@i[1]
141 ldx
[$tab+@i[1]],@i[1]
147 and @i[0],`0xf<<3`,@i[0]
149 for($n=1;$n<14;$n++) {
151 sllx
@i[1],`$n*4`,@T[0]
152 ldx
[$tab+@i[0]],@i[0]
153 srlx
@i[1],`64-$n*4`,@T[1]
155 srlx
$b,`($n+2)*4`-3,@i[1]
157 and @i[1],`0xf<<3`,@i[1]
159 push(@i,shift(@i)); push(@T,shift(@T));
162 sllx
@i[1],`$n*4`,@T[0]
163 ldx
[$tab+@i[0]],@i[0]
164 srlx
@i[1],`64-$n*4`,@T[1]
167 sllx
@i[0],`($n+1)*4`,@T[0]
169 srlx
@i[0],`64-($n+1)*4`,@T[1]
182 .type bn_GF2m_mul_2x2
,#function
183 .size bn_GF2m_mul_2x2
,.-bn_GF2m_mul_2x2
184 .asciz
"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
188 $code =~ s/\`([^\`]*)\`/eval($1)/gem;