OpenSSL: update to 1.0.2a
[tomato.git] / release / src-rt-6.x.4708 / router / openssl / crypto / x86_64cpuid.pl
blob d208d02392e9d2770809a800d2bcc1818bb3e38e
#!/usr/bin/env perl
#
# Emits x86_64 CPU-identification and helper routines as perlasm text and
# pipes that text through x86_64-xlate.pl.
#
# Arguments (both optional): flavour, output.  If the first argument
# contains a dot it is taken to be the output name and the flavour is left
# undefined.

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument order is selected for nasm/masm/mingw64 flavours, or
# whenever the output name ends in ".asm".
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in perlasm/.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is fed to the translator.
# Quote the translator path (it may contain spaces) and stop immediately
# if the pipe cannot be created, rather than silently emitting nothing.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
	or die "can't call $xlate: $!";
*STDOUT=*OUT;

# First four integer argument registers for the selected calling convention.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
print<<___;
# Place a call to OPENSSL_cpuid_setup in the .init section.
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

# Capability vector, declared as a hidden common symbol
# (operands: size 16, alignment 4).
.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

# OPENSSL_atomic_add
# In:   first argument  = address of a 32-bit value
#       second argument = amount to add
# Out:  %rax = the value that was stored, sign-extended to 64 bits
# Uses: %r8, flags
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax		# expected current value
.Lspin:	leaq	($arg2,%rax),%r8	# candidate = expected + amount
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)	# on mismatch %eax is reloaded from memory
	jne	.Lspin			# retry with the fresh value
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

# OPENSSL_rdtsc
# Out:  %rax = 64-bit value assembled from the %edx:%eax pair set by rdtsc
# Uses: %rdx, flags
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx		# high half into bits 63..32
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

# OPENSSL_ia32_cpuid
# In:   %rdi = address of a capability array; only the 32-bit word at
#       offset 8 is written here.  It is cleared on entry and, on the
#       path through .Lintel when standard leaf 7 is available, set to
#       %ebx of CPUID leaf 7 / subleaf 0.
# Out:  %rax = (%r9d << 32) | %r10d, where
#         %r10d = adjusted %edx of CPUID leaf 1
#         %r9d  = adjusted %ecx of CPUID leaf 1 with the AMD XOP flag
#                 merged into bit 11
#       If OSXSAVE is clear, or XCR0 does not report both XMM and YMM
#       state, the AVX, FMA and XOP bits are cleared in %r9d and the AVX2
#       bit is cleared in the word at offset 8.
# Uses: %rcx, %rdx, %r8-%r11, flags.  %rbx is saved in %r8 and restored.
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear 3rd word
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

	cmp	\$7,%r11d
	jb	.Lnocacheinfo

	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	mov	%ebx,8(%rdi)

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret				# without this, control runs on into OPENSSL_cleanse
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

# OPENSSL_cleanse
# In:   first argument  = buffer address
#       second argument = byte count
# Stores zero over the buffer.  Counts below 15 are cleared one byte at a
# time.  Larger counts are cleared byte-wise until the address is 8-byte
# aligned, then 8 bytes per store, and any remaining tail goes back
# through the byte loop.
# Uses: %rax, both argument registers, flags
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1		# lea keeps the flags from sub intact
	jnz	.Little
.Lret:
	ret				# must not fall into .Lot below
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# at least 8 bytes left?
	lea	8($arg1),$arg1		# lea keeps the flags from test intact
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little			# clear the tail byte by byte
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

print<<___ if (!$win64);
# OPENSSL_wipe_cpu (emitted when the Win64 convention is not selected)
# Zeroes %xmm0-%xmm15 and the integer registers %rcx, %rdx, %rsi, %rdi
# and %r8-%r11.
# Out:  %rax = %rsp + 8 as seen on entry
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
print<<___ if ($win64);
# OPENSSL_wipe_cpu (emitted when the Win64 convention is selected)
# Zeroes %xmm0-%xmm5 and the integer registers %rcx, %rdx and %r8-%r11.
# Out:  %rax = %rsp + 8 as seen on entry
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___

print<<___;
# OPENSSL_ia32_rdrand
# Issues rdrand up to 8 times, stopping at the first attempt that sets
# the carry flag.
# Out:  %rax = value left by rdrand; if that value is zero, %rax is
#       replaced by the remaining attempt counter in %rcx.
#       NOTE(review): a zero return therefore signals that all attempts
#       failed, assuming rdrand leaves zero in its destination when it
#       does not set carry -- confirm against the instruction reference.
# Uses: %rcx, flags
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand
	loop	.Loop_rdrand
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret				# without this, control runs on into OPENSSL_ia32_rdseed
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand

# OPENSSL_ia32_rdseed
# Issues rdseed up to 8 times, stopping at the first attempt that sets
# the carry flag.
# Out:  %rax = value left by rdseed; if that value is zero, %rax is
#       replaced by the remaining attempt counter in %rcx.
# Uses: %rcx, flags
.globl	OPENSSL_ia32_rdseed
.type	OPENSSL_ia32_rdseed,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdseed:
	mov	\$8,%ecx
.Loop_rdseed:
	rdseed	%rax
	jc	.Lbreak_rdseed
	loop	.Loop_rdseed
.Lbreak_rdseed:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
___

# STDOUT is the pipe to the translator: closing it flushes the generated
# text and waits for the translator.  Report a failed close instead of
# ignoring it, so a broken translation does not pass unnoticed.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";	# flush