#!/usr/bin/env perl
#
# Provenance (recovered from a repository web view):
#   commit subject: import libcrypto (LibreSSL 2.5.2)
#   path:           [unleashed.git] / lib / libcrypto / x86_64cpuid.pl
#   blob:           6558dedb6beef4fd7daa58a17e367765ac95b9b3
#
# perlasm generator for the x86-64 CPU-identification helpers.  The script
# writes AT&T-syntax assembly to a pipe that feeds x86_64-xlate.pl, which
# produces the final output.
#
# Usage: perl x86_64cpuid.pl [flavour] [output]
#   If the first argument contains a dot it is taken as the output file
#   name and no flavour is passed on.
#
# Routines emitted:
#   OPENSSL_atomic_add  - lock-prefixed 32-bit add, returns the new value
#   OPENSSL_ia32_cpuid  - returns the processed CPUID feature words
#   OPENSSL_wipe_cpu    - zeroes the volatile integer and XMM registers
#
# The IA32CAP_* names used below are symbolic constants that are not
# defined in this file.
# NOTE(review): presumably they come from a header applied when the
# generated assembly is preprocessed - confirm against the build.

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Locate the translator: next to this script first, then in perlasm/.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on goes through the translator.
open OUT,"| \"$^X\" $xlate $flavour $output"
	or die "can't call $xlate: $!";
*STDOUT=*OUT;

($arg1,$arg2,$arg3,$arg4)=("%rdi","%rsi","%rdx","%rcx");	# Unix order

print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.extern	OPENSSL_ia32cap_P
.hidden	OPENSSL_ia32cap_P

.text

#-----------------------------------------------------------------------
# OPENSSL_atomic_add
# In:    first argument  (%rdi) = address of a 32-bit counter
#        second argument (%rsi) = addend (only the low 32 bits matter)
# Out:   %rax = new counter value, sign-extended from 32 to 64 bits
# Clobb: %r8, flags
# Retries the lock cmpxchg until the store succeeds; on failure cmpxchg
# reloads %eax with the current memory value, so the sum is recomputed.
#-----------------------------------------------------------------------
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

#-----------------------------------------------------------------------
# OPENSSL_ia32_cpuid
# In:    none
# Out:   %rax = (%r9d << 32) | %r10d, where %r10d is the adjusted %edx
#        and %r9d the adjusted %ecx of CPUID leaf 1
# Saved: %rbx (parked in %r8 across the cpuid instructions)
# Clobb: %rcx, %rdx, %r8-%r11, flags
# Register roles while running:
#   %r11d  highest standard CPUID leaf
#   %r9d   0 only for a GenuineIntel vendor string; on AMD it carries
#          the XOP bit; finally the %ecx feature word
#   %r10d  0 only for an AuthenticAMD vendor string; then core counts;
#          finally the %edx feature word
#-----------------------------------------------------------------------
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	# Fold the extended feature word in before masking.  Without this
	# the cpuid result is thrown away and the XOP bit can never be set,
	# because %r9d holds only the vendor-mismatch flag here.
	or	%ecx,%r9d
	and	\$IA32CAP_MASK1_AMD_XOP,%r9d	# isolate AMD XOP bit
	or	\$1,%r9d			# make sure %r9d is not zero

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$IA32CAP_BIT0_HT,%edx	# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	xor	\$IA32CAP_MASK0_HT,%edx	# no more threads than cores: clear HT
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d		# mov leaves the flags from cmp intact
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	# force reserved bits to 0
	and	\$(~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)),%edx
	cmp	\$0,%r9d
	jne	.Lnotintel
	# set reserved bit#30 on Intel CPUs
	or	\$IA32CAP_MASK0_INTEL,%edx
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	# set reserved bit#20 to engage RC4_CHAR
	or	\$IA32CAP_MASK0_INTELP4,%edx
.Lnotintel:
	bt	\$IA32CAP_BIT0_HT,%edx	# test hyper-threading bit
	jnc	.Lgeneric
	xor	\$IA32CAP_MASK0_HT,%edx
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$IA32CAP_MASK0_HT,%edx
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	xor	\$IA32CAP_MASK0_HT,%edx	# clear hyper-threading bit if not

.Lgeneric:
	and	\$IA32CAP_MASK1_AMD_XOP,%r9d	# isolate AMD XOP flag
	and	\$(~IA32CAP_MASK1_AMD_XOP),%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$IA32CAP_BIT1_OSXSAVE,%r9d	# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$(~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)),%eax
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret				# without this, control falls into the next routine
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
___

print<<___;
#-----------------------------------------------------------------------
# OPENSSL_wipe_cpu
# In:    none
# Out:   %rax = %rsp at entry plus 8 (the address just above the return
#        address slot)
# Zeroes %xmm0-%xmm15 and %rcx, %rdx, %rsi, %rdi, %r8-%r11.
#-----------------------------------------------------------------------
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___

# Closing the pipe flushes the output and waits for the translator; a
# false return means the write failed or the translator exited non-zero.
close STDOUT
	or die "error closing pipe to $xlate: $! (exit status $?)";