# NOTE(review): non-contiguous excerpt of what appears to be compiler-emitted
# x86-64 assembly in AT&T syntax.  A line consisting only of "# ..." marks a
# spot where the excerpt's own line numbering skipped, i.e. lines of the
# original listing that are not visible here.  Nothing has been invented to
# fill those gaps.
        .file   "matmul.normalopt.ll"
        .section        .rodata.cst8,"aM",@progbits,8
# ...
        .quad   4602678819172646912     # double 0.5
# ...
#-----------------------------------------------------------------------
# init_array
# Visible here: a two-level loop nest (.LBB0_1 outer, BB0_2 inner).  The
# inner-loop lines convert the integer in %edi to double, multiply it by
# the constant held in %xmm0 (loaded from .LCPI0_0), narrow the product to
# single precision, and store that one value to both A and B at the same
# (%rsi,%rcx,4) offset.  Both visible loop compares are against 1536.
# NOTE(review): .LCPI0_0 is presumably the 0.5 constant emitted above; its
# label line is not visible in this excerpt -- confirm against the full file.
#-----------------------------------------------------------------------
        .type   init_array,@function
init_array:                             # @init_array
# ...
        .cfi_def_cfa_offset 16
# ...
        .cfi_def_cfa_register %rbp
# ...
        vmovsd  .LCPI0_0(%rip), %xmm0   # scale factor, kept in %xmm0 for the multiply below
# ...
.LBB0_1:                                # %for.cond1.preheader
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB0_2 Depth 2
# ...
                                        #   Parent Loop BB0_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
# ...
        andl    $-1024, %esi            # imm = 0xFFFFFC00 (32-bit operand: clears the low 10 bits)
# ...
        leal    1(%rdx,%rsi), %edi      # %edi = %edx + %esi + 1 (integer to be converted)
        leaq    (%rax,%rax,2), %rsi     # %rsi = 3 * %rax, used as the store offset base
# ...
        cmpq    $1536, %rdx             # imm = 0x600
                                        # NOTE(review): the branch using these flags is not visible
        vcvtsi2sdl      %edi, %xmm0, %xmm1      # low lane of %xmm1 = (double)%edi
        vmulsd  %xmm0, %xmm1, %xmm1     # scale by the constant in %xmm0
        vcvtsd2ss       %xmm1, %xmm1, %xmm1     # narrow double -> float
        vmovss  %xmm1, A(%rsi,%rcx,4)   # same value is written to A ...
        vmovss  %xmm1, B(%rsi,%rcx,4)   # ... and to B at the identical offset
# ...
                                        #   in Loop: Header=BB0_1 Depth=1
# ...
        cmpq    $1536, %r8              # imm = 0x600
# ...
        .size   init_array, .Ltmp5-init_array
# ...
#-----------------------------------------------------------------------
# print_array
# NOTE(review): non-contiguous excerpt of x86-64 assembly (AT&T syntax).
# A line consisting only of "# ..." marks a spot where the excerpt's own
# line numbering skipped; the lines that belong there are not visible and
# have not been reconstructed.
#
# Visible here: an outer loop (.LBB1_1) containing an inner loop
# (.LBB1_2).  stdout is reloaded from memory at three points, a
# single-precision value in %xmm0 is widened to double, %rbx and %r14 are
# each compared against 1536, and %r15 is advanced by 6144 in the
# outer-loop tail.
# NOTE(review): the calls that would consume stdout and %xmm0 are not
# visible in this excerpt.
#-----------------------------------------------------------------------
        .type   print_array,@function
print_array:                            # @print_array
# ...
        .cfi_def_cfa_offset 16
# ...
        .cfi_def_cfa_register %rbp
# ...
.LBB1_1:                                # %for.cond1.preheader
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB1_2 Depth 2
        movq    stdout(%rip), %rax      # load the current stdout pointer
# ...
.LBB1_2:                                # %for.body3
                                        #   Parent Loop BB1_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
# ...
        vcvtss2sd       %xmm0, %xmm0, %xmm0     # widen float -> double in place
# ...
        imulq   $1717986919, %rax, %rcx # imm = 0x66666667
                                        # NOTE(review): looks like the multiply step of a
                                        # divide-by-constant sequence; the rest is not visible -- confirm
# ...
        imull   $80, %ecx, %ecx
# ...
                                        #   in Loop: Header=BB1_2 Depth=2
        movq    stdout(%rip), %rsi      # stdout reloaded into %rsi
# ...
                                        #   in Loop: Header=BB1_2 Depth=2
# ...
        movq    stdout(%rip), %rax      # stdout reloaded into %rax
        cmpq    $1536, %rbx             # imm = 0x600
# ...
                                        #   in Loop: Header=BB1_1 Depth=1
# ...
        addq    $6144, %r15             # imm = 0x1800
# ...
        cmpq    $1536, %r14             # imm = 0x600
# ...
        .size   print_array, .Ltmp16-print_array
# ...
#-----------------------------------------------------------------------
# main
# NOTE(review): non-contiguous excerpt of x86-64 assembly (AT&T syntax).
# A line consisting only of "# ..." marks a spot where the excerpt's own
# line numbering skipped; the lines that belong there are not visible and
# have not been reconstructed.  In particular the "main:" label and its
# .type directive are not visible -- only the trailing .size names the
# function.
#
# Visible here:
#   * .LBB2_1 / .LBB2_2: a two-level loop whose inner lines convert %edi
#     to double, multiply by the constant in %xmm0 (loaded from
#     .LCPI2_0), narrow to float, and store the value to both A and B.
#   * .LBB2_5 / .LBB2_6 / .LBB2_7: a three-level loop nest.  %xmm0 is
#     zeroed before the innermost loop; the innermost loop multiplies
#     %xmm1 by an element of B, adds the product into %xmm0, and steps
#     %rax by 6144 starting from -9437184.
# NOTE(review): .LCPI2_0 is presumably the 0.5 constant emitted just
# below; its label line is not visible -- confirm against the full file.
#-----------------------------------------------------------------------
        .section        .rodata.cst8,"aM",@progbits,8
# ...
        .quad   4602678819172646912     # double 0.5
# ...
        .cfi_def_cfa_offset 16
# ...
        .cfi_offset %rbp, -16
# ...
        .cfi_def_cfa_register %rbp
# ...
        vmovsd  .LCPI2_0(%rip), %xmm0   # scale factor, kept in %xmm0 for the multiply below
# ...
.LBB2_1:                                # %for.cond1.preheader.i
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB2_2 Depth 2
# ...
.LBB2_2:                                # %for.body3.i
                                        #   Parent Loop BB2_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
# ...
        andl    $-1024, %esi            # imm = 0xFFFFFC00 (32-bit operand: clears the low 10 bits)
# ...
        leal    1(%rdx,%rsi), %edi      # %edi = %edx + %esi + 1 (integer to be converted)
        leaq    (%rax,%rax,2), %rsi     # %rsi = 3 * %rax, used as the store offset base
# ...
        cmpq    $1536, %rdx             # imm = 0x600
                                        # NOTE(review): the branch using these flags is not visible
        vcvtsi2sdl      %edi, %xmm0, %xmm1      # low lane of %xmm1 = (double)%edi
        vmulsd  %xmm0, %xmm1, %xmm1     # scale by the constant in %xmm0
        vcvtsd2ss       %xmm1, %xmm1, %xmm1     # narrow double -> float
        vmovss  %xmm1, A(%rsi,%rcx,4)   # same value is written to A ...
        vmovss  %xmm1, B(%rsi,%rcx,4)   # ... and to B at the identical offset
# ...
# BB#3:                                 # %for.inc17.i
                                        #   in Loop: Header=BB2_1 Depth=1
# ...
        cmpq    $1536, %r8              # imm = 0x600
# ...
.LBB2_5:                                # %for.cond1.preheader
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB2_6 Depth 2
                                        #       Child Loop BB2_7 Depth 3
        leaq    (%r8,%r8,2), %rdx       # %rdx = 3 * %r8
# ...
.LBB2_6:                                # %for.body3
                                        #   Parent Loop BB2_5 Depth=1
                                        # =>  This Loop Header: Depth=2
                                        #       Child Loop BB2_7 Depth 3
# ...
        vxorps  %xmm0, %xmm0, %xmm0     # clear the accumulator before the innermost loop
        movq    $-9437184, %rax         # imm = 0xFFFFFFFFFF700000
                                        # negative start offset; cancelled by the +9437184 displacement below
# ...
.LBB2_7:                                # %for.body8
                                        #   Parent Loop BB2_5 Depth=1
                                        #     Parent Loop BB2_6 Depth=2
                                        # =>    This Inner Loop Header: Depth=3
# ...
        vmulss  B+9437184(%rax,%rdi,4), %xmm1, %xmm1    # %xmm1 *= element of B
                                        # NOTE(review): the load that fills %xmm1 is not visible
        vaddss  %xmm1, %xmm0, %xmm0     # accumulate the product into %xmm0
# ...
        addq    $6144, %rax             # imm = 0x1800
# ...
                                        #   in Loop: Header=BB2_6 Depth=2
# ...
        leaq    C+4(%rdx,%rdi,4), %rsi  # address inside C; its use is not visible here
# ...
        cmpq    $1536, %rdi             # imm = 0x600
# ...
                                        #   in Loop: Header=BB2_5 Depth=1
        addq    $6144, %r9              # imm = 0x1800
# ...
        cmpq    $1536, %r8              # imm = 0x600
# ...
# BB#10:                                # %for.end30
# ...
        .size   main, .Ltmp22-main
# ...
# NOTE(review): non-contiguous excerpt; a line consisting only of "# ..."
# marks lines of the original listing that are not visible here.  The
# .L.str label and the data it names fall inside that gap and have not
# been reconstructed.
        .type   .L.str,@object          # @.str
        .section        .rodata.str1.1,"aMS",@progbits,1
# ...
        .section        ".note.GNU-stack","",@progbits  # marks the object as not needing an executable stack