# Excerpt of LLVM-generated x86-64 assembly (AT&T / GAS syntax): the file
# header, an 8-byte constant-pool entry, and the visible part of init_array.
#
# Formatting repair only: the listing line numbers that had been fused onto
# the front of each statement were stripped, and operands that had been split
# over several physical lines were rejoined (one statement per line).  No
# instruction, operand, directive or compiler-emitted comment was invented
# or changed.
#
# The excerpt is NOT contiguous.  A "# [...]" line marks each place where the
# original listing numbering skipped, i.e. where one or more lines of the
# full listing are absent.  Two neighbouring statements separated by such a
# marker must not be assumed to execute back to back.  No branch or return
# instruction appears in the visible text, and the labels .LCPI0_0 and
# .Ltmp5 are referenced but not defined here.
        .file       "matmul.polly.interchanged+tiled.ll"
        .section    .rodata.cst8,"aM",@progbits,8
# [...]
        .quad       4602678819172646912     # double 0.5
# [...]
        .type       init_array,@function
init_array:                             # @init_array
# [...]
        .cfi_def_cfa_offset 16
# [...]
        .cfi_def_cfa_register %rbp
# [...]
# xmm0 = 8-byte constant at .LCPI0_0, kept live as the multiplier below.
# NOTE(review): presumably the "double 0.5" entry emitted above; the label
# line itself is not in this excerpt -- confirm against the full listing.
        vmovsd      .LCPI0_0(%rip), %xmm0
# [...]
.LBB0_1:                                # %polly.loop_preheader3
                                        # =>This Loop Header: Depth=1
                                        # Child Loop BB0_2 Depth 2
# [...]
.LBB0_2:                                # %polly.loop_header2
                                        # Parent Loop BB0_1 Depth=1
                                        # => This Inner Loop Header: Depth=2
# [...]
# Clear the low 10 bits of esi.  This is a 32-bit operation, so the effective
# mask is 0xFFFFFC00 (and the upper half of rsi is zeroed); the 64-bit hex in
# the compiler comment is only how -1024 was printed.
        andl        $-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
# [...]
        leal        1(%rdx,%rsi), %edi      # edi = low 32 bits of rdx + rsi + 1
        leaq        (%rax,%rax,2), %rsi     # rsi = 3 * rax
# [...]
# The compare is issued ahead of the AVX sequence; none of the vector
# instructions below write EFLAGS.  NOTE(review): the conditional branch that
# consumes these flags is not in this excerpt.
        cmpq        $1536, %rdx             # imm = 0x600
        vcvtsi2sdl  %edi, %xmm0, %xmm1      # xmm1 = (double)(int32)edi
        vmulsd      %xmm0, %xmm1, %xmm1     # xmm1 = xmm1 * xmm0
        vcvtsd2ss   %xmm1, %xmm1, %xmm1     # narrow the product to float
# The same single-precision value is written to both arrays at byte offset
# rsi + 4*rcx from the symbols A and B (absolute, non-RIP-relative addressing).
        vmovss      %xmm1, A(%rsi,%rcx,4)
        vmovss      %xmm1, B(%rsi,%rcx,4)
# [...]
# BB#3:                                 # %polly.loop_exit4
                                        # in Loop: Header=BB0_1 Depth=1
# [...]
        cmpq        $1536, %r8              # imm = 0x600
# [...]
# BB#4:                                 # %polly.loop_exit
# Visible part of print_array (LLVM-generated x86-64, AT&T / GAS syntax).
#
# Formatting repair only: the listing line numbers fused onto each statement
# were stripped and operands split over several physical lines were rejoined.
# No instruction, operand, directive or compiler-emitted comment was invented
# or changed.
#
# The excerpt is NOT contiguous.  A "# [...]" line marks each place where the
# original listing numbering skipped, i.e. where one or more lines of the
# full listing are absent.  No call, branch or return instruction appears in
# the visible text, and .Ltmp16 is referenced but not defined here.
#
# What the visible lines show: two nested loops (BB1_1 outer, BB1_2 inner),
# repeated RIP-relative reloads of the global `stdout`, a float-to-double
# widening of xmm0, and loop compares against 1536.
        .type       print_array,@function
print_array:                            # @print_array
# [...]
        .cfi_def_cfa_offset 16
# [...]
        .cfi_def_cfa_register %rbp
# [...]
.LBB1_1:                                # %for.cond1.preheader
                                        # =>This Loop Header: Depth=1
                                        # Child Loop BB1_2 Depth 2
        movq        stdout(%rip), %rax      # rax = current value of stdout
# [...]
.LBB1_2:                                # %for.body3
                                        # Parent Loop BB1_1 Depth=1
                                        # => This Inner Loop Header: Depth=2
# [...]
# Widen the float in xmm0 to double.  NOTE(review): consistent with passing a
# float to a variadic routine such as fprintf, but the call is not in this
# excerpt -- confirm against the full listing.
        vcvtss2sd   %xmm0, %xmm0, %xmm0
# [...]
# rcx = rax * 0x66666667, then (below) ecx *= 80.  NOTE(review): 0x66666667 is
# the reciprocal constant compilers commonly use for signed division by a
# multiple of 5; together with the x80 this looks like part of a "% 80"
# computation.  The shift/subtract steps are not in this excerpt.
        imulq       $1717986919, %rax, %rcx # imm = 0x66666667
# [...]
        imull       $80, %ecx, %ecx
# [...]
                                        # in Loop: Header=BB1_2 Depth=2
        movq        stdout(%rip), %rsi      # rsi = stdout
# [...]
                                        # in Loop: Header=BB1_2 Depth=2
# [...]
        movq        stdout(%rip), %rax      # reload stdout for the next pass
        cmpq        $1536, %rbx             # imm = 0x600
# [...]
                                        # in Loop: Header=BB1_1 Depth=1
# [...]
# 6144 = 1536 * 4.  NOTE(review): presumably the byte stride of one row of
# 1536 four-byte floats -- confirm.
        addq        $6144, %r15             # imm = 0x1800
# [...]
        cmpq        $1536, %r14             # imm = 0x600
# [...]
        .size       print_array, .Ltmp16-print_array
# Visible part of main (LLVM-generated x86-64, AT&T / GAS syntax), preceded by
# its 8-byte constant-pool entry.  The "main:" label line is not in this
# excerpt; the function is identified by the .size directive at the end.
#
# Formatting repair only: the listing line numbers fused onto each statement
# were stripped and operands split over several physical lines were rejoined.
# No instruction, operand, directive or compiler-emitted comment was invented
# or changed.
#
# The excerpt is NOT contiguous.  A "# [...]" line marks each place where the
# original listing numbering skipped, i.e. where one or more lines of the
# full listing are absent.  No push, call, branch or return instruction
# appears in the visible text, and .LCPI2_0 / .Ltmp28 are referenced but not
# defined here.
#
# Structure shown by the compiler's own loop comments:
#   1. BB2_1 / BB2_2   -- a two-level nest whose visible instructions match
#                         init_array's.  NOTE(review): the ".i" suffix on the
#                         block names suggests an inlined copy -- confirm.
#   2. BB2_5 > BB2_15 > BB2_8 > BB2_11 > BB2_17 > BB2_18 -- a six-level nest
#                         whose innermost visible work is a single-precision
#                         multiply followed by an add.
#
# Constants used by the second nest and how they relate arithmetically:
#   6144    = 1536 * 4          393216 = 64 * 6144
#   256     = 64 * 4            1472   = 1536 - 64
#   9437184 = 1536 * 1536 * 4
# NOTE(review): this is consistent with 64-element tiles over 1536x1536 arrays
# of 4-byte floats, but the excerpt alone does not prove it -- confirm.
#
# Frame slots touched in the visible text: -48, -56, -64, -72, -80 and
# -88(%rbp), all used as 8-byte spill/reload locations.
        .section    .rodata.cst8,"aM",@progbits,8
# [...]
        .quad       4602678819172646912     # double 0.5
# [...]
# Unwind information: CFA adjustments plus the recorded save slots of rbp,
# rbx and r12-r15.  The instructions that perform the saves are not in this
# excerpt.
        .cfi_def_cfa_offset 16
# [...]
        .cfi_offset %rbp, -16
# [...]
        .cfi_def_cfa_register %rbp
# [...]
        .cfi_offset %rbx, -56
# [...]
        .cfi_offset %r12, -48
# [...]
        .cfi_offset %r13, -40
# [...]
        .cfi_offset %r14, -32
# [...]
        .cfi_offset %r15, -24
# [...]
# xmm0 = 8-byte constant at .LCPI2_0, the multiplier for the first nest.
# NOTE(review): presumably the "double 0.5" entry above; the label line is
# not in this excerpt.
        vmovsd      .LCPI2_0(%rip), %xmm0
# [...]
.LBB2_1:                                # %polly.loop_preheader3.i
                                        # =>This Loop Header: Depth=1
                                        # Child Loop BB2_2 Depth 2
# [...]
.LBB2_2:                                # %polly.loop_header2.i
                                        # Parent Loop BB2_1 Depth=1
                                        # => This Inner Loop Header: Depth=2
# [...]
# Clear the low 10 bits of esi.  32-bit operation: the effective mask is
# 0xFFFFFC00; the 64-bit hex in the compiler comment is only how -1024 was
# printed.
        andl        $-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
# [...]
        leal        1(%rdx,%rsi), %edi      # edi = low 32 bits of rdx + rsi + 1
        leaq        (%rax,%rax,2), %rsi     # rsi = 3 * rax
# [...]
# Compare issued ahead of the AVX sequence; the vector instructions below do
# not write EFLAGS.  The consuming branch is not in this excerpt.
        cmpq        $1536, %rdx             # imm = 0x600
        vcvtsi2sdl  %edi, %xmm0, %xmm1      # xmm1 = (double)(int32)edi
        vmulsd      %xmm0, %xmm1, %xmm1     # xmm1 = xmm1 * xmm0
        vcvtsd2ss   %xmm1, %xmm1, %xmm1     # narrow the product to float
# Same value stored to both arrays at byte offset rsi + 4*rcx.
        vmovss      %xmm1, A(%rsi,%rcx,4)
        vmovss      %xmm1, B(%rsi,%rcx,4)
# [...]
# BB#3:                                 # %polly.loop_exit4.i
                                        # in Loop: Header=BB2_1 Depth=1
# [...]
        cmpq        $1536, %rbx             # imm = 0x600
# [...]
# BB#4:                                 # %polly.loop_preheader3.preheader
# [...]
        movl        $9437184, %edx          # imm = 0x900000
# [...]
.LBB2_5:                                # %polly.loop_preheader17
                                        # =>This Loop Header: Depth=1
                                        # Child Loop BB2_15 Depth 2
                                        # Child Loop BB2_8 Depth 3
                                        # Child Loop BB2_11 Depth 4
                                        # Child Loop BB2_17 Depth 5
                                        # Child Loop BB2_18 Depth 6
# Depth-1 state is parked in the frame so the registers can be reused by the
# inner levels; the matching reloads appear further down.
        movq        %rax, -56(%rbp)         # 8-byte Spill
        movq        %rbx, -88(%rbp)         # 8-byte Spill
# [...]
        movq        %rcx, -72(%rbp)         # 8-byte Spill
# [...]
        movq        %rcx, -48(%rbp)         # 8-byte Spill
# [...]
        movq        %rbx, -64(%rbp)         # 8-byte Spill
# [...]
.LBB2_15:                               # %polly.loop_preheader24
                                        # Parent Loop BB2_5 Depth=1
                                        # => This Loop Header: Depth=2
                                        # Child Loop BB2_8 Depth 3
                                        # Child Loop BB2_11 Depth 4
                                        # Child Loop BB2_17 Depth 5
                                        # Child Loop BB2_18 Depth 6
        movq        %rcx, -80(%rbp)         # 8-byte Spill
# [...]
.LBB2_8:                                # %polly.loop_header23
                                        # Parent Loop BB2_5 Depth=1
                                        # Parent Loop BB2_15 Depth=2
                                        # => This Loop Header: Depth=3
                                        # Child Loop BB2_11 Depth 4
                                        # Child Loop BB2_17 Depth 5
                                        # Child Loop BB2_18 Depth 6
# Compares rax with the value spilled to -72(%rbp) at the top of BB2_5.
        cmpq        -72(%rbp), %rax         # 8-byte Folded Reload
# [...]
# BB#9:                                 # %polly.loop_header30.preheader
                                        # in Loop: Header=BB2_8 Depth=3
# [...]
# BB#10:                                # in Loop: Header=BB2_8 Depth=3
# [...]
        movq        -64(%rbp), %r10         # 8-byte Reload
        movq        -56(%rbp), %r11         # 8-byte Reload
# [...]
.LBB2_11:                               # %polly.loop_header37.preheader
                                        # Parent Loop BB2_5 Depth=1
                                        # Parent Loop BB2_15 Depth=2
                                        # Parent Loop BB2_8 Depth=3
                                        # => This Loop Header: Depth=4
                                        # Child Loop BB2_17 Depth 5
                                        # Child Loop BB2_18 Depth 6
# [...]
.LBB2_17:                               # %polly.loop_header46.preheader
                                        # Parent Loop BB2_5 Depth=1
                                        # Parent Loop BB2_15 Depth=2
                                        # Parent Loop BB2_8 Depth=3
                                        # Parent Loop BB2_11 Depth=4
                                        # => This Loop Header: Depth=5
                                        # Child Loop BB2_18 Depth 6
        leaq        (%r11,%r11,2), %rsi     # rsi = 3 * r11
# [...]
# One element of A is loaded here, in the depth-5 block ahead of BB2_18,
# and used as the multiplier in the BB2_18 loop below.
        vmovss      A(%rsi,%rcx,4), %xmm0
# [...]
.LBB2_18:                               # %polly.loop_header46
                                        # Parent Loop BB2_5 Depth=1
                                        # Parent Loop BB2_15 Depth=2
                                        # Parent Loop BB2_8 Depth=3
                                        # Parent Loop BB2_11 Depth=4
                                        # Parent Loop BB2_17 Depth=5
                                        # => This Inner Loop Header: Depth=6
# xmm1 = xmm0 * float[r8], then xmm1 += float[rdi].  The store of the result
# and the pointer updates are not in this excerpt.
        vmulss      (%r8), %xmm0, %xmm1
        vaddss      (%rdi), %xmm1, %xmm1
# [...]
# BB#16:                                # %polly.loop_exit48
                                        # in Loop: Header=BB2_17 Depth=5
        addq        $6144, %r14             # imm = 0x1800
# [...]
.LBB2_12:                               # %polly.loop_exit39
                                        # in Loop: Header=BB2_11 Depth=4
        addq        $6144, %r10             # imm = 0x1800
# Compares r11 with the value spilled to -48(%rbp) at the top of BB2_5.
        cmpq        -48(%rbp), %r11         # 8-byte Folded Reload
# [...]
.LBB2_13:                               # %polly.loop_exit32
                                        # in Loop: Header=BB2_8 Depth=3
        addq        $393216, %rdx           # imm = 0x60000
        cmpq        $1472, %r9              # imm = 0x5C0
# [...]
        movq        -56(%rbp), %rax         # 8-byte Reload
# [...]
# BB#14:                                # %polly.loop_exit25
                                        # in Loop: Header=BB2_15 Depth=2
# The slot at -64(%rbp) is advanced in place (read-modify-write on the spill
# slot); rcx is reloaded from -80(%rbp) and advanced by the same 256.
        addq        $256, -64(%rbp)         # 8-byte Folded Spill
# [...]
        movq        -80(%rbp), %rcx         # 8-byte Reload
        addq        $256, %rcx              # imm = 0x100
# [...]
        cmpq        $1472, %r12             # imm = 0x5C0
# [...]
# BB#6:                                 # %polly.loop_exit18
                                        # in Loop: Header=BB2_5 Depth=1
        movq        -88(%rbp), %rbx         # 8-byte Reload
        addq        $393216, %rbx           # imm = 0x60000
        cmpq        $1472, %rax             # imm = 0x5C0
# [...]
# BB#7:                                 # %polly.loop_exit11
# [...]
        .size       main, .Ltmp28-main
# Trailing data and section directives (LLVM-generated, GAS syntax).
# Formatting repair only: the listing line numbers fused onto each directive
# were stripped; nothing else was changed.  "# [...]" marks a place where the
# original listing numbering skipped, i.e. lines absent from this excerpt
# (the definition of .L.str itself is among them).
        .type       .L.str,@object          # @.str
        .section    .rodata.str1.1,"aMS",@progbits,1
# [...]
# An empty .note.GNU-stack section is the GNU toolchain's marker that this
# object does not need an executable stack.
        .section    ".note.GNU-stack","",@progbits