1 .file "matmul.polly.interchanged+tiled+vector.ll"
# ---------------------------------------------------------------------
# init_array  (x86-64, GNU as / AT&T syntax, AVX scalar code)
#
# NOTE(review): this listing appears to be a partial excerpt. The number
# at the start of each line looks like the line number of the original
# .s file; those numbers skip, so the prologue, loop-counter updates,
# conditional branches and the return are not visible here, and several
# instructions are split across consecutive lines.
#
# Visible behaviour: a depth-2 loop nest that, per inner iteration,
# builds one float as (float)(%xmm0 * (double)(int32 value)) and stores
# that same float into both A and B at byte offset %rsi + 4*%rcx.
# ---------------------------------------------------------------------
# Read-only, mergeable 8-byte constant section; the quad below is the
# IEEE-754 bit pattern of the double 0.5.
2 .section .rodata.cst8,"aM",@progbits,8
5 .quad 4602678819172646912 # double 0.5
9 .type init_array,@function
10 init_array
: # @init_array
# Unwind info: the CFA is first described as 16 bytes above the tracked
# register, then re-based on %rbp (the push/mov that set up the frame
# pointer are not visible in this excerpt).
15 .cfi_def_cfa_offset 16
20 .cfi_def_cfa_register %rbp
# xmm0 = double loaded RIP-relative from .LCPI0_0. Presumably that label
# names the 0.5 constant above; its definition is not visible - confirm.
22 vmovsd
.LCPI0_0(%rip), %xmm0
24 .LBB0_1: # %polly.loop_preheader3
25 # =>This Loop Header: Depth=1
26 # Child Loop BB0_2 Depth 2
29 .LBB0_2: # %polly.loop_header2
30 # Parent Loop BB0_1 Depth=1
31 # => This Inner Loop Header: Depth=2
# Clear the low 10 bits of %esi. This is a 32-bit operation, so the
# effective mask is 0xFFFFFC00 and the upper half of %rsi becomes zero;
# the "imm =" note below prints the 64-bit sign-extended form.
38 andl $
-1024, %esi
# imm = 0xFFFFFFFFFFFFFC00
# edi = low 32 bits of (%rdx + %rsi + 1); lea does not modify flags.
42 leal
1(%rdx
,%rsi
), %edi
# rsi = 3 * %rax (overwrites the masked value consumed just above).
# NOTE(review): presumably a row byte offset for the stores below;
# how %rax is derived is not visible here.
43 leaq
(%rax
,%rax
,2), %rsi
# Compare %rdx with 1536. The AVX instructions that follow do not write
# EFLAGS, so the result survives them; the branch that consumes it is
# not visible in this excerpt.
45 cmpq $
1536, %rdx
# imm = 0x600
# Low 64 bits of xmm1 = (double)(int32)%edi; upper bits copied from xmm0.
46 vcvtsi2sdl
%edi
, %xmm0
, %xmm1
# xmm1 = xmm1 * xmm0 (scalar double), i.e. scaled by the loaded constant.
47 vmulsd
%xmm0
, %xmm1
, %xmm1
# Narrow the product from double to single precision.
48 vcvtsd2ss
%xmm1
, %xmm1
, %xmm1
# Store the float to A, then the identical value to B, both at byte
# offset %rsi + 4*%rcx from the symbol (absolute symbol addressing).
49 vmovss
%xmm1
, A(%rsi
,%rcx
,4)
50 vmovss
%xmm1
, B(%rsi
,%rcx
,4)
53 # BB#3: # %polly.loop_exit4
54 # in Loop: Header=BB0_1 Depth=1
# Outer-loop test against 1536 (branch not visible in this excerpt).
56 cmpq $
1536, %r8 # imm = 0x600
58 # BB#4: # %polly.loop_exit
# Symbol size = distance from init_array to .Ltmp5 (label not visible).
62 .size init_array, .Ltmp5-init_array
# ---------------------------------------------------------------------
# print_array  (x86-64, GNU as / AT&T syntax)
#
# NOTE(review): this listing appears to be a partial excerpt. The number
# at the start of each line looks like the line number of the original
# .s file; those numbers skip, so the prologue, every call, the loads of
# the printed element, the branches and the return are not visible.
#
# Visible behaviour: a depth-2 loop nest that reads the global `stdout`
# several times, widens a float in xmm0 to double, and uses 1536 as the
# bound of both loops with a 6144-byte step on %r15 per outer iteration.
# ---------------------------------------------------------------------
67 .type print_array,@function
68 print_array
: # @print_array
# Unwind info: CFA described at offset 16, then re-based on %rbp (the
# frame-pointer setup instructions are not visible in this excerpt).
73 .cfi_def_cfa_offset 16
78 .cfi_def_cfa_register %rbp
94 .LBB1_1: # %for.cond1.preheader
95 # =>This Loop Header: Depth=1
96 # Child Loop BB1_2 Depth 2
# rax = current value of the global `stdout`, loaded RIP-relative.
97 movq stdout
(%rip
), %rax
101 .LBB1_2: # %for.body3
102 # Parent Loop BB1_1 Depth=1
103 # => This Inner Loop Header: Depth=2
# Widen the float in xmm0 to double. NOTE(review): presumably the
# default argument promotion for a variadic print call; the call itself
# is not visible in this excerpt.
105 vcvtss2sd
%xmm0
, %xmm0
, %xmm0
# rcx = %rax * 0x66666667, followed by ecx = ecx * 80.
# NOTE(review): 0x66666667 is the usual reciprocal constant for signed
# division by 5 and its power-of-two multiples, so this looks like a
# strength-reduced "index % 80" test; the shifts and the subtract that
# would complete it are not visible - confirm against the full file.
111 imulq $
1717986919, %rax
, %rcx
# imm = 0x66666667
116 imull $
80, %ecx
, %ecx
121 # in Loop: Header=BB1_2 Depth=2
# rsi = `stdout`. NOTE(review): %rsi is the second integer argument
# register in the SysV ABI, so this is presumably the stream argument
# of a call that is not visible here.
122 movq stdout
(%rip
), %rsi
126 # in Loop: Header=BB1_2 Depth=2
# Reload `stdout` into rax for the next iteration.
129 movq stdout
(%rip
), %rax
# Inner-loop test: %rbx against 1536 (branch not visible).
130 cmpq $
1536, %rbx
# imm = 0x600
133 # in Loop: Header=BB1_1 Depth=1
# Advance %r15 by 6144 bytes, which equals 1536 * 4.
# NOTE(review): presumably one row of 1536 four-byte floats.
137 addq $
6144, %r15 # imm = 0x1800
# Outer-loop test: %r14 against 1536 (branch not visible).
139 cmpq $
1536, %r14 # imm = 0x600
# Symbol size = distance from print_array to .Ltmp16 (label not visible).
149 .size print_array, .Ltmp16-print_array
# ---------------------------------------------------------------------
# main  (x86-64, GNU as / AT&T syntax, AVX)
#
# NOTE(review): this listing appears to be a partial excerpt. The number
# at the start of each line looks like the line number of the original
# .s file; those numbers skip, so the `main:` label, the prologue, all
# calls, loop-counter updates, pointer setup for %r9/%rdi/%r8/%r10,
# every conditional branch and the return are not visible.
#
# Visible structure:
#   1. .LBB2_1/.LBB2_2 - a depth-2 loop with the same visible
#      instruction sequence as the loop in init_array (block names carry
#      an ".i" suffix, presumably because init_array was inlined).
#   2. .LBB2_5 ... .LBB2_18 - a six-deep loop nest whose innermost body
#      does a 4-lane float multiply-add: mem[%rdi] += xmm0 * mem[%r9],
#      with xmm0 holding one element of A broadcast to all lanes.
#
# Stack slots used by the visible spills/reloads (all relative to %rbp):
#   -88  written from %rax before .LBB2_5; base for the -64 value;
#        bumped by 393216 at the end of each outermost iteration
#   -80  holds %rdx across the body of .LBB2_15
#   -72  written from %rax; compared with %rsi at the top of .LBB2_8
#   -64  (-88 value) + 4*%r15; reloaded into %r14
#   -56  written from %rsi; reloaded into %r11 and later into %rsi
#   -48  written from %rax; compared with %r11 in .LBB2_12
#
# Constants: 6144 = 1536*4, 393216 = 64*6144, 1472 = 1536-64,
# 9437184 = 1536*1536*4. NOTE(review): this is consistent with 64-wide
# tiles over 1536x1536 float arrays - confirm against the C source.
# ---------------------------------------------------------------------
# Read-only, mergeable 8-byte constant section; the quad below is the
# IEEE-754 bit pattern of the double 0.5.
152 .section .rodata.cst8,"aM",@progbits,8
155 .quad 4602678819172646912 # double 0.5
# Unwind info: %rbp is saved at CFA-16 and the CFA is re-based on %rbp.
165 .cfi_def_cfa_offset 16
167 .cfi_offset %rbp, -16
170 .cfi_def_cfa_register %rbp
# Unwind info: CFA-relative save slots of the callee-saved registers
# %rbx and %r12-%r15 (the pushes themselves are not visible).
178 .cfi_offset %rbx, -56
180 .cfi_offset %r12, -48
182 .cfi_offset %r13, -40
184 .cfi_offset %r14, -32
186 .cfi_offset %r15, -24
# xmm0 = double loaded RIP-relative from .LCPI2_0. Presumably that label
# names the 0.5 constant above; its definition is not visible - confirm.
188 vmovsd
.LCPI2_0(%rip), %xmm0
190 .LBB2_1: # %polly.loop_preheader3.i
191 # =>This Loop Header: Depth=1
192 # Child Loop BB2_2 Depth 2
195 .LBB2_2: # %polly.loop_header2.i
196 # Parent Loop BB2_1 Depth=1
197 # => This Inner Loop Header: Depth=2
# Clear the low 10 bits of %esi (32-bit op: effective mask 0xFFFFFC00,
# upper half of %rsi zeroed; the "imm =" note shows the 64-bit form).
204 andl $
-1024, %esi
# imm = 0xFFFFFFFFFFFFFC00
# edi = low 32 bits of (%rdx + %rsi + 1).
208 leal
1(%rdx
,%rsi
), %edi
# rsi = 3 * %rax (overwrites the masked value consumed just above).
209 leaq
(%rax
,%rax
,2), %rsi
# Compare %rdx with 1536; the AVX instructions below do not write
# EFLAGS. The consuming branch is not visible in this excerpt.
211 cmpq $
1536, %rdx
# imm = 0x600
# Low 64 bits of xmm1 = (double)(int32)%edi; upper bits copied from xmm0.
212 vcvtsi2sdl
%edi
, %xmm0
, %xmm1
# xmm1 = xmm1 * xmm0 (scalar double).
213 vmulsd
%xmm0
, %xmm1
, %xmm1
# Narrow the product to single precision.
214 vcvtsd2ss
%xmm1
, %xmm1
, %xmm1
# Store the same float into A and B at byte offset %rsi + 4*%rcx.
215 vmovss
%xmm1
, A(%rsi
,%rcx
,4)
216 vmovss
%xmm1
, B(%rsi
,%rcx
,4)
219 # BB#3: # %polly.loop_exit4.i
220 # in Loop: Header=BB2_1 Depth=1
# Outer init-loop test: %rbx against 1536 (branch not visible).
222 cmpq $
1536, %rbx
# imm = 0x600
224 # BB#4: # %polly.loop_preheader3.preheader
# edx = 9437184 = 1536*1536*4. NOTE(review): %rdx is the third integer
# argument register, so this is presumably a byte count passed to a
# call (e.g. clearing the result array) that is not visible here.
227 movl $
9437184, %edx
# imm = 0x900000
# Save %rax in slot -88; it is the base reloaded inside .LBB2_15.
231 movq
%rax
, -88(%rbp
) # 8-byte Spill
233 .LBB2_5: # %polly.loop_preheader17
234 # =>This Loop Header: Depth=1
235 # Child Loop BB2_15 Depth 2
236 # Child Loop BB2_8 Depth 3
237 # Child Loop BB2_11 Depth 4
238 # Child Loop BB2_17 Depth 5
239 # Child Loop BB2_18 Depth 6
# Per-outer-iteration spills: %rsi -> -56, %rax -> -72, %rax -> -48.
# The instructions that recompute %rax between the two stores are not
# visible, so the two slots may hold different values.
240 movq
%rsi
, -56(%rbp
) # 8-byte Spill
243 movq
%rax
, -72(%rbp
) # 8-byte Spill
245 movq
%rax
, -48(%rbp
) # 8-byte Spill
248 .LBB2_15: # %polly.loop_preheader24
249 # Parent Loop BB2_5 Depth=1
250 # => This Loop Header: Depth=2
251 # Child Loop BB2_8 Depth 3
252 # Child Loop BB2_11 Depth 4
253 # Child Loop BB2_17 Depth 5
254 # Child Loop BB2_18 Depth 6
# Keep %rdx in slot -80 while the inner loops run; it is reloaded and
# tested in BB#14.
255 movq
%rdx
, -80(%rbp
) # 8-byte Spill
# slot -64 = (slot -88) + 4*%r15.
269 movq
-88(%rbp
), %rax
# 8-byte Reload
270 leaq
(%rax
,%r15,4), %rax
271 movq
%rax
, -64(%rbp
) # 8-byte Spill
# rbx = address of B + 16 + 4*%r15 (absolute symbol addressing).
272 leaq B+
16(,%r15,4), %rbx
275 .LBB2_8: # %polly.loop_header23
276 # Parent Loop BB2_5 Depth=1
277 # Parent Loop BB2_15 Depth=2
278 # => This Loop Header: Depth=3
279 # Child Loop BB2_11 Depth 4
280 # Child Loop BB2_17 Depth 5
281 # Child Loop BB2_18 Depth 6
# Compare %rsi with the bound kept in slot -72 (branch not visible).
282 cmpq
-72(%rbp
), %rsi
# 8-byte Folded Reload
284 # BB#9: # %polly.loop_header30.preheader
285 # in Loop: Header=BB2_8 Depth=3
290 # BB#10: # in Loop: Header=BB2_8 Depth=3
# Seed the depth-4 loop: r14 = slot -64, r11 = slot -56.
292 movq
-64(%rbp
), %r14 # 8-byte Reload
293 movq
-56(%rbp
), %r11 # 8-byte Reload
295 .LBB2_11: # %polly.loop_header37.preheader
296 # Parent Loop BB2_5 Depth=1
297 # Parent Loop BB2_15 Depth=2
298 # Parent Loop BB2_8 Depth=3
299 # => This Loop Header: Depth=4
300 # Child Loop BB2_17 Depth 5
301 # Child Loop BB2_18 Depth 6
307 .LBB2_17: # %polly.loop_header46.preheader
308 # Parent Loop BB2_5 Depth=1
309 # Parent Loop BB2_15 Depth=2
310 # Parent Loop BB2_8 Depth=3
311 # Parent Loop BB2_11 Depth=4
312 # => This Loop Header: Depth=5
313 # Child Loop BB2_18 Depth 6
# rcx = 3 * %r11.
314 leaq
(%r11,%r11,2), %rcx
# Load one float from A at byte offset %rcx + 4*%rsi and replicate it
# into all four lanes of xmm0; it stays fixed for the innermost loop.
316 vbroadcastss
A(%rcx
,%rsi
,4), %xmm0
320 .LBB2_18: # %polly.loop_header46
321 # Parent Loop BB2_5 Depth=1
322 # Parent Loop BB2_15 Depth=2
323 # Parent Loop BB2_8 Depth=3
324 # Parent Loop BB2_11 Depth=4
325 # Parent Loop BB2_17 Depth=5
326 # => This Inner Loop Header: Depth=6
# xmm1 = (four floats at (%r9)) * xmm0, lane by lane.
# NOTE(review): %r9 presumably walks a row of B; its setup and update
# are not visible in this excerpt.
327 vmulps
(%r9), %xmm0
, %xmm1
# xmm1 += four floats at (%rdi).
328 vaddps
(%rdi
), %xmm1
, %xmm1
# Write the accumulated lanes back to (%rdi). vmovaps requires that
# address to be 16-byte aligned. NOTE(review): %rdi presumably walks
# the result array; its setup and update are not visible here.
329 vmovaps
%xmm1
, (%rdi
)
335 # BB#16: # %polly.loop_exit48
336 # in Loop: Header=BB2_17 Depth=5
# Advance %r8 by 6144 bytes (= 1536*4).
337 addq $
6144, %r8 # imm = 0x1800
342 .LBB2_12: # %polly.loop_exit39
343 # in Loop: Header=BB2_11 Depth=4
# Advance %r14 by 6144 bytes, then compare %r11 with the bound in slot
# -48 (branch not visible).
344 addq $
6144, %r14 # imm = 0x1800
345 cmpq
-48(%rbp
), %r11 # 8-byte Folded Reload
349 .LBB2_13: # %polly.loop_exit32
350 # in Loop: Header=BB2_8 Depth=3
# Advance %rbx by 393216 bytes (= 64 * 6144).
351 addq $
393216, %rbx
# imm = 0x60000
# Depth-3 loop test: %r10 against 1472 (= 1536 - 64); branch not visible.
352 cmpq $
1472, %r10 # imm = 0x5C0
# Restore %rsi from slot -56.
354 movq
-56(%rbp
), %rsi
# 8-byte Reload
356 # BB#14: # %polly.loop_exit25
357 # in Loop: Header=BB2_15 Depth=2
# Restore %rdx from slot -80 and test it against 1472 (branch not
# visible).
358 movq
-80(%rbp
), %rdx
# 8-byte Reload
359 cmpq $
1472, %rdx
# imm = 0x5C0
362 # BB#6: # %polly.loop_exit18
363 # in Loop: Header=BB2_5 Depth=1
# Bump the base kept in slot -88 by 393216 bytes directly in memory,
# then test %rsi against 1472 (branch not visible).
364 addq $
393216, -88(%rbp
) # 8-byte Folded Spill
366 cmpq $
1472, %rsi
# imm = 0x5C0
369 # BB#7: # %polly.loop_exit11
# Symbol size = distance from main to .Ltmp28 (label not visible).
380 .size main, .Ltmp28-main
387 .type .L.str,@object # @.str
388 .section .rodata.str1.1,"aMS",@progbits,1
396 .section ".note.GNU-stack","",@progbits