Move www/experiments to docs/experiments
[polly-mirror.git] / docs / experiments / matmul / matmul.polly.interchanged+tiled+vector.s
blob 485d230bc398b326f1286521ab3f310ab340294e
# Syntax: AT&T (GNU as).  Target: x86-64 with AVX.  ABI: System V AMD64.
        .file   "matmul.polly.interchanged+tiled+vector.ll"

        .section        .rodata.cst8,"aM",@progbits,8
        .p2align        3
.Linit_half:
        .quad   0x3FE0000000000000      # IEEE-754 double 0.5

        .text
#-----------------------------------------------------------------------
# init_array
#
# Fills the global float arrays A and B with identical contents.
# For row i in [0,1536) and column j in [0,1536):
#     v       = (float)(0.5 * (double)(((i*j) % 1024) + 1))
#     A[i][j] = B[i][j] = v
# where i*j is a 32-bit signed product and % is the truncating signed
# remainder.  A row is 6144 bytes (1536 four-byte elements).
#
# In:    nothing (no argument register is read)
# Out:   nothing
# Clobb: rax, rcx, rdx, rsi, rdi, r8, xmm0, xmm1, flags
#-----------------------------------------------------------------------
        .globl  init_array
        .p2align        4, 0x90
        .type   init_array,@function
init_array:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp

        vmovsd  .Linit_half(%rip), %xmm0        # xmm0 = 0.5, live for the whole function
        xorl    %r8d, %r8d                      # r8 = i (row index)

        .p2align        4, 0x90
.Linit_next_row:
        xorl    %ecx, %ecx                      # rcx = j (column index)

        .p2align        4, 0x90
.Linit_next_col:
        # edi = ((i*j) % 1024) + 1, using the branch-free signed-remainder idiom
        movl    %ecx, %edx
        imull   %r8d, %edx                      # edx = i*j (32-bit)
        movl    %edx, %esi
        sarl    $31, %esi
        shrl    $22, %esi                       # esi = 1023 if i*j < 0, else 0
        addl    %edx, %esi
        andl    $-1024, %esi                    # esi = i*j rounded toward zero to a multiple of 1024
        negl    %esi
        leal    1(%rdx,%rsi), %edi

        # xmm1 = (float)(0.5 * (double)edi)
        vcvtsi2sdl      %edi, %xmm0, %xmm1
        vmulsd  %xmm0, %xmm1, %xmm1
        vcvtsd2ss       %xmm1, %xmm1, %xmm1

        # rsi = i * 6144 = byte offset of row i
        movq    %r8, %rax
        shlq    $11, %rax
        leaq    (%rax,%rax,2), %rsi

        vmovss  %xmm1, A(%rsi,%rcx,4)           # A[i][j] = v
        vmovss  %xmm1, B(%rsi,%rcx,4)           # B[i][j] = v

        leaq    1(%rcx), %rdx
        movq    %rdx, %rcx                      # ++j
        cmpq    $1536, %rcx
        jne     .Linit_next_col

        incq    %r8                             # ++i
        cmpq    $1536, %r8
        jne     .Linit_next_row

        popq    %rbp
        ret
        .size   init_array, .-init_array
        .cfi_endproc
#-----------------------------------------------------------------------
# print_array
#
# Writes every element of the global float array C to stdout.
# Each element is widened to double and printed with fprintf using the
# format string at .L.str.  A newline is written after every element
# whose column index j satisfies j % 80 == 79, and once more at the end
# of each row.  Rows are 1536 elements / 6144 bytes apart; 1536 rows are
# printed.
#
# ABI:   System V AMD64
# In:    nothing (no argument register is read)
# Out:   no return value is set up
# Saved: rbp, r15, r14, r12, rbx (pushed/popped here)
# Stack: return address + 5 pushes = 48 bytes, so rsp is 16-byte aligned
#        at every call below.
#
# Register roles:
#   r14 = row counter
#   r15 = address of the first element of the current row of C
#   r12 = address of the current element
#   rbx = column index j
#   rax = FILE* loaded from stdout, carried into the next fprintf/fputc
#-----------------------------------------------------------------------
        .globl  print_array
        .align  16, 0x90
        .type   print_array,@function
print_array:                            # @print_array
        .cfi_startproc
# BB#0:                                 # %entry
        pushq   %rbp
.Ltmp9:
        .cfi_def_cfa_offset 16
.Ltmp10:
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
.Ltmp11:
        .cfi_def_cfa_register %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r12
        pushq   %rbx
.Ltmp12:
        .cfi_offset %rbx, -48
.Ltmp13:
        .cfi_offset %r12, -40
.Ltmp14:
        .cfi_offset %r14, -32
.Ltmp15:
        .cfi_offset %r15, -24
        xorl    %r14d, %r14d            # row counter = 0
        movl    $C, %r15d               # r15 = &C[0][0] (absolute address: non-PIC code)
        .align  16, 0x90
.LBB1_1:                                # %for.cond1.preheader
                                        # row loop
        movq    stdout(%rip), %rax
        movq    %r15, %r12              # element pointer = start of row
        xorl    %ebx, %ebx              # j = 0
        .align  16, 0x90
.LBB1_2:                                # %for.body3
                                        # column loop
        vmovss  (%r12), %xmm0
        vcvtss2sd       %xmm0, %xmm0, %xmm0     # float -> double for the variadic call
        movq    %rax, %rdi              # arg0 = stdout
        movl    $.L.str, %esi           # arg1 = format string
        movb    $1, %al                 # one vector register carries a variadic argument
        callq   fprintf
        # eax = j % 80, via multiply-by-reciprocal (0x66666667 >> 37 == 1/80)
        movslq  %ebx, %rax
        imulq   $1717986919, %rax, %rcx # imm = 0x66666667
        movq    %rcx, %rdx
        shrq    $63, %rdx               # sign correction for negative dividends
        sarq    $37, %rcx
        addl    %edx, %ecx              # ecx = j / 80
        imull   $80, %ecx, %ecx
        subl    %ecx, %eax              # eax = j - 80*(j/80)
        cmpl    $79, %eax
        jne     .LBB1_4
# BB#3:                                 # %if.then
        movq    stdout(%rip), %rsi
        movl    $10, %edi               # '\n'
        callq   fputc
.LBB1_4:                                # %for.inc
        addq    $4, %r12                # next element
        incq    %rbx                    # ++j
        movq    stdout(%rip), %rax      # reload: the calls above clobber rax
        cmpq    $1536, %rbx             # imm = 0x600
        jne     .LBB1_2
# BB#5:                                 # %for.end
        movl    $10, %edi               # '\n' at end of row
        movq    %rax, %rsi
        callq   fputc
        addq    $6144, %r15             # imm = 0x1800, advance to next row
        incq    %r14
        cmpq    $1536, %r14             # imm = 0x600
        jne     .LBB1_1
# BB#6:                                 # %for.end12
        popq    %rbx
        popq    %r12
        popq    %r14
        popq    %r15
        popq    %rbp
        ret                             # return to caller; without this, execution
                                        # would continue into the code that follows
.Ltmp16:
        .size   print_array, .Ltmp16-print_array
        .cfi_endproc
152 .section .rodata.cst8,"aM",@progbits,8
153 .align 8
154 .LCPI2_0:
155 .quad 4602678819172646912 # double 0.5
156 .text
157 .globl main
158 .align 16, 0x90
159 .type main,@function
160 main: # @main
161 .cfi_startproc
162 # BB#0: # %entry
163 pushq %rbp
164 .Ltmp20:
165 .cfi_def_cfa_offset 16
166 .Ltmp21:
167 .cfi_offset %rbp, -16
168 movq %rsp, %rbp
169 .Ltmp22:
170 .cfi_def_cfa_register %rbp
171 pushq %r15
172 pushq %r14
173 pushq %r13
174 pushq %r12
175 pushq %rbx
176 subq $56, %rsp
177 .Ltmp23:
178 .cfi_offset %rbx, -56
179 .Ltmp24:
180 .cfi_offset %r12, -48
181 .Ltmp25:
182 .cfi_offset %r13, -40
183 .Ltmp26:
184 .cfi_offset %r14, -32
185 .Ltmp27:
186 .cfi_offset %r15, -24
187 xorl %ebx, %ebx
188 vmovsd .LCPI2_0(%rip), %xmm0
189 .align 16, 0x90
190 .LBB2_1: # %polly.loop_preheader3.i
191 # =>This Loop Header: Depth=1
192 # Child Loop BB2_2 Depth 2
193 xorl %ecx, %ecx
194 .align 16, 0x90
195 .LBB2_2: # %polly.loop_header2.i
196 # Parent Loop BB2_1 Depth=1
197 # => This Inner Loop Header: Depth=2
198 movl %ecx, %edx
199 imull %ebx, %edx
200 movl %edx, %esi
201 sarl $31, %esi
202 shrl $22, %esi
203 addl %edx, %esi
204 andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
205 negl %esi
206 movq %rbx, %rax
207 shlq $11, %rax
208 leal 1(%rdx,%rsi), %edi
209 leaq (%rax,%rax,2), %rsi
210 leaq 1(%rcx), %rdx
211 cmpq $1536, %rdx # imm = 0x600
212 vcvtsi2sdl %edi, %xmm0, %xmm1
213 vmulsd %xmm0, %xmm1, %xmm1
214 vcvtsd2ss %xmm1, %xmm1, %xmm1
215 vmovss %xmm1, A(%rsi,%rcx,4)
216 vmovss %xmm1, B(%rsi,%rcx,4)
217 movq %rdx, %rcx
218 jne .LBB2_2
219 # BB#3: # %polly.loop_exit4.i
220 # in Loop: Header=BB2_1 Depth=1
221 incq %rbx
222 cmpq $1536, %rbx # imm = 0x600
223 jne .LBB2_1
224 # BB#4: # %polly.loop_preheader3.preheader
225 movl $C, %edi
226 xorl %esi, %esi
227 movl $9437184, %edx # imm = 0x900000
228 callq memset
229 xorl %esi, %esi
230 movl $C+16, %eax
231 movq %rax, -88(%rbp) # 8-byte Spill
232 .align 16, 0x90
233 .LBB2_5: # %polly.loop_preheader17
234 # =>This Loop Header: Depth=1
235 # Child Loop BB2_15 Depth 2
236 # Child Loop BB2_8 Depth 3
237 # Child Loop BB2_11 Depth 4
238 # Child Loop BB2_17 Depth 5
239 # Child Loop BB2_18 Depth 6
240 movq %rsi, -56(%rbp) # 8-byte Spill
241 movq %rsi, %rax
242 orq $63, %rax
243 movq %rax, -72(%rbp) # 8-byte Spill
244 leaq -1(%rax), %rax
245 movq %rax, -48(%rbp) # 8-byte Spill
246 xorl %edx, %edx
247 .align 16, 0x90
248 .LBB2_15: # %polly.loop_preheader24
249 # Parent Loop BB2_5 Depth=1
250 # => This Loop Header: Depth=2
251 # Child Loop BB2_8 Depth 3
252 # Child Loop BB2_11 Depth 4
253 # Child Loop BB2_17 Depth 5
254 # Child Loop BB2_18 Depth 6
255 movq %rdx, -80(%rbp) # 8-byte Spill
256 leaq -4(%rdx), %rcx
257 movq %rdx, %rax
258 decq %rax
259 cmovsq %rcx, %rax
260 movq %rax, %r15
261 sarq $63, %r15
262 shrq $62, %r15
263 addq %rax, %r15
264 andq $-4, %r15
265 movq %rdx, %r13
266 orq $63, %r13
267 leaq -4(%r13), %rdx
268 xorl %r10d, %r10d
269 movq -88(%rbp), %rax # 8-byte Reload
270 leaq (%rax,%r15,4), %rax
271 movq %rax, -64(%rbp) # 8-byte Spill
272 leaq B+16(,%r15,4), %rbx
273 leaq 4(%r15), %r12
274 .align 16, 0x90
275 .LBB2_8: # %polly.loop_header23
276 # Parent Loop BB2_5 Depth=1
277 # Parent Loop BB2_15 Depth=2
278 # => This Loop Header: Depth=3
279 # Child Loop BB2_11 Depth 4
280 # Child Loop BB2_17 Depth 5
281 # Child Loop BB2_18 Depth 6
282 cmpq -72(%rbp), %rsi # 8-byte Folded Reload
283 jg .LBB2_13
284 # BB#9: # %polly.loop_header30.preheader
285 # in Loop: Header=BB2_8 Depth=3
286 movq %r10, %rax
287 orq $63, %rax
288 cmpq %rax, %r10
289 jg .LBB2_13
290 # BB#10: # in Loop: Header=BB2_8 Depth=3
291 decq %rax
292 movq -64(%rbp), %r14 # 8-byte Reload
293 movq -56(%rbp), %r11 # 8-byte Reload
294 .align 16, 0x90
295 .LBB2_11: # %polly.loop_header37.preheader
296 # Parent Loop BB2_5 Depth=1
297 # Parent Loop BB2_15 Depth=2
298 # Parent Loop BB2_8 Depth=3
299 # => This Loop Header: Depth=4
300 # Child Loop BB2_17 Depth 5
301 # Child Loop BB2_18 Depth 6
302 cmpq %r13, %r12
303 movq %rbx, %r8
304 movq %r10, %rsi
305 jg .LBB2_12
306 .align 16, 0x90
307 .LBB2_17: # %polly.loop_header46.preheader
308 # Parent Loop BB2_5 Depth=1
309 # Parent Loop BB2_15 Depth=2
310 # Parent Loop BB2_8 Depth=3
311 # Parent Loop BB2_11 Depth=4
312 # => This Loop Header: Depth=5
313 # Child Loop BB2_18 Depth 6
314 leaq (%r11,%r11,2), %rcx
315 shlq $11, %rcx
316 vbroadcastss A(%rcx,%rsi,4), %xmm0
317 movq %r14, %rdi
318 movq %r8, %r9
319 movq %r15, %rcx
320 .LBB2_18: # %polly.loop_header46
321 # Parent Loop BB2_5 Depth=1
322 # Parent Loop BB2_15 Depth=2
323 # Parent Loop BB2_8 Depth=3
324 # Parent Loop BB2_11 Depth=4
325 # Parent Loop BB2_17 Depth=5
326 # => This Inner Loop Header: Depth=6
327 vmulps (%r9), %xmm0, %xmm1
328 vaddps (%rdi), %xmm1, %xmm1
329 vmovaps %xmm1, (%rdi)
330 addq $16, %rdi
331 addq $16, %r9
332 addq $4, %rcx
333 cmpq %rdx, %rcx
334 jle .LBB2_18
335 # BB#16: # %polly.loop_exit48
336 # in Loop: Header=BB2_17 Depth=5
337 addq $6144, %r8 # imm = 0x1800
338 cmpq %rax, %rsi
339 leaq 1(%rsi), %rsi
340 jle .LBB2_17
341 .align 16, 0x90
342 .LBB2_12: # %polly.loop_exit39
343 # in Loop: Header=BB2_11 Depth=4
344 addq $6144, %r14 # imm = 0x1800
345 cmpq -48(%rbp), %r11 # 8-byte Folded Reload
346 leaq 1(%r11), %r11
347 jle .LBB2_11
348 .align 16, 0x90
349 .LBB2_13: # %polly.loop_exit32
350 # in Loop: Header=BB2_8 Depth=3
351 addq $393216, %rbx # imm = 0x60000
352 cmpq $1472, %r10 # imm = 0x5C0
353 leaq 64(%r10), %r10
354 movq -56(%rbp), %rsi # 8-byte Reload
355 jl .LBB2_8
356 # BB#14: # %polly.loop_exit25
357 # in Loop: Header=BB2_15 Depth=2
358 movq -80(%rbp), %rdx # 8-byte Reload
359 cmpq $1472, %rdx # imm = 0x5C0
360 leaq 64(%rdx), %rdx
361 jl .LBB2_15
362 # BB#6: # %polly.loop_exit18
363 # in Loop: Header=BB2_5 Depth=1
364 addq $393216, -88(%rbp) # 8-byte Folded Spill
365 # imm = 0x60000
366 cmpq $1472, %rsi # imm = 0x5C0
367 leaq 64(%rsi), %rsi
368 jl .LBB2_5
369 # BB#7: # %polly.loop_exit11
370 xorl %eax, %eax
371 addq $56, %rsp
372 popq %rbx
373 popq %r12
374 popq %r13
375 popq %r14
376 popq %r15
377 popq %rbp
379 .Ltmp28:
380 .size main, .Ltmp28-main
381 .cfi_endproc
# Global storage.
# A, B and C are common symbols of 9437184 bytes each (1536 * 1536 * 4),
# aligned to 16 bytes.
        .type   A,@object
        .comm   A,9437184,16

        .type   B,@object
        .comm   B,9437184,16

# Format string used by print_array: four characters plus the NUL terminator.
        .type   .L.str,@object
        .section        .rodata.str1.1,"aMS",@progbits,1
.L.str:
        .string "%lf "
        .size   .L.str, . - .L.str

        .type   C,@object
        .comm   C,9437184,16

# Empty .note.GNU-stack section: the object does not need an executable stack.
        .section        ".note.GNU-stack","",@progbits