Move www/experiments to docs/experiments
[polly-mirror.git] / docs / experiments / matmul / matmul.normalopt.s
blob: 079af702a14fc7310ecd730908adde0255a02a49
1 .file "matmul.normalopt.ll"
2 .section .rodata.cst8,"aM",@progbits,8
3 .align 8
4 .LCPI0_0:
5 .quad 4602678819172646912 # double 0.5
6 .text
7 .globl init_array
8 .align 16, 0x90
9 .type init_array,@function
10 init_array: # @init_array
11 .cfi_startproc
12 # BB#0: # %entry
13 pushq %rbp
14 .Ltmp2:
15 .cfi_def_cfa_offset 16
16 .Ltmp3:
17 .cfi_offset %rbp, -16
18 movq %rsp, %rbp
19 .Ltmp4:
20 .cfi_def_cfa_register %rbp
21 xorl %r8d, %r8d
22 vmovsd .LCPI0_0(%rip), %xmm0
23 .align 16, 0x90
24 .LBB0_1: # %for.cond1.preheader
25 # =>This Loop Header: Depth=1
26 # Child Loop BB0_2 Depth 2
27 xorl %ecx, %ecx
28 .align 16, 0x90
29 .LBB0_2: # %for.body3
30 # Parent Loop BB0_1 Depth=1
31 # => This Inner Loop Header: Depth=2
32 movl %ecx, %edx
33 imull %r8d, %edx
34 movl %edx, %esi
35 sarl $31, %esi
36 shrl $22, %esi
37 addl %edx, %esi
38 andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
39 negl %esi
40 movq %r8, %rax
41 shlq $11, %rax
42 leal 1(%rdx,%rsi), %edi
43 leaq (%rax,%rax,2), %rsi
44 leaq 1(%rcx), %rdx
45 cmpq $1536, %rdx # imm = 0x600
46 vcvtsi2sdl %edi, %xmm0, %xmm1
47 vmulsd %xmm0, %xmm1, %xmm1
48 vcvtsd2ss %xmm1, %xmm1, %xmm1
49 vmovss %xmm1, A(%rsi,%rcx,4)
50 vmovss %xmm1, B(%rsi,%rcx,4)
51 movq %rdx, %rcx
52 jne .LBB0_2
53 # BB#3: # %for.inc17
54 # in Loop: Header=BB0_1 Depth=1
55 incq %r8
56 cmpq $1536, %r8 # imm = 0x600
57 jne .LBB0_1
58 # BB#4: # %for.end19
59 popq %rbp
60 ret
61 .Ltmp5:
62 .size init_array, .Ltmp5-init_array
63 .cfi_endproc
65 .globl print_array
66 .align 16, 0x90
67 .type print_array,@function
68 print_array: # @print_array
69 .cfi_startproc
70 # BB#0: # %entry
71 pushq %rbp
72 .Ltmp9:
73 .cfi_def_cfa_offset 16
74 .Ltmp10:
75 .cfi_offset %rbp, -16
76 movq %rsp, %rbp
77 .Ltmp11:
78 .cfi_def_cfa_register %rbp
79 pushq %r15
80 pushq %r14
81 pushq %r12
82 pushq %rbx
83 .Ltmp12:
84 .cfi_offset %rbx, -48
85 .Ltmp13:
86 .cfi_offset %r12, -40
87 .Ltmp14:
88 .cfi_offset %r14, -32
89 .Ltmp15:
90 .cfi_offset %r15, -24
91 xorl %r14d, %r14d
92 movl $C, %r15d
93 .align 16, 0x90
94 .LBB1_1: # %for.cond1.preheader
95 # =>This Loop Header: Depth=1
96 # Child Loop BB1_2 Depth 2
97 movq stdout(%rip), %rax
98 movq %r15, %r12
99 xorl %ebx, %ebx
100 .align 16, 0x90
101 .LBB1_2: # %for.body3
102 # Parent Loop BB1_1 Depth=1
103 # => This Inner Loop Header: Depth=2
104 vmovss (%r12), %xmm0
105 vcvtss2sd %xmm0, %xmm0, %xmm0
106 movq %rax, %rdi
107 movl $.L.str, %esi
108 movb $1, %al
109 callq fprintf
110 movslq %ebx, %rax
111 imulq $1717986919, %rax, %rcx # imm = 0x66666667
112 movq %rcx, %rdx
113 shrq $63, %rdx
114 sarq $37, %rcx
115 addl %edx, %ecx
116 imull $80, %ecx, %ecx
117 subl %ecx, %eax
118 cmpl $79, %eax
119 jne .LBB1_4
120 # BB#3: # %if.then
121 # in Loop: Header=BB1_2 Depth=2
122 movq stdout(%rip), %rsi
123 movl $10, %edi
124 callq fputc
125 .LBB1_4: # %for.inc
126 # in Loop: Header=BB1_2 Depth=2
127 addq $4, %r12
128 incq %rbx
129 movq stdout(%rip), %rax
130 cmpq $1536, %rbx # imm = 0x600
131 jne .LBB1_2
132 # BB#5: # %for.end
133 # in Loop: Header=BB1_1 Depth=1
134 movl $10, %edi
135 movq %rax, %rsi
136 callq fputc
137 addq $6144, %r15 # imm = 0x1800
138 incq %r14
139 cmpq $1536, %r14 # imm = 0x600
140 jne .LBB1_1
141 # BB#6: # %for.end12
142 popq %rbx
143 popq %r12
144 popq %r14
145 popq %r15
146 popq %rbp
148 .Ltmp16:
149 .size print_array, .Ltmp16-print_array
150 .cfi_endproc
152 .section .rodata.cst8,"aM",@progbits,8
153 .align 8
154 .LCPI2_0:
155 .quad 4602678819172646912 # double 0.5
156 .text
157 .globl main
158 .align 16, 0x90
159 .type main,@function
160 main: # @main
161 .cfi_startproc
162 # BB#0: # %entry
163 pushq %rbp
164 .Ltmp19:
165 .cfi_def_cfa_offset 16
166 .Ltmp20:
167 .cfi_offset %rbp, -16
168 movq %rsp, %rbp
169 .Ltmp21:
170 .cfi_def_cfa_register %rbp
171 xorl %r8d, %r8d
172 vmovsd .LCPI2_0(%rip), %xmm0
173 .align 16, 0x90
174 .LBB2_1: # %for.cond1.preheader.i
175 # =>This Loop Header: Depth=1
176 # Child Loop BB2_2 Depth 2
177 xorl %ecx, %ecx
178 .align 16, 0x90
179 .LBB2_2: # %for.body3.i
180 # Parent Loop BB2_1 Depth=1
181 # => This Inner Loop Header: Depth=2
182 movl %ecx, %edx
183 imull %r8d, %edx
184 movl %edx, %esi
185 sarl $31, %esi
186 shrl $22, %esi
187 addl %edx, %esi
188 andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
189 negl %esi
190 movq %r8, %rax
191 shlq $11, %rax
192 leal 1(%rdx,%rsi), %edi
193 leaq (%rax,%rax,2), %rsi
194 leaq 1(%rcx), %rdx
195 cmpq $1536, %rdx # imm = 0x600
196 vcvtsi2sdl %edi, %xmm0, %xmm1
197 vmulsd %xmm0, %xmm1, %xmm1
198 vcvtsd2ss %xmm1, %xmm1, %xmm1
199 vmovss %xmm1, A(%rsi,%rcx,4)
200 vmovss %xmm1, B(%rsi,%rcx,4)
201 movq %rdx, %rcx
202 jne .LBB2_2
203 # BB#3: # %for.inc17.i
204 # in Loop: Header=BB2_1 Depth=1
205 incq %r8
206 cmpq $1536, %r8 # imm = 0x600
207 jne .LBB2_1
208 # BB#4:
209 xorl %r8d, %r8d
210 movl $A, %r9d
211 .align 16, 0x90
212 .LBB2_5: # %for.cond1.preheader
213 # =>This Loop Header: Depth=1
214 # Child Loop BB2_6 Depth 2
215 # Child Loop BB2_7 Depth 3
216 leaq (%r8,%r8,2), %rdx
217 shlq $11, %rdx
218 leaq C(%rdx), %rsi
219 xorl %edi, %edi
220 .align 16, 0x90
221 .LBB2_6: # %for.body3
222 # Parent Loop BB2_5 Depth=1
223 # => This Loop Header: Depth=2
224 # Child Loop BB2_7 Depth 3
225 movl $0, (%rsi)
226 vxorps %xmm0, %xmm0, %xmm0
227 movq $-9437184, %rax # imm = 0xFFFFFFFFFF700000
228 movq %r9, %rcx
229 .align 16, 0x90
230 .LBB2_7: # %for.body8
231 # Parent Loop BB2_5 Depth=1
232 # Parent Loop BB2_6 Depth=2
233 # => This Inner Loop Header: Depth=3
234 vmovss (%rcx), %xmm1
235 vmulss B+9437184(%rax,%rdi,4), %xmm1, %xmm1
236 vaddss %xmm1, %xmm0, %xmm0
237 addq $4, %rcx
238 addq $6144, %rax # imm = 0x1800
239 jne .LBB2_7
240 # BB#8: # %for.inc25
241 # in Loop: Header=BB2_6 Depth=2
242 vmovss %xmm0, (%rsi)
243 leaq C+4(%rdx,%rdi,4), %rsi
244 incq %rdi
245 cmpq $1536, %rdi # imm = 0x600
246 jne .LBB2_6
247 # BB#9: # %for.inc28
248 # in Loop: Header=BB2_5 Depth=1
249 addq $6144, %r9 # imm = 0x1800
250 incq %r8
251 cmpq $1536, %r8 # imm = 0x600
252 jne .LBB2_5
253 # BB#10: # %for.end30
254 xorl %eax, %eax
255 popq %rbp
257 .Ltmp22:
258 .size main, .Ltmp22-main
259 .cfi_endproc
261 .type A,@object # @A
262 .comm A,9437184,16
263 .type B,@object # @B
264 .comm B,9437184,16
265 .type .L.str,@object # @.str
266 .section .rodata.str1.1,"aMS",@progbits,1
267 .L.str:
268 .asciz "%lf "
269 .size .L.str, 5
271 .type C,@object # @C
272 .comm C,9437184,16
274 .section ".note.GNU-stack","",@progbits