; Move www/experiments to docs/experiments
; [polly-mirror.git] / docs / experiments / matmul / matmul.s
; blob 17147be244769dde6c3e060478f001d72725c5e4
; ModuleID = 'matmul.c'
source_filename = "matmul.c"
; Little-endian, ELF-mangled x86-64 Linux target.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; C library stream types. In this module they are referenced only through
; pointers (the @stdout global and the first parameter of @fprintf).
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }

; Matrices: 1536 x 1536 floats each, zero-initialized, 16-byte aligned.
; @A and @B are written by @init_array and read by @main; @C is written by
; @main and read by @print_array.
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
; Output stream, defined outside this module.
@stdout = external global %struct._IO_FILE*, align 8
; "%lf " -- per-element format string used by @print_array.
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
; "\n" -- line break emitted by @print_array.
@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
; Function Attrs: nounwind uwtable
;
; void init_array(void)
;   For every i, j in [0, 1536):
;     A[i][j] = B[i][j] = (float)((1 + (i * j) % 1024) / 2.0)
;   Takes no arguments and returns nothing; its only effect is the stores to
;   @A and @B. Unoptimized form: %i and %j are stack slots reloaded at every
;   use, and the element value is computed separately for each of the two
;   stores.
define void @init_array() #0 {
entry:
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

; Outer loop: i = 0 .. 1535.
for.cond:                                         ; preds = %for.inc17, %entry
  %0 = load i32, i32* %i, align 4
  %cmp = icmp slt i32 %0, 1536
  br i1 %cmp, label %for.body, label %for.end19

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond1

; Inner loop: j = 0 .. 1535.
for.cond1:                                        ; preds = %for.inc, %for.body
  %1 = load i32, i32* %j, align 4
  %cmp2 = icmp slt i32 %1, 1536
  br i1 %cmp2, label %for.body3, label %for.end

for.body3:                                        ; preds = %for.cond1
  ; value = (float)((1 + (i * j) % 1024) / 2.0), division done in double
  %2 = load i32, i32* %i, align 4
  %3 = load i32, i32* %j, align 4
  %mul = mul nsw i32 %2, %3
  %rem = srem i32 %mul, 1024
  %add = add nsw i32 1, %rem
  %conv = sitofp i32 %add to double
  %div = fdiv double %conv, 2.000000e+00
  %conv4 = fptrunc double %div to float
  ; A[i][j] = value
  %4 = load i32, i32* %j, align 4
  %idxprom = sext i32 %4 to i64
  %5 = load i32, i32* %i, align 4
  %idxprom5 = sext i32 %5 to i64
  %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom5
  %arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
  store float %conv4, float* %arrayidx6, align 4
  ; Same expression evaluated again for B[i][j].
  %6 = load i32, i32* %i, align 4
  %7 = load i32, i32* %j, align 4
  %mul7 = mul nsw i32 %6, %7
  %rem8 = srem i32 %mul7, 1024
  %add9 = add nsw i32 1, %rem8
  %conv10 = sitofp i32 %add9 to double
  %div11 = fdiv double %conv10, 2.000000e+00
  %conv12 = fptrunc double %div11 to float
  ; B[i][j] = value
  %8 = load i32, i32* %j, align 4
  %idxprom13 = sext i32 %8 to i64
  %9 = load i32, i32* %i, align 4
  %idxprom14 = sext i32 %9 to i64
  %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom14
  %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
  store float %conv12, float* %arrayidx16, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body3
  %10 = load i32, i32* %j, align 4
  %inc = add nsw i32 %10, 1
  store i32 %inc, i32* %j, align 4
  br label %for.cond1

for.end:                                          ; preds = %for.cond1
  br label %for.inc17

for.inc17:                                        ; preds = %for.end
  %11 = load i32, i32* %i, align 4
  %inc18 = add nsw i32 %11, 1
  store i32 %inc18, i32* %i, align 4
  br label %for.cond

for.end19:                                        ; preds = %for.cond
  ret void
}
; Function Attrs: nounwind uwtable
;
; void print_array(void)
;   Writes all 1536 x 1536 elements of @C to @stdout, row by row. Each element
;   is widened to double and printed with the format "%lf ". A "\n" is written
;   after every element whose column index j satisfies j % 80 == 79, and one
;   more "\n" after the last element of each row.
;   Takes no arguments and returns nothing; the fprintf results are ignored.
define void @print_array() #0 {
entry:
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

; Outer loop: i = 0 .. 1535 (rows).
for.cond:                                         ; preds = %for.inc10, %entry
  %0 = load i32, i32* %i, align 4
  %cmp = icmp slt i32 %0, 1536
  br i1 %cmp, label %for.body, label %for.end12

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond1

; Inner loop: j = 0 .. 1535 (columns).
for.cond1:                                        ; preds = %for.inc, %for.body
  %1 = load i32, i32* %j, align 4
  %cmp2 = icmp slt i32 %1, 1536
  br i1 %cmp2, label %for.body3, label %for.end

for.body3:                                        ; preds = %for.cond1
  ; fprintf(stdout, "%lf ", (double)C[i][j])
  %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
  %3 = load i32, i32* %j, align 4
  %idxprom = sext i32 %3 to i64
  %4 = load i32, i32* %i, align 4
  %idxprom4 = sext i32 %4 to i64
  %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
  %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
  %5 = load float, float* %arrayidx5, align 4
  %conv = fpext float %5 to double
  %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %conv)
  ; Break the output line after every 80th element of the row.
  %6 = load i32, i32* %j, align 4
  %rem = srem i32 %6, 80
  %cmp6 = icmp eq i32 %rem, 79
  br i1 %cmp6, label %if.then, label %if.end

if.then:                                          ; preds = %for.body3
  %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
  %call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
  br label %if.end

if.end:                                           ; preds = %if.then, %for.body3
  br label %for.inc

for.inc:                                          ; preds = %if.end
  %8 = load i32, i32* %j, align 4
  %inc = add nsw i32 %8, 1
  store i32 %inc, i32* %j, align 4
  br label %for.cond1

for.end:                                          ; preds = %for.cond1
  ; End of row: emit a line break.
  %9 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
  %call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
  br label %for.inc10

for.inc10:                                        ; preds = %for.end
  %10 = load i32, i32* %i, align 4
  %inc11 = add nsw i32 %10, 1
  store i32 %inc11, i32* %i, align 4
  br label %for.cond

for.end12:                                        ; preds = %for.cond
  ret void
}
; External variadic fprintf(stream, format, ...); called from @print_array.
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
; Function Attrs: nounwind uwtable
;
; int main(void)
;   Calls init_array(), then computes the matrix product C = A * B with a
;   triple loop over i, j, k in [0, 1536):
;     C[i][j] = 0;  C[i][j] = C[i][j] + A[i][k] * B[k][j]  for each k
;   Always returns 0. @print_array is not called from this function.
;   Unoptimized form: the loop counters are stack slots reloaded at every use,
;   and C[i][j] is loaded from and stored back to memory on every k iteration.
define i32 @main() #0 {
entry:
  ; %retval is written once below and never read.
  %retval = alloca i32, align 4
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %k = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  call void @init_array()
  store i32 0, i32* %i, align 4
  br label %for.cond

; Outer loop: i = 0 .. 1535 (row of C).
for.cond:                                         ; preds = %for.inc28, %entry
  %0 = load i32, i32* %i, align 4
  %cmp = icmp slt i32 %0, 1536
  br i1 %cmp, label %for.body, label %for.end30

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond1

; Middle loop: j = 0 .. 1535 (column of C).
for.cond1:                                        ; preds = %for.inc25, %for.body
  %1 = load i32, i32* %j, align 4
  %cmp2 = icmp slt i32 %1, 1536
  br i1 %cmp2, label %for.body3, label %for.end27

for.body3:                                        ; preds = %for.cond1
  ; C[i][j] = 0.0f
  %2 = load i32, i32* %j, align 4
  %idxprom = sext i32 %2 to i64
  %3 = load i32, i32* %i, align 4
  %idxprom4 = sext i32 %3 to i64
  %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
  %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
  store float 0.000000e+00, float* %arrayidx5, align 4
  store i32 0, i32* %k, align 4
  br label %for.cond6

; Inner loop: k = 0 .. 1535 (reduction index).
for.cond6:                                        ; preds = %for.inc, %for.body3
  %4 = load i32, i32* %k, align 4
  %cmp7 = icmp slt i32 %4, 1536
  br i1 %cmp7, label %for.body8, label %for.end

for.body8:                                        ; preds = %for.cond6
  ; %7 = C[i][j]
  %5 = load i32, i32* %j, align 4
  %idxprom9 = sext i32 %5 to i64
  %6 = load i32, i32* %i, align 4
  %idxprom10 = sext i32 %6 to i64
  %arrayidx11 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom10
  %arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx11, i64 0, i64 %idxprom9
  %7 = load float, float* %arrayidx12, align 4
  ; %10 = A[i][k]
  %8 = load i32, i32* %k, align 4
  %idxprom13 = sext i32 %8 to i64
  %9 = load i32, i32* %i, align 4
  %idxprom14 = sext i32 %9 to i64
  %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom14
  %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
  %10 = load float, float* %arrayidx16, align 4
  ; %13 = B[k][j]
  %11 = load i32, i32* %j, align 4
  %idxprom17 = sext i32 %11 to i64
  %12 = load i32, i32* %k, align 4
  %idxprom18 = sext i32 %12 to i64
  %arrayidx19 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom18
  %arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx19, i64 0, i64 %idxprom17
  %13 = load float, float* %arrayidx20, align 4
  ; C[i][j] = C[i][j] + A[i][k] * B[k][j]  (separate fmul and fadd)
  %mul = fmul float %10, %13
  %add = fadd float %7, %mul
  %14 = load i32, i32* %j, align 4
  %idxprom21 = sext i32 %14 to i64
  %15 = load i32, i32* %i, align 4
  %idxprom22 = sext i32 %15 to i64
  %arrayidx23 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom22
  %arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx23, i64 0, i64 %idxprom21
  store float %add, float* %arrayidx24, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body8
  %16 = load i32, i32* %k, align 4
  %inc = add nsw i32 %16, 1
  store i32 %inc, i32* %k, align 4
  br label %for.cond6

for.end:                                          ; preds = %for.cond6
  br label %for.inc25

for.inc25:                                        ; preds = %for.end
  %17 = load i32, i32* %j, align 4
  %inc26 = add nsw i32 %17, 1
  store i32 %inc26, i32* %j, align 4
  br label %for.cond1

for.end27:                                        ; preds = %for.cond1
  br label %for.inc28

for.inc28:                                        ; preds = %for.end27
  %18 = load i32, i32* %i, align 4
  %inc29 = add nsw i32 %18, 1
  store i32 %inc29, i32* %i, align 4
  br label %for.cond

for.end30:                                        ; preds = %for.cond
  ret i32 0
}
; #0: attribute group of the three functions defined in this module
;     (@init_array, @print_array, @main).
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #1: attribute group of the external @fprintf declaration; same as #0 minus
;     nounwind, uwtable and "no-jump-tables".
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

; Identification string of the compiler that produced this module.
!llvm.ident = !{!0}

!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}