1 ; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
2 ; Obtained from C source as:
3 ; clang -S -emit-llvm -O0 vect-outer-fir.c
4 ; opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars -instnamer vect-outer-fir.s > vect-outer-fir.ll
6 ; ModuleID = 'vect-outer-fir.s'
7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
8 target triple = "x86_64-unknown-linux-gnu"
; Zero-initialized float arrays shared by the functions in this module.
;   @out     (40)  - zero-filled and then accumulated into by @foo.
;   @in      (168) - input samples; filled by @main, read by @foo and @fir.
;   @coeff   (128) - coefficients; filled by @main, read by @foo and @fir.
;   @fir_out (40)  - written by @fir; compared against @out in @main.
10 @out = common global [40 x float] zeroinitializer, align 16
11 @in = common global [168 x float] zeroinitializer, align 16
12 @coeff = common global [128 x float] zeroinitializer, align 16
13 @fir_out = common global [40 x float] zeroinitializer, align 16
; @foo: zero-fills @out, then accumulates products of @in and @coeff elements
; into @out with a three-level loop nest:
;   %indvar1 (outer,  0..3)  - starting offset into @coeff
;   %indvar4 (middle, 0..39) - index of the @out element being updated
;   %indvar  (inner)         - steps the @coeff index by 4 from %indvar1
; For each (%indvar1, %indvar4) pair the inner loop sums
;   @in[k + %indvar4] * @coeff[k]   for k = %indvar1 + 4 * %indvar, while k < 128
; and the sum is then added to @out[%indvar4].
15 define void @foo() nounwind uwtable noinline {
; Initialization loop: %indvar20 starts at 0; the body runs while it is != 40.
19 for.cond: ; preds = %for.inc, %entry
20 %indvar20 = phi i64 [ %indvar.next21, %for.inc ], [ 0, %entry ]
21 %arrayidx = getelementptr [40 x float]* @out, i64 0, i64 %indvar20
22 %exitcond22 = icmp ne i64 %indvar20, 40
23 br i1 %exitcond22, label %for.body, label %for.end
25 for.body: ; preds = %for.cond
; @out[%indvar20] = 0.0
26 store float 0.000000e+00, float* %arrayidx, align 4
29 for.inc: ; preds = %for.body
30 %indvar.next21 = add i64 %indvar20, 1
33 for.end: ; preds = %for.cond
; Outer loop: %indvar1 starts at 0; the body runs while it is != 4.
36 for.cond3: ; preds = %for.inc41, %for.end
37 %indvar1 = phi i64 [ %indvar.next2, %for.inc41 ], [ 0, %for.end ]
38 %exitcond13 = icmp ne i64 %indvar1, 4
39 br i1 %exitcond13, label %for.body6, label %for.end44
41 for.body6: ; preds = %for.cond3
; Middle loop: %indvar4 starts at 0; the body runs while it is != 40.
44 for.cond7: ; preds = %for.inc37, %for.body6
45 %indvar4 = phi i64 [ %indvar.next5, %for.inc37 ], [ 0, %for.body6 ]
; %tmp15 = %indvar1 + %indvar4: the part of the @in index that does not
; depend on the inner loop.
46 %tmp15 = add i64 %indvar1, %indvar4
47 %arrayidx34 = getelementptr [40 x float]* @out, i64 0, i64 %indvar4
48 %exitcond = icmp ne i64 %indvar4, 40
49 br i1 %exitcond, label %for.body10, label %for.end40
51 for.body10: ; preds = %for.cond7
; Inner loop: %diff.0 is the running sum, starting at 0.0 on entry from
; %for.body10 and carrying %add26 around the back edge.
54 for.cond12: ; preds = %for.inc27, %for.body10
55 %indvar = phi i64 [ %indvar.next, %for.inc27 ], [ 0, %for.body10 ]
56 %diff.0 = phi float [ 0.000000e+00, %for.body10 ], [ %add26, %for.inc27 ]
; %tmp14 = 4 * %indvar
57 %tmp14 = mul i64 %indvar, 4
; @in index: %indvar1 + %indvar4 + 4 * %indvar
58 %tmp16 = add i64 %tmp15, %tmp14
59 %arrayidx19 = getelementptr [168 x float]* @in, i64 0, i64 %tmp16
; @coeff index: %indvar1 + 4 * %indvar
60 %tmp17 = add i64 %indvar1, %tmp14
; The exit test is done on the i32 truncation of the @coeff index, using a
; signed comparison against 128.
61 %j.0 = trunc i64 %tmp17 to i32
62 %arrayidx23 = getelementptr [128 x float]* @coeff, i64 0, i64 %tmp17
63 %cmp14 = icmp slt i32 %j.0, 128
64 br i1 %cmp14, label %for.body15, label %for.end30
66 for.body15: ; preds = %for.cond12
69 for.inc27: ; preds = %for.body15
; %add26 = %diff.0 + @in[%tmp16] * @coeff[%tmp17]
70 %tmp24 = load float* %arrayidx23, align 4
71 %tmp20 = load float* %arrayidx19, align 4
72 %mul = fmul float %tmp20, %tmp24
73 %add26 = fadd float %diff.0, %mul
74 %indvar.next = add i64 %indvar, 1
77 for.end30: ; preds = %for.cond12
; Single-entry phi carrying the final sum out of the inner loop.
78 %diff.0.lcssa = phi float [ %diff.0, %for.cond12 ]
; @out[%indvar4] += final sum
79 %tmp35 = load float* %arrayidx34, align 4
80 %add36 = fadd float %tmp35, %diff.0.lcssa
81 store float %add36, float* %arrayidx34, align 4
84 for.inc37: ; preds = %for.end30
85 %indvar.next5 = add i64 %indvar4, 1
88 for.end40: ; preds = %for.cond7
91 for.inc41: ; preds = %for.end40
92 %indvar.next2 = add i64 %indvar1, 1
95 for.end44: ; preds = %for.cond3
; @fir: for each i in 0..39, stores into @fir_out[i] the sum over k in 0..127
; of @in[i + k] * @coeff[k], accumulated in increasing k order starting
; from 0.0.
;   %indvar1 - outer induction variable (i)
;   %indvar  - inner induction variable (k)
99 define void @fir() nounwind uwtable noinline {
; Outer loop: %indvar1 starts at 0; the body runs while it is != 40.
103 for.cond: ; preds = %for.inc19, %entry
104 %indvar1 = phi i64 [ %indvar.next2, %for.inc19 ], [ 0, %entry ]
105 %arrayidx18 = getelementptr [40 x float]* @fir_out, i64 0, i64 %indvar1
106 %exitcond4 = icmp ne i64 %indvar1, 40
107 br i1 %exitcond4, label %for.body, label %for.end22
109 for.body: ; preds = %for.cond
; Inner loop: %indvar starts at 0 and the body runs while it is != 128;
; %diff.0 is the running sum, reset to 0.0 on entry from %for.body.
112 for.cond1: ; preds = %for.inc, %for.body
113 %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]
114 %diff.0 = phi float [ 0.000000e+00, %for.body ], [ %add13, %for.inc ]
; @in index: %indvar1 + %indvar
115 %tmp5 = add i64 %indvar1, %indvar
116 %arrayidx = getelementptr [168 x float]* @in, i64 0, i64 %tmp5
117 %arrayidx10 = getelementptr [128 x float]* @coeff, i64 0, i64 %indvar
118 %exitcond = icmp ne i64 %indvar, 128
119 br i1 %exitcond, label %for.body4, label %for.end
121 for.body4: ; preds = %for.cond1
124 for.inc: ; preds = %for.body4
; %add13 = %diff.0 + @in[%tmp5] * @coeff[%indvar]
125 %tmp11 = load float* %arrayidx10, align 4
126 %tmp7 = load float* %arrayidx, align 4
127 %mul = fmul float %tmp7, %tmp11
128 %add13 = fadd float %diff.0, %mul
129 %indvar.next = add i64 %indvar, 1
132 for.end: ; preds = %for.cond1
; Single-entry phi carrying the final sum out of the inner loop; it is
; stored to @fir_out[%indvar1].
133 %diff.0.lcssa = phi float [ %diff.0, %for.cond1 ]
134 store float %diff.0.lcssa, float* %arrayidx18, align 4
137 for.inc19: ; preds = %for.end
138 %indvar.next2 = add i64 %indvar1, 1
141 for.end22: ; preds = %for.cond
; @main: test driver. Calls @check_vect, fills @coeff and @in with their own
; indices converted to float, then compares @out against @fir_out element by
; element and calls @abort on the first pair that does not compare equal.
145 define i32 @main() nounwind uwtable {
147 call void @check_vect()
; Loop 1: @coeff[i] = (float) i for i in 0..127.
150 for.cond: ; preds = %for.inc, %entry
151 %indvar3 = phi i64 [ %indvar.next4, %for.inc ], [ 0, %entry ]
152 %arrayidx = getelementptr [128 x float]* @coeff, i64 0, i64 %indvar3
153 %i.0 = trunc i64 %indvar3 to i32
154 %exitcond5 = icmp ne i64 %indvar3, 128
155 br i1 %exitcond5, label %for.body, label %for.end
157 for.body: ; preds = %for.cond
158 %conv = sitofp i32 %i.0 to float
159 store float %conv, float* %arrayidx, align 4
162 for.inc: ; preds = %for.body
163 %indvar.next4 = add i64 %indvar3, 1
166 for.end: ; preds = %for.cond
; Loop 2: @in[i] = (float) i for i in 0..167.
169 for.cond4: ; preds = %for.inc14, %for.end
170 %indvar1 = phi i64 [ %indvar.next2, %for.inc14 ], [ 0, %for.end ]
171 %arrayidx13 = getelementptr [168 x float]* @in, i64 0, i64 %indvar1
172 %i.1 = trunc i64 %indvar1 to i32
173 %exitcond = icmp ne i64 %indvar1, 168
174 br i1 %exitcond, label %for.body8, label %for.end17
176 for.body8: ; preds = %for.cond4
177 %conv10 = sitofp i32 %i.1 to float
178 store float %conv10, float* %arrayidx13, align 4
181 for.inc14: ; preds = %for.body8
182 %indvar.next2 = add i64 %indvar1, 1
185 for.end17: ; preds = %for.cond4
; Loop 3: verification. Runs while the i32 truncation of %indvar is < 40
; (signed comparison).
; NOTE(review): presumably @out and @fir_out have been produced by @foo and
; @fir before this loop is reached - confirm.
190 for.cond18: ; preds = %for.inc33, %for.end17
191 %indvar = phi i64 [ %indvar.next, %for.inc33 ], [ 0, %for.end17 ]
192 %arrayidx29 = getelementptr [40 x float]* @fir_out, i64 0, i64 %indvar
193 %arrayidx25 = getelementptr [40 x float]* @out, i64 0, i64 %indvar
194 %i.2 = trunc i64 %indvar to i32
195 %cmp20 = icmp slt i32 %i.2, 40
196 br i1 %cmp20, label %for.body22, label %for.end36
198 for.body22: ; preds = %for.cond18
199 %tmp26 = load float* %arrayidx25, align 4
200 %tmp30 = load float* %arrayidx29, align 4
; 'une' is true when the operands are unordered or not equal, so a NaN in
; either array also takes the %if.then (abort) path.
201 %cmp31 = fcmp une float %tmp26, %tmp30
202 br i1 %cmp31, label %if.then, label %if.end
204 if.then: ; preds = %for.body22
205 call void @abort() noreturn nounwind
208 if.end: ; preds = %for.body22
211 for.inc33: ; preds = %if.end
212 %indvar.next = add i64 %indvar, 1
215 for.end36: ; preds = %for.cond18
; @check_vect: runtime CPU feature check. Installs @sig_ill_handler for
; signal 4, queries CPUID leaf 1 through @__get_cpuid, and calls exit(0) if
; the query reports failure or if bit 26 of the returned EDX value is clear.
; Otherwise it calls @signal again for signal 4 with a null handler.
219 define internal void @check_vect() nounwind uwtable noinline {
; Stack slots that receive the four CPUID output registers.
221 %a = alloca i32, align 4
222 %b = alloca i32, align 4
223 %c = alloca i32, align 4
224 %d = alloca i32, align 4
; NOTE(review): signal number 4 is presumably SIGILL, given the handler name
; and the Linux target triple - confirm.
225 %call = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* @sig_ill_handler) nounwind
226 %call1 = call i32 @__get_cpuid(i32 1, i32* %a, i32* %b, i32* %c, i32* %d)
; A zero result from @__get_cpuid goes to %if.then, which calls exit(0).
227 %tobool = icmp eq i32 %call1, 0
228 br i1 %tobool, label %if.then, label %lor.lhs.false
230 lor.lhs.false: ; preds = %entry
; 67108864 == 1 << 26: tests bit 26 of the value stored through the EDX
; pointer.
; NOTE(review): bit 26 of CPUID leaf-1 EDX is the SSE2 flag per the Intel
; SDM - confirm that this is the intended feature.
231 %tmp4 = load i32* %d, align 4
232 %and6 = and i32 %tmp4, 67108864
233 %cmp = icmp eq i32 %and6, 0
234 br i1 %cmp, label %if.then, label %if.end
236 if.then: ; preds = %entry, %lor.lhs.false
237 call void @exit(i32 0) noreturn nounwind
240 if.end: ; preds = %lor.lhs.false
; NOTE(review): the null handler presumably corresponds to SIG_DFL, i.e.
; restoring the default disposition - confirm.
241 %call7 = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* null) nounwind
245 declare void @abort() noreturn
247 declare void (i32)* @signal(i32, void (i32)*) nounwind
; @sig_ill_handler: handler that @check_vect registers for signal 4. It
; ignores %sig and calls exit(0).
249 define internal void @sig_ill_handler(i32 %sig) nounwind uwtable {
251 call void @exit(i32 0) noreturn nounwind
254 return: ; No predecessors!
; @__get_cpuid: executes CPUID for leaf %__level when @__get_cpuid_max
; reports a maximum of at least %__level.
;   %__level                          - CPUID leaf to query
;   %__eax, %__ebx, %__ecx, %__edx    - destinations for the four outputs
; %retval.0 is 0 on the %if.then path (leaf above the reported maximum, no
; stores performed) and 1 on the %if.end path (CPUID executed, all four
; outputs stored).
258 define internal i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx) nounwind uwtable inlinehint {
; -2147483648 == 0x80000000: keeps only the top bit of %__level, which is
; then passed to @__get_cpuid_max as its %__ext argument.
260 %and = and i32 %__level, -2147483648
261 %call = call i32 @__get_cpuid_max(i32 %and, i32* null)
; Unsigned comparison: take %if.then when the reported maximum is below the
; requested leaf.
262 %cmp = icmp ult i32 %call, %__level
263 br i1 %cmp, label %if.then, label %if.end
265 if.then: ; preds = %entry
268 if.end: ; preds = %entry
; Inline asm: outputs are constrained to ax, bx, cx and dx; the single input
; (%__level) is tied to output 0 (ax) by the "0" constraint.
269 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__level) nounwind, !srcloc !0
270 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
271 %asmresult8 = extractvalue { i32, i32, i32, i32 } %tmp, 1
272 %asmresult9 = extractvalue { i32, i32, i32, i32 } %tmp, 2
273 %asmresult10 = extractvalue { i32, i32, i32, i32 } %tmp, 3
274 store i32 %asmresult, i32* %__eax, align 4
275 store i32 %asmresult8, i32* %__ebx, align 4
276 store i32 %asmresult9, i32* %__ecx, align 4
277 store i32 %asmresult10, i32* %__edx, align 4
280 return: ; preds = %if.end, %if.then
281 %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
285 declare void @exit(i32) noreturn
; @__get_cpuid_max: executes CPUID with %__ext as the input value and, when
; %__sig is non-null, stores the second output (the bx-constrained result)
; through %__sig.
; NOTE(review): presumably the function's result is %asmresult (the
; ax-constrained output), which @__get_cpuid uses as the maximum supported
; leaf - confirm.
287 define internal i32 @__get_cpuid_max(i32 %__ext, i32* %__sig) nounwind uwtable inlinehint {
; Inline asm: outputs are constrained to ax, bx, cx and dx; the input is tied
; to output 0 (ax) by the "0" constraint.
289 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__ext) nounwind, !srcloc !1
290 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
; Skip the store when the caller passed a null %__sig.
291 %tobool = icmp eq i32* %__sig, null
292 br i1 %tobool, label %if.end, label %if.then
294 if.then: ; preds = %entry
295 %asmresult1 = extractvalue { i32, i32, i32, i32 } %tmp, 1
296 store i32 %asmresult1, i32* %__sig, align 4
299 if.end: ; preds = %entry, %if.then
303 !0 = metadata !{i32 -2147343105, i32 -2147343097}
304 !1 = metadata !{i32 -2147343228, i32 -2147343220}