1 ; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
2 ; Obtained from C source as:
3 ; clang -S -emit-llvm -O0 slp-perm-5.c
4 ; opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars -instnamer slp-perm-5.s > slp-perm-5.ll
6 ; ModuleID = 'slp-perm-5.s'
; Target description for the module: data layout string and target triple.
7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
8 target triple = "x86_64-unknown-linux-gnu"
; Expected-result tables. @main copies each one (64 bytes) into a stack array
; with llvm.memcpy and compares them element by element against the arrays
; that @foo wrote (%output against check_results, %output2 against
; check_results2).
10 @main.check_results = internal unnamed_addr constant [16 x i32] [i32 1470, i32 395, i32 28271, i32 5958, i32 1655, i32 111653, i32 10446, i32 2915, i32 195035, i32 14934, i32 4175, i32 278417, i32 19422, i32 5435, i32 361799, i32 0], align 16
11 @main.check_results2 = internal unnamed_addr constant [16 x i32] [i32 4322, i32 135, i32 13776, i32 629, i32 23230, i32 1123, i32 32684, i32 1617, i32 42138, i32 2111, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
; foo(pInput, pOutput, pInput2, pOutput2)
;
; Loop kernel over the induction variable %indvar, which starts at 0 and is
; incremented by 1 in for.inc; the body is entered while %indvar != 5.
; With i = %indvar, a/b/c = pInput[3i], pInput[3i+1], pInput[3i+2] and
; d/e = pInput2[2i], pInput2[2i+1], each iteration stores:
;   pOutput[3i]    = 100*a + 1322*b  +  74*c
;   pOutput[3i+1]  = 216*a +   13*b  + 191*c
;   pOutput[3i+2]  =  23*a + 27271*b + 500*c
;   pOutput2[2i]   = 405*d + 4322*e
;   pOutput2[2i+1] = 112*d +  135*e
; All four pointer arguments are marked noalias.
; NOTE(review): the entry label, block terminators and closing brace are not
; visible in this excerpt of the function.
13 define void @foo(i32* noalias %pInput, i32* noalias %pOutput, i32* noalias %pInput2, i32* noalias %pOutput2) nounwind uwtable {
; Loop header: all element addresses for this iteration are computed here,
; before the exit test.
17 for.cond: ; preds = %for.inc, %entry
18 %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
; %tmp = 2*i and %tmp1 = 2*i+1 index the stride-2 arrays pInput2 / pOutput2.
19 %tmp = mul i64 %indvar, 2
20 %tmp1 = add i64 %tmp, 1
21 %incdec.ptr49 = getelementptr i32* %pOutput2, i64 %tmp1
22 %pOutput2.addr.0 = getelementptr i32* %pOutput2, i64 %tmp
23 %incdec.ptr10 = getelementptr i32* %pInput2, i64 %tmp1
24 %pInput2.addr.0 = getelementptr i32* %pInput2, i64 %tmp
; %tmp3 = 3*i, %tmp6 = 3*i+1, %tmp4 = 3*i+2 index the stride-3 arrays
; pInput / pOutput.
25 %tmp3 = mul i64 %indvar, 3
26 %tmp4 = add i64 %tmp3, 2
27 %incdec.ptr32 = getelementptr i32* %pOutput, i64 %tmp4
28 %tmp6 = add i64 %tmp3, 1
29 %incdec.ptr22 = getelementptr i32* %pOutput, i64 %tmp6
30 %pOutput.addr.0 = getelementptr i32* %pOutput, i64 %tmp3
31 %incdec.ptr4 = getelementptr i32* %pInput, i64 %tmp4
32 %incdec.ptr = getelementptr i32* %pInput, i64 %tmp6
33 %pInput.addr.0 = getelementptr i32* %pInput, i64 %tmp3
; Continue into the body while i != 5; otherwise leave through for.end.
34 %exitcond = icmp ne i64 %indvar, 5
35 br i1 %exitcond, label %for.body, label %for.end
37 for.body: ; preds = %for.cond
; Load a, b, c (from pInput) and d, e (from pInput2).
38 %tmp2 = load i32* %pInput.addr.0, align 4
39 %tmp5 = load i32* %incdec.ptr, align 4
40 %tmp8 = load i32* %incdec.ptr4, align 4
41 %tmp11 = load i32* %pInput2.addr.0, align 4
42 %tmp14 = load i32* %incdec.ptr10, align 4
; pOutput[3i] = 100*a + 1322*b + 74*c
43 %mul = mul nsw i32 %tmp2, 100
44 %mul17 = mul nsw i32 %tmp5, 1322
45 %add = add nsw i32 %mul, %mul17
46 %mul19 = mul nsw i32 %tmp8, 74
47 %add20 = add nsw i32 %add, %mul19
48 store i32 %add20, i32* %pOutput.addr.0, align 4
; pOutput[3i+1] = 216*a + 13*b + 191*c
49 %mul24 = mul nsw i32 %tmp2, 216
50 %mul26 = mul nsw i32 %tmp5, 13
51 %add27 = add nsw i32 %mul24, %mul26
52 %mul29 = mul nsw i32 %tmp8, 191
53 %add30 = add nsw i32 %add27, %mul29
54 store i32 %add30, i32* %incdec.ptr22, align 4
; pOutput[3i+2] = 23*a + 27271*b + 500*c
55 %mul34 = mul nsw i32 %tmp2, 23
56 %mul36 = mul nsw i32 %tmp5, 27271
57 %add37 = add nsw i32 %mul34, %mul36
58 %mul39 = mul nsw i32 %tmp8, 500
59 %add40 = add nsw i32 %add37, %mul39
60 store i32 %add40, i32* %incdec.ptr32, align 4
; pOutput2[2i] = 405*d + 4322*e
61 %mul44 = mul nsw i32 %tmp11, 405
62 %mul46 = mul nsw i32 %tmp14, 4322
63 %add47 = add nsw i32 %mul44, %mul46
64 store i32 %add47, i32* %pOutput2.addr.0, align 4
; pOutput2[2i+1] = 112*d + 135*e
65 %mul51 = mul nsw i32 %tmp11, 112
66 %mul53 = mul nsw i32 %tmp14, 135
67 %add54 = add nsw i32 %mul51, %mul53
68 store i32 %add54, i32* %incdec.ptr49, align 4
; Latch: advance the induction variable by one.
71 for.inc: ; preds = %for.body
72 %indvar.next = add i64 %indvar, 1
75 for.end: ; preds = %for.cond
; main(argc, argv)
;
; Test driver, in three phases:
;   1. Copy the two constant expected-result tables into stack arrays, call
;      @check_vect, and fill %input / %input2 with 0..15 while zeroing
;      %output / %output2.
;   2. Call @foo on the four arrays.
;   3. Compare %output with %check_results and %output2 with %check_results2
;      for indices 0..15, calling @abort on the first mismatch.
; NOTE(review): the entry label, block terminators and closing brace are not
; visible in this excerpt of the function.
79 define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
81 %input = alloca [16 x i32], align 16
82 %output = alloca [16 x i32], align 16
83 %check_results = alloca [16 x i32], align 16
84 %input2 = alloca [16 x i32], align 16
85 %output2 = alloca [16 x i32], align 16
86 %check_results2 = alloca [16 x i32], align 16
; Initialise the local expected-result arrays from the module constants
; (64 bytes each, 16-byte alignment).
87 %tmp = bitcast [16 x i32]* %check_results to i8*
88 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([16 x i32]* @main.check_results to i8*), i64 64, i32 16, i1 false)
89 %tmp4 = bitcast [16 x i32]* %check_results2 to i8*
90 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp4, i8* bitcast ([16 x i32]* @main.check_results2 to i8*), i64 64, i32 16, i1 false)
; NOTE(review): @check_vect is called twice back to back; the second call
; looks redundant - confirm whether this is intentional.
91 call void @check_vect()
92 call void @check_vect()
; Initialisation loop header: %indvar2 is the 64-bit index, %tmp6 the 32-bit
; value stored into the input arrays; both start at 0 and step by 1.
95 for.cond: ; preds = %for.inc, %entry
96 %indvar2 = phi i64 [ %indvar.next3, %for.inc ], [ 0, %entry ]
97 %tmp6 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
98 %arrayidx18 = getelementptr [16 x i32]* %output2, i64 0, i64 %indvar2
99 %arrayidx15 = getelementptr [16 x i32]* %output, i64 0, i64 %indvar2
100 %arrayidx12 = getelementptr [16 x i32]* %input2, i64 0, i64 %indvar2
101 %arrayidx = getelementptr [16 x i32]* %input, i64 0, i64 %indvar2
; Run the body while the index != 16.
102 %exitcond = icmp ne i64 %indvar2, 16
103 br i1 %exitcond, label %for.body, label %for.end
105 for.body: ; preds = %for.cond
; input[i] = input2[i] = i; output[i] = output2[i] = 0.
106 store i32 %tmp6, i32* %arrayidx, align 4
107 store i32 %tmp6, i32* %arrayidx12, align 4
108 store i32 0, i32* %arrayidx15, align 4
109 store i32 0, i32* %arrayidx18, align 4
; Empty inline asm marked sideeffect; presumably an optimisation barrier for
; this loop - TODO confirm against the C source.
110 call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
113 for.inc: ; preds = %for.body
114 %inc = add nsw i32 %tmp6, 1
115 %indvar.next3 = add i64 %indvar2, 1
; Pass pointers to the first element of each array to the kernel under test.
118 for.end: ; preds = %for.cond
119 %arraydecay = getelementptr inbounds [16 x i32]* %input, i64 0, i64 0
120 %arraydecay20 = getelementptr inbounds [16 x i32]* %output, i64 0, i64 0
121 %arraydecay21 = getelementptr inbounds [16 x i32]* %input2, i64 0, i64 0
122 %arraydecay22 = getelementptr inbounds [16 x i32]* %output2, i64 0, i64 0
123 call void @foo(i32* %arraydecay, i32* %arraydecay20, i32* %arraydecay21, i32* %arraydecay22)
; Verification loop header: index %indvar starts at 0 and steps by 1.
126 for.cond23: ; preds = %for.inc45, %for.end
127 %indvar = phi i64 [ %indvar.next, %for.inc45 ], [ 0, %for.end ]
128 %arrayidx42 = getelementptr [16 x i32]* %check_results2, i64 0, i64 %indvar
129 %arrayidx38 = getelementptr [16 x i32]* %output2, i64 0, i64 %indvar
130 %arrayidx33 = getelementptr [16 x i32]* %check_results, i64 0, i64 %indvar
131 %arrayidx29 = getelementptr [16 x i32]* %output, i64 0, i64 %indvar
; The exit test uses the index truncated to i32 with a signed compare (i < 16).
132 %i.1 = trunc i64 %indvar to i32
133 %cmp25 = icmp slt i32 %i.1, 16
134 br i1 %cmp25, label %for.body26, label %for.end48
136 for.body26: ; preds = %for.cond23
; First check: output[i] == check_results[i]; a mismatch goes to if.then.
137 %tmp30 = load i32* %arrayidx29, align 4
138 %tmp34 = load i32* %arrayidx33, align 4
139 %cmp35 = icmp eq i32 %tmp30, %tmp34
140 br i1 %cmp35, label %lor.lhs.false, label %if.then
142 lor.lhs.false: ; preds = %for.body26
; Second check, reached only when the first matched:
; output2[i] == check_results2[i].
143 %tmp39 = load i32* %arrayidx38, align 4
144 %tmp43 = load i32* %arrayidx42, align 4
145 %cmp44 = icmp eq i32 %tmp39, %tmp43
146 br i1 %cmp44, label %if.end, label %if.then
; Either comparison failed: abort.
148 if.then: ; preds = %lor.lhs.false, %for.body26
149 call void @abort() noreturn nounwind
152 if.end: ; preds = %lor.lhs.false
155 for.inc45: ; preds = %if.end
156 %indvar.next = add i64 %indvar, 1
159 for.end48: ; preds = %for.cond23
; memcpy intrinsic used by @main to copy the constant expected-result tables
; into stack arrays; operands are (dest, src, length i64, alignment i32,
; volatile i1).
163 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
; check_vect()
;
; Runtime capability guard called from @main. It installs @sig_ill_handler
; for signal 4, queries @__get_cpuid with level 1, and calls exit(0) when
; either the query returns 0 or bit 0x04000000 (1 << 26) of the fourth
; result word (%d) is clear. Otherwise it calls signal(4, null) and falls
; through.
; NOTE(review): signal 4 is presumably SIGILL and the tested bit presumably
; the SSE2 feature flag (CPUID leaf 1, EDX bit 26) - confirm.
; NOTE(review): the entry label, block terminators and closing brace are not
; visible in this excerpt of the function.
165 define internal void @check_vect() nounwind uwtable noinline {
; Stack slots that receive the four result words from @__get_cpuid.
167 %a = alloca i32, align 4
168 %b = alloca i32, align 4
169 %c = alloca i32, align 4
170 %d = alloca i32, align 4
171 %call = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* @sig_ill_handler) nounwind
172 %call1 = call i32 @__get_cpuid(i32 1, i32* %a, i32* %b, i32* %c, i32* %d)
; A zero return from @__get_cpuid takes the exit(0) path.
173 %tobool = icmp eq i32 %call1, 0
174 br i1 %tobool, label %if.then, label %lor.lhs.false
176 lor.lhs.false: ; preds = %entry
; Test bit 26 (mask 67108864) of %d; a clear bit also takes the exit(0) path.
177 %tmp4 = load i32* %d, align 4
178 %and6 = and i32 %tmp4, 67108864
179 %cmp = icmp eq i32 %and6, 0
180 br i1 %cmp, label %if.then, label %if.end
182 if.then: ; preds = %entry, %lor.lhs.false
183 call void @exit(i32 0) noreturn nounwind
186 if.end: ; preds = %lor.lhs.false
; Replace the handler installed above with a null handler pointer.
187 %call7 = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* null) nounwind
; External declarations: @abort is called by @main when a result comparison
; fails; @signal is called by @check_vect to install and later replace the
; handler for signal 4.
191 declare void @abort() noreturn
193 declare void (i32)* @signal(i32, void (i32)*) nounwind
; sig_ill_handler(sig)
;
; Signal handler that @check_vect installs for signal 4. It ignores %sig and
; calls exit(0). The `return` block below is annotated as having no
; predecessors.
195 define internal void @sig_ill_handler(i32 %sig) nounwind uwtable {
197 call void @exit(i32 0) noreturn nounwind
200 return: ; No predecessors!
; __get_cpuid(__level, __eax, __ebx, __ecx, __edx) -> i32
;
; Queries @__get_cpuid_max with (__level & 0x80000000) and a null signature
; pointer. If the value returned is unsigned-less-than __level, control goes
; to if.then and the result phi yields 0. Otherwise the cpuid instruction is
; executed with __level as its input operand, the four asm results are stored
; through __eax, __ebx, __ecx and __edx, and the result phi yields 1.
; NOTE(review): the entry label, block terminators and closing brace are not
; visible in this excerpt of the function.
204 define internal i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx) nounwind uwtable inlinehint {
; Keep only the top bit of the requested level (-2147483648 == 0x80000000).
206 %and = and i32 %__level, -2147483648
207 %call = call i32 @__get_cpuid_max(i32 %and, i32* null)
; Unsigned compare: the reported maximum is below the requested level.
208 %cmp = icmp ult i32 %call, %__level
209 br i1 %cmp, label %if.then, label %if.end
211 if.then: ; preds = %entry
214 if.end: ; preds = %entry
; Inline asm: four outputs constrained to ax/bx/cx/dx; the "0" constraint
; ties the input %__level to the first output register.
215 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__level) nounwind, !srcloc !1
216 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
217 %asmresult8 = extractvalue { i32, i32, i32, i32 } %tmp, 1
218 %asmresult9 = extractvalue { i32, i32, i32, i32 } %tmp, 2
219 %asmresult10 = extractvalue { i32, i32, i32, i32 } %tmp, 3
; Hand the four result words back through the caller's pointers.
220 store i32 %asmresult, i32* %__eax, align 4
221 store i32 %asmresult8, i32* %__ebx, align 4
222 store i32 %asmresult9, i32* %__ecx, align 4
223 store i32 %asmresult10, i32* %__edx, align 4
; Merge point: 0 when coming from if.then, 1 when coming from if.end.
226 return: ; preds = %if.end, %if.then
227 %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
; External declaration: @exit is called with status 0 by @check_vect and
; @sig_ill_handler.
231 declare void @exit(i32) noreturn
; __get_cpuid_max(__ext, __sig) -> i32
;
; Executes the cpuid instruction with __ext as its input operand. When __sig
; is non-null, the second asm result (the bx output) is stored through it.
; %asmresult holds the first asm result (the ax output).
; NOTE(review): the return instruction is not visible in this excerpt;
; presumably %asmresult is the return value - confirm.
233 define internal i32 @__get_cpuid_max(i32 %__ext, i32* %__sig) nounwind uwtable inlinehint {
; Inline asm: four outputs constrained to ax/bx/cx/dx; the "0" constraint
; ties the input %__ext to the first output register.
235 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__ext) nounwind, !srcloc !2
236 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
; Skip the store when the caller passed a null signature pointer.
237 %tobool = icmp eq i32* %__sig, null
238 br i1 %tobool, label %if.end, label %if.then
240 if.then: ; preds = %entry
241 %asmresult1 = extractvalue { i32, i32, i32, i32 } %tmp, 1
242 store i32 %asmresult1, i32* %__sig, align 4
245 if.end: ; preds = %entry, %if.then
; Metadata nodes attached as !srcloc to the inline-asm calls: !0 to the empty
; asm in @main, !1 to the cpuid asm in @__get_cpuid, !2 to the cpuid asm in
; @__get_cpuid_max.
249 !0 = metadata !{i32 1386}
250 !1 = metadata !{i32 -2147342501, i32 -2147342493}
251 !2 = metadata !{i32 -2147342624, i32 -2147342616}