1 ; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
2 ; Obtained from C source as:
3 ; clang -S -emit-llvm -O0 slp-perm-6.c
4 ; opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars -instnamer slp-perm-6.s > slp-perm-6.ll
6 ; ModuleID = 'slp-perm-6.s'
; Target description: little-endian layout with 64-bit pointers, x86-64 Linux.
7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
8 target triple = "x86_64-unknown-linux-gnu"
; Reference tables (16 x i32 each). @main copies them into stack arrays and
; compares them element by element against the two output arrays filled by @foo.
10 @main.check_results = internal unnamed_addr constant [16 x i32] [i32 1470, i32 395, i32 28271, i32 5958, i32 1655, i32 111653, i32 10446, i32 2915, i32 195035, i32 14934, i32 4175, i32 278417, i32 19422, i32 5435, i32 361799, i32 0], align 16
11 @main.check_results2 = internal unnamed_addr constant [16 x i32] [i32 0, i32 112, i32 810, i32 336, i32 1620, i32 560, i32 2430, i32 784, i32 3240, i32 1008, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
; @foo(pInput, pOutput, pInput2, pOutput2) -- the loop kernel this test feeds
; to Polly. All four pointer arguments are marked noalias.
;
; The body runs for %indvar = 0..4 (for.cond leaves the loop when %indvar == 5).
; In iteration i it
;   - loads pInput[3i], pInput[3i+1], pInput[3i+2] and pInput2[2i], pInput2[2i+1];
;   - stores three different weighted sums of the pInput triple to
;     pOutput[3i], pOutput[3i+1], pOutput[3i+2];
;   - stores pInput2[2i] * 405 to pOutput2[2i] and pInput2[2i+1] * 112 to
;     pOutput2[2i+1].
;
; NOTE(review): the entry block, the terminators of for.body / for.inc /
; for.end, and the closing brace are not visible in this copy of the file --
; confirm against the upstream test before treating this text as valid IR.
13 define void @foo(i32* noalias %pInput, i32* noalias %pOutput, i32* noalias %pInput2, i32* noalias %pOutput2) nounwind uwtable {
17 for.cond: ; preds = %for.inc, %entry
18 %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
; %tmp = 2*i and %tmp1 = 2*i + 1 index the stride-2 arrays pInput2 / pOutput2.
19 %tmp = mul i64 %indvar, 2
20 %tmp1 = add i64 %tmp, 1
21 %incdec.ptr46 = getelementptr i32* %pOutput2, i64 %tmp1
22 %pOutput2.addr.0 = getelementptr i32* %pOutput2, i64 %tmp
23 %incdec.ptr10 = getelementptr i32* %pInput2, i64 %tmp1
24 %pInput2.addr.0 = getelementptr i32* %pInput2, i64 %tmp
; %tmp3 = 3*i, %tmp6 = 3*i + 1, %tmp4 = 3*i + 2 index the stride-3 arrays
; pInput / pOutput.
25 %tmp3 = mul i64 %indvar, 3
26 %tmp4 = add i64 %tmp3, 2
27 %incdec.ptr32 = getelementptr i32* %pOutput, i64 %tmp4
28 %tmp6 = add i64 %tmp3, 1
29 %incdec.ptr22 = getelementptr i32* %pOutput, i64 %tmp6
30 %pOutput.addr.0 = getelementptr i32* %pOutput, i64 %tmp3
31 %incdec.ptr4 = getelementptr i32* %pInput, i64 %tmp4
32 %incdec.ptr = getelementptr i32* %pInput, i64 %tmp6
33 %pInput.addr.0 = getelementptr i32* %pInput, i64 %tmp3
; Continue into the body while i != 5, i.e. five iterations in total.
34 %exitcond = icmp ne i64 %indvar, 5
35 br i1 %exitcond, label %for.body, label %for.end
37 for.body: ; preds = %for.cond
; a = pInput[3i], b = pInput[3i+1], c = pInput[3i+2]
38 %tmp2 = load i32* %pInput.addr.0, align 4
39 %tmp5 = load i32* %incdec.ptr, align 4
40 %tmp8 = load i32* %incdec.ptr4, align 4
; x = pInput2[2i], y = pInput2[2i+1]
41 %tmp11 = load i32* %pInput2.addr.0, align 4
42 %tmp14 = load i32* %incdec.ptr10, align 4
; pOutput[3i] = a*100 + b*1322 + c*74
43 %mul = mul nsw i32 %tmp2, 100
44 %mul17 = mul nsw i32 %tmp5, 1322
45 %add = add nsw i32 %mul, %mul17
46 %mul19 = mul nsw i32 %tmp8, 74
47 %add20 = add nsw i32 %add, %mul19
48 store i32 %add20, i32* %pOutput.addr.0, align 4
; pOutput[3i+1] = a*216 + b*13 + c*191
49 %mul24 = mul nsw i32 %tmp2, 216
50 %mul26 = mul nsw i32 %tmp5, 13
51 %add27 = add nsw i32 %mul24, %mul26
52 %mul29 = mul nsw i32 %tmp8, 191
53 %add30 = add nsw i32 %add27, %mul29
54 store i32 %add30, i32* %incdec.ptr22, align 4
; pOutput[3i+2] = a*23 + b*27271 + c*500
55 %mul34 = mul nsw i32 %tmp2, 23
56 %mul36 = mul nsw i32 %tmp5, 27271
57 %add37 = add nsw i32 %mul34, %mul36
58 %mul39 = mul nsw i32 %tmp8, 500
59 %add40 = add nsw i32 %add37, %mul39
60 store i32 %add40, i32* %incdec.ptr32, align 4
; pOutput2[2i] = x*405
61 %mul44 = mul nsw i32 %tmp11, 405
62 store i32 %mul44, i32* %pOutput2.addr.0, align 4
; pOutput2[2i+1] = y*112
63 %mul48 = mul nsw i32 %tmp14, 112
64 store i32 %mul48, i32* %incdec.ptr46, align 4
67 for.inc: ; preds = %for.body
; i = i + 1
68 %indvar.next = add i64 %indvar, 1
71 for.end: ; preds = %for.cond
; @main -- test driver. It
;   1. copies the two reference tables into stack arrays and calls @check_vect;
;   2. fills input[i] and input2[i] with i and zeroes output[i] and output2[i]
;      for i = 0..15;
;   3. calls @foo on the four arrays;
;   4. for i = 0..15 compares output[i] with check_results[i] and output2[i]
;      with check_results2[i], calling @abort on the first mismatch.
; %argc and %argv are not used by any instruction visible here.
;
; NOTE(review): the entry label, several block terminators, the return
; instruction and the closing brace are not visible in this copy of the file --
; confirm against the upstream test.
75 define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
77 %input = alloca [16 x i32], align 16
78 %output = alloca [16 x i32], align 16
79 %check_results = alloca [16 x i32], align 16
80 %input2 = alloca [16 x i32], align 16
81 %output2 = alloca [16 x i32], align 16
82 %check_results2 = alloca [16 x i32], align 16
; Copy 64 bytes (16 x i32) from each constant table into its stack copy.
83 %tmp = bitcast [16 x i32]* %check_results to i8*
84 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([16 x i32]* @main.check_results to i8*), i64 64, i32 16, i1 false)
85 %tmp4 = bitcast [16 x i32]* %check_results2 to i8*
86 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp4, i8* bitcast ([16 x i32]* @main.check_results2 to i8*), i64 64, i32 16, i1 false)
87 call void @check_vect()
; ---- Initialisation loop: %indvar2 is the i64 index, %tmp6 the i32 value i.
90 for.cond: ; preds = %for.inc, %entry
91 %indvar2 = phi i64 [ %indvar.next3, %for.inc ], [ 0, %entry ]
92 %tmp6 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
93 %arrayidx18 = getelementptr [16 x i32]* %output2, i64 0, i64 %indvar2
94 %arrayidx15 = getelementptr [16 x i32]* %output, i64 0, i64 %indvar2
95 %arrayidx12 = getelementptr [16 x i32]* %input2, i64 0, i64 %indvar2
96 %arrayidx = getelementptr [16 x i32]* %input, i64 0, i64 %indvar2
; Run the body while the index != 16, i.e. for all 16 elements.
97 %exitcond = icmp ne i64 %indvar2, 16
98 br i1 %exitcond, label %for.body, label %for.end
100 for.body: ; preds = %for.cond
; input[i] = input2[i] = i; output[i] = output2[i] = 0
101 store i32 %tmp6, i32* %arrayidx, align 4
102 store i32 %tmp6, i32* %arrayidx12, align 4
103 store i32 0, i32* %arrayidx15, align 4
104 store i32 0, i32* %arrayidx18, align 4
; Empty inline asm marked sideeffect.
; NOTE(review): presumably present to keep this init loop from being
; vectorized/optimized -- confirm against the C source.
105 call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
108 for.inc: ; preds = %for.body
109 %inc = add nsw i32 %tmp6, 1
110 %indvar.next3 = add i64 %indvar2, 1
113 for.end: ; preds = %for.cond
; Decay each array to a pointer to its first element and run the kernel.
114 %arraydecay = getelementptr inbounds [16 x i32]* %input, i64 0, i64 0
115 %arraydecay20 = getelementptr inbounds [16 x i32]* %output, i64 0, i64 0
116 %arraydecay21 = getelementptr inbounds [16 x i32]* %input2, i64 0, i64 0
117 %arraydecay22 = getelementptr inbounds [16 x i32]* %output2, i64 0, i64 0
118 call void @foo(i32* %arraydecay, i32* %arraydecay20, i32* %arraydecay21, i32* %arraydecay22)
; ---- Verification loop over i = 0..15.
121 for.cond23: ; preds = %for.inc45, %for.end
122 %indvar = phi i64 [ %indvar.next, %for.inc45 ], [ 0, %for.end ]
123 %arrayidx42 = getelementptr [16 x i32]* %check_results2, i64 0, i64 %indvar
124 %arrayidx38 = getelementptr [16 x i32]* %output2, i64 0, i64 %indvar
125 %arrayidx33 = getelementptr [16 x i32]* %check_results, i64 0, i64 %indvar
126 %arrayidx29 = getelementptr [16 x i32]* %output, i64 0, i64 %indvar
; The bound check is done on the index truncated to i32 (signed i < 16).
127 %i.1 = trunc i64 %indvar to i32
128 %cmp25 = icmp slt i32 %i.1, 16
129 br i1 %cmp25, label %for.body26, label %for.end48
131 for.body26: ; preds = %for.cond23
; First check: output[i] == check_results[i]; a mismatch goes to if.then.
132 %tmp30 = load i32* %arrayidx29, align 4
133 %tmp34 = load i32* %arrayidx33, align 4
134 %cmp35 = icmp eq i32 %tmp30, %tmp34
135 br i1 %cmp35, label %lor.lhs.false, label %if.then
137 lor.lhs.false: ; preds = %for.body26
; Second check: output2[i] == check_results2[i]; a mismatch goes to if.then.
138 %tmp39 = load i32* %arrayidx38, align 4
139 %tmp43 = load i32* %arrayidx42, align 4
140 %cmp44 = icmp eq i32 %tmp39, %tmp43
141 br i1 %cmp44, label %if.end, label %if.then
143 if.then: ; preds = %lor.lhs.false, %for.body26
; Either comparison failed: abort the test program.
144 call void @abort() noreturn nounwind
147 if.end: ; preds = %lor.lhs.false
150 for.inc45: ; preds = %if.end
151 %indvar.next = add i64 %indvar, 1
154 for.end48: ; preds = %for.cond23
; memcpy intrinsic used by @main to copy the reference tables onto the stack.
; Operands are (i8* dest, i8* src, i64 length, i32, i1); in this legacy
; five-operand form the i32 is the alignment and the i1 the volatile flag.
158 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
; @check_vect -- runtime guard called at the start of @main.
;   - installs @sig_ill_handler for signal 4 (SIGILL on Linux x86-64, which
;     matches the handler's name);
;   - calls @__get_cpuid(1, &a, &b, &c, &d);
;   - calls exit(0) if that call returned 0, or if bit 26 (mask 67108864) of
;     the value stored through %d is clear;
;   - otherwise calls signal(4, null).
; NOTE(review): bit 26 of EDX for CPUID leaf 1 is presumably the SSE2 feature
; flag -- confirm. The entry label, the terminators after the exit/signal
; calls and the closing brace are not visible in this copy of the file.
160 define internal void @check_vect() nounwind uwtable noinline {
; Stack slots that receive the four CPUID result registers.
162 %a = alloca i32, align 4
163 %b = alloca i32, align 4
164 %c = alloca i32, align 4
165 %d = alloca i32, align 4
166 %call = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* @sig_ill_handler) nounwind
167 %call1 = call i32 @__get_cpuid(i32 1, i32* %a, i32* %b, i32* %c, i32* %d)
; A zero return from @__get_cpuid goes straight to the exit(0) path.
168 %tobool = icmp eq i32 %call1, 0
169 br i1 %tobool, label %if.then, label %lor.lhs.false
171 lor.lhs.false: ; preds = %entry
; Test bit 26 (67108864 == 1 << 26) of the value written through %d.
172 %tmp4 = load i32* %d, align 4
173 %and6 = and i32 %tmp4, 67108864
174 %cmp = icmp eq i32 %and6, 0
175 br i1 %cmp, label %if.then, label %if.end
177 if.then: ; preds = %entry, %lor.lhs.false
; Query failed or the bit is clear: leave with status 0.
178 call void @exit(i32 0) noreturn nounwind
181 if.end: ; preds = %lor.lhs.false
; Bit is set: call signal(4, null) in place of the handler installed above.
182 %call7 = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* null) nounwind
; External functions: @abort is called by @main when a result mismatches;
; @signal is called by @check_vect to install and later reset a handler for
; signal 4.
186 declare void @abort() noreturn
188 declare void (i32)* @signal(i32, void (i32)*) nounwind
; @sig_ill_handler(sig) -- handler that @check_vect installs for signal 4.
; It ignores %sig and calls exit(0).
; NOTE(review): the entry label, the terminator after the call, the body of
; the `return` block and the closing brace are not visible in this copy.
190 define internal void @sig_ill_handler(i32 %sig) nounwind uwtable {
192 call void @exit(i32 0) noreturn nounwind
195 return: ; No predecessors!
; @__get_cpuid(level, eax, ebx, ecx, edx) -- guarded CPUID query.
;   - calls @__get_cpuid_max(level & 0x80000000, null) and compares the result
;     (unsigned) with %__level;
;   - if the result is below %__level, takes the if.then path, for which the
;     phi in `return` selects 0;
;   - otherwise executes `cpuid` with %__level as input, stores the four result
;     registers through %__eax..%__edx, and the phi selects 1.
; NOTE(review): the instruction that returns %retval.0, the entry label, the
; branches into `return` and the closing brace are not visible in this copy.
199 define internal i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx) nounwind uwtable inlinehint {
; Keep only the top bit of the level (-2147483648 == 0x80000000).
201 %and = and i32 %__level, -2147483648
202 %call = call i32 @__get_cpuid_max(i32 %and, i32* null)
; Unsigned compare: go to if.then when the value from @__get_cpuid_max is
; below the requested level.
203 %cmp = icmp ult i32 %call, %__level
204 br i1 %cmp, label %if.then, label %if.end
206 if.then: ; preds = %entry
209 if.end: ; preds = %entry
; cpuid: outputs in ax/bx/cx/dx; the "0" constraint ties the %__level input
; to output operand 0 (ax).
210 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__level) nounwind, !srcloc !1
211 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
212 %asmresult8 = extractvalue { i32, i32, i32, i32 } %tmp, 1
213 %asmresult9 = extractvalue { i32, i32, i32, i32 } %tmp, 2
214 %asmresult10 = extractvalue { i32, i32, i32, i32 } %tmp, 3
; Write the four result registers through the caller's pointers.
215 store i32 %asmresult, i32* %__eax, align 4
216 store i32 %asmresult8, i32* %__ebx, align 4
217 store i32 %asmresult9, i32* %__ecx, align 4
218 store i32 %asmresult10, i32* %__edx, align 4
221 return: ; preds = %if.end, %if.then
; 0 when arriving from if.then, 1 when the cpuid query was executed.
222 %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
; External @exit, declared noreturn; @check_vect and @sig_ill_handler both
; call it with status 0.
226 declare void @exit(i32) noreturn
; @__get_cpuid_max(ext, sig) -- executes `cpuid` with %__ext as input.
; When %__sig is non-null, the second result register (bx) is stored through it.
; NOTE(review): the return instruction is not visible in this copy; presumably
; the function returns %asmresult (the ax result) -- confirm against the
; upstream test. The entry label, the branch out of if.then and the closing
; brace are also not visible.
228 define internal i32 @__get_cpuid_max(i32 %__ext, i32* %__sig) nounwind uwtable inlinehint {
; The "0" constraint ties the %__ext input to output operand 0 (ax).
230 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__ext) nounwind, !srcloc !2
231 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
; Skip the store when the caller passed a null %__sig.
232 %tobool = icmp eq i32* %__sig, null
233 br i1 %tobool, label %if.end, label %if.then
235 if.then: ; preds = %entry
236 %asmresult1 = extractvalue { i32, i32, i32, i32 } %tmp, 1
237 store i32 %asmresult1, i32* %__sig, align 4
240 if.end: ; preds = %entry, %if.then
; Metadata nodes referenced through !srcloc on the inline-asm calls:
; !0 by the empty asm in @main, !1 by the cpuid asm in @__get_cpuid,
; !2 by the cpuid asm in @__get_cpuid_max.
244 !0 = metadata !{i32 1391}
245 !1 = metadata !{i32 -2147342497, i32 -2147342489}
246 !2 = metadata !{i32 -2147342620, i32 -2147342612}