1 ; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
2 ; Obtained from C source as:
3 ; clang -S -emit-llvm -O0 vect-strided-a-u16-i4.c
4 ; opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars -instnamer vect-strided-a-u16-i4.s > vect-strided-a-u16-i4.ll
6 ; ModuleID = 'vect-strided-a-u16-i4.s'
; Data layout and target triple this module is written against
; (the triple names x86_64 Linux).
7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
8 target triple = "x86_64-unknown-linux-gnu"
; Element type of the two arrays in @main1: four i16 fields. @main1 names the
; pointers to fields 0..3 %a, %b, %c and %d respectively.
10 %struct.s = type { i16, i16, i16, i16 }
; @main1 -- three consecutive loops over two 128-element arrays of %struct.s:
;   1. fill %arr,
;   2. derive %res from %arr using i16 arithmetic,
;   3. recompute the same expressions in i32 and call @abort on any mismatch.
; NOTE(review): in this view the `entry:` label, several block terminators,
; the function's `ret` and its closing `}` are not visible (the embedded line
; numbers skip where they would be) -- confirm against the complete file
; before relying on the control flow described below.
12 define i32 @main1() nounwind uwtable noinline {
14 %arr = alloca [128 x %struct.s], align 16
15 %res = alloca [128 x %struct.s], align 16
; ---- Loop 1 header: %indvar20 starts at 0 on entry from %entry. -------------
; All values and field addresses used by the body are computed here, before
; the bounds check.
18 for.cond: ; preds = %for.inc, %entry
19 %indvar20 = phi i64 [ %indvar.next21, %for.inc ], [ 0, %entry ]
; %conv4 = (i16)(2 * i), stored to field 1 below.
20 %tmp = mul i64 %indvar20, 2
21 %conv4 = trunc i64 %tmp to i16
; %a points at field 0 of arr[i]: the element pointer reinterpreted as i16*.
22 %scevgep23 = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar20
23 %a = bitcast %struct.s* %scevgep23 to i16*
24 %d = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar20, i32 3
; %conv12 = (i16)(i + 34), stored to field 3 below.
25 %tmp26 = add i64 %indvar20, 34
26 %conv12 = trunc i64 %tmp26 to i16
27 %c = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar20, i32 2
28 %b = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar20, i32 1
29 %conv = trunc i64 %indvar20 to i16
; The loop bound is tested on the i32 truncation of the counter (signed <).
30 %i.0 = trunc i64 %indvar20 to i32
31 %cmp = icmp slt i32 %i.0, 128
32 br i1 %cmp, label %for.body, label %for.end
; Loop 1 body: arr[i] = { i, 2*i, 17, i + 34 }, each truncated to i16.
34 for.body: ; preds = %for.cond
35 store i16 %conv, i16* %a, align 8
36 store i16 %conv4, i16* %b, align 2
37 store i16 17, i16* %c, align 4
38 store i16 %conv12, i16* %d, align 2
; Reload field 0 and take the abort path if it equals 178.
39 %tmp20 = load i16* %a, align 8
40 %cmp22 = icmp eq i16 %tmp20, 178
41 br i1 %cmp22, label %if.then, label %if.end
43 if.then: ; preds = %for.body
44 call void @abort() noreturn nounwind
; NOTE(review): no terminator is visible for if.end in this view; the preds
; comment on for.inc suggests it falls through to for.inc -- confirm.
47 if.end: ; preds = %for.body
50 for.inc: ; preds = %if.end
51 %indvar.next21 = add i64 %indvar20, 1
54 for.end: ; preds = %for.cond
; ---- Loop 2 header: %indvar8 starts at 0 on entry from for.end. -------------
; Field addresses into both %res (written) and %arr (read) are computed here.
57 for.cond25: ; preds = %for.inc110, %for.end
58 %indvar8 = phi i64 [ %indvar.next9, %for.inc110 ], [ 0, %for.end ]
59 %b108 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar8, i32 1
60 %d98 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar8, i32 3
61 %scevgep12 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar8
62 %a88 = bitcast %struct.s* %scevgep12 to i16*
63 %c58 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar8, i32 2
64 %d40 = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar8, i32 3
65 %b31 = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar8, i32 1
66 %c44 = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar8, i32 2
67 %scevgep18 = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar8
68 %a35 = bitcast %struct.s* %scevgep18 to i16*
; Unlike loops 1 and 3, this bound is an i64 `!= 128` test on the counter.
69 %exitcond = icmp ne i64 %indvar8, 128
70 br i1 %exitcond, label %for.body29, label %for.end113
; Loop 2 body. Writing f0..f3 for fields 0..3 of arr[i]:
;   %sub   = f1 - f0
;   %sub47 = f3 - f2
;   res[i].f2 = %sub + %sub47
;   res[i].f0 = (f0 + f2) + (f1 + f3)
;   res[i].f3 = %sub + %sub47
;   res[i].f1 = %sub + %sub47
; All arithmetic here is plain i16 add/sub with no nsw/nuw flags.
72 for.body29: ; preds = %for.cond25
73 %tmp32 = load i16* %b31, align 2
74 %tmp36 = load i16* %a35, align 2
75 %sub = sub i16 %tmp32, %tmp36
76 %tmp41 = load i16* %d40, align 2
77 %tmp45 = load i16* %c44, align 2
78 %sub47 = sub i16 %tmp41, %tmp45
79 %add53 = add i16 %sub, %sub47
80 store i16 %add53, i16* %c58, align 4
; The %arr fields are loaded a second time for the sum stored to field 0.
81 %tmp61 = load i16* %a35, align 2
82 %tmp65 = load i16* %c44, align 2
83 %add67 = add i16 %tmp61, %tmp65
84 %tmp71 = load i16* %b31, align 2
85 %tmp75 = load i16* %d40, align 2
86 %add77 = add i16 %tmp71, %tmp75
87 %add83 = add i16 %add67, %add77
88 store i16 %add83, i16* %a88, align 8
; %add93 and %add103 repeat the same %sub + %sub47 sum as %add53.
89 %add93 = add i16 %sub, %sub47
90 store i16 %add93, i16* %d98, align 2
91 %add103 = add i16 %sub, %sub47
92 store i16 %add103, i16* %b108, align 2
95 for.inc110: ; preds = %for.body29
96 %indvar.next9 = add i64 %indvar8, 1
99 for.end113: ; preds = %for.cond25
100 br label %for.cond114
; ---- Loop 3 header: verification pass, %indvar starts at 0. -----------------
102 for.cond114: ; preds = %for.inc263, %for.end113
103 %indvar = phi i64 [ %indvar.next, %for.inc263 ], [ 0, %for.end113 ]
104 %c148 = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 2
105 %d141 = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 3
106 %scevgep = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar
107 %a134 = bitcast %struct.s* %scevgep to i16*
108 %b128 = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 1
109 %b229 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 1
110 %d193 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 3
111 %scevgep5 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar
112 %a157 = bitcast %struct.s* %scevgep5 to i16*
113 %c122 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 2
114 %i.2 = trunc i64 %indvar to i32
115 %cmp116 = icmp slt i32 %i.2, 128
116 br i1 %cmp116, label %for.body118, label %for.end266
; Check 1 of 4: res[i].f2 == f1 - f0 + f3 - f2 of arr[i].
; Every operand is zero-extended to i32 before the arithmetic, so the
; comparison is done in i32 rather than in wrapping i16.
118 for.body118: ; preds = %for.cond114
119 %tmp123 = load i16* %c122, align 4
120 %conv124 = zext i16 %tmp123 to i32
121 %tmp129 = load i16* %b128, align 2
122 %conv130 = zext i16 %tmp129 to i32
123 %tmp135 = load i16* %a134, align 8
124 %conv136 = zext i16 %tmp135 to i32
125 %sub137 = sub nsw i32 %conv130, %conv136
126 %tmp142 = load i16* %d141, align 2
127 %conv143 = zext i16 %tmp142 to i32
128 %add144 = add nsw i32 %sub137, %conv143
129 %tmp149 = load i16* %c148, align 4
130 %conv150 = zext i16 %tmp149 to i32
131 %sub151 = sub nsw i32 %add144, %conv150
132 %cmp152 = icmp eq i32 %conv124, %sub151
133 br i1 %cmp152, label %lor.lhs.false, label %if.then261
; Check 2 of 4: res[i].f0 == f0 + f2 + f1 + f3 of arr[i].
135 lor.lhs.false: ; preds = %for.body118
136 %tmp158 = load i16* %a157, align 8
137 %conv159 = zext i16 %tmp158 to i32
138 %tmp164 = load i16* %a134, align 8
139 %conv165 = zext i16 %tmp164 to i32
140 %tmp170 = load i16* %c148, align 4
141 %conv171 = zext i16 %tmp170 to i32
142 %add172 = add nsw i32 %conv165, %conv171
143 %tmp177 = load i16* %b128, align 2
144 %conv178 = zext i16 %tmp177 to i32
145 %add179 = add nsw i32 %add172, %conv178
146 %tmp184 = load i16* %d141, align 2
147 %conv185 = zext i16 %tmp184 to i32
148 %add186 = add nsw i32 %add179, %conv185
149 %cmp187 = icmp eq i32 %conv159, %add186
150 br i1 %cmp187, label %lor.lhs.false189, label %if.then261
; Check 3 of 4: res[i].f3 == f1 - f0 + f3 - f2 of arr[i].
152 lor.lhs.false189: ; preds = %lor.lhs.false
153 %tmp194 = load i16* %d193, align 2
154 %conv195 = zext i16 %tmp194 to i32
155 %tmp200 = load i16* %b128, align 2
156 %conv201 = zext i16 %tmp200 to i32
157 %tmp206 = load i16* %a134, align 8
158 %conv207 = zext i16 %tmp206 to i32
159 %sub208 = sub nsw i32 %conv201, %conv207
160 %tmp213 = load i16* %d141, align 2
161 %conv214 = zext i16 %tmp213 to i32
162 %add215 = add nsw i32 %sub208, %conv214
163 %tmp220 = load i16* %c148, align 4
164 %conv221 = zext i16 %tmp220 to i32
165 %sub222 = sub nsw i32 %add215, %conv221
166 %cmp223 = icmp eq i32 %conv195, %sub222
167 br i1 %cmp223, label %lor.lhs.false225, label %if.then261
; Check 4 of 4: res[i].f1 == f1 - f0 + f3 - f2 of arr[i].
169 lor.lhs.false225: ; preds = %lor.lhs.false189
170 %tmp230 = load i16* %b229, align 2
171 %conv231 = zext i16 %tmp230 to i32
172 %tmp236 = load i16* %b128, align 2
173 %conv237 = zext i16 %tmp236 to i32
174 %tmp242 = load i16* %a134, align 8
175 %conv243 = zext i16 %tmp242 to i32
176 %sub244 = sub nsw i32 %conv237, %conv243
177 %tmp249 = load i16* %d141, align 2
178 %conv250 = zext i16 %tmp249 to i32
179 %add251 = add nsw i32 %sub244, %conv250
180 %tmp256 = load i16* %c148, align 4
181 %conv257 = zext i16 %tmp256 to i32
182 %sub258 = sub nsw i32 %add251, %conv257
183 %cmp259 = icmp eq i32 %conv231, %sub258
184 br i1 %cmp259, label %if.end262, label %if.then261
; Shared failure path: reached from whichever of the four checks fails first.
186 if.then261: ; preds = %lor.lhs.false225, %lor.lhs.false189, %lor.lhs.false, %for.body118
187 call void @abort() noreturn nounwind
190 if.end262: ; preds = %lor.lhs.false225
193 for.inc263: ; preds = %if.end262
194 %indvar.next = add i64 %indvar, 1
195 br label %for.cond114
; NOTE(review): the return instruction for this exit block is not visible here.
197 for.end266: ; preds = %for.cond114
; External @abort, declared noreturn; called on the failure paths of @main1.
201 declare void @abort() noreturn
; @main -- calls @check_vect first, then @main1, whose result is bound to %call.
; NOTE(review): the `entry:` label, the return instruction and the closing `}`
; are not visible in this view, so what @main returns cannot be stated here.
203 define i32 @main() nounwind uwtable {
205 call void @check_vect()
206 %call = call i32 @main1()
; @check_vect -- CPU capability gate run before the test proper.
; It installs @sig_ill_handler for signal 4, queries @__get_cpuid with level 1,
; and calls exit(0) when the query reports failure or when bit 26 of the value
; written through %d is clear. Otherwise it calls signal(4, null).
; NOTE(review): signal 4 is presumably SIGILL (matching the handler's name) and
; bit 26 is presumably the SSE2 feature flag -- confirm against platform docs.
; NOTE(review): the `entry:` label and the final terminators are not visible in
; this view.
210 define internal void @check_vect() nounwind uwtable noinline {
; Four i32 out-slots, passed below as the __eax/__ebx/__ecx/__edx arguments.
212 %a = alloca i32, align 4
213 %b = alloca i32, align 4
214 %c = alloca i32, align 4
215 %d = alloca i32, align 4
216 %call = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* @sig_ill_handler) nounwind
217 %call1 = call i32 @__get_cpuid(i32 1, i32* %a, i32* %b, i32* %c, i32* %d)
; A zero return from @__get_cpuid goes straight to the exit(0) path.
218 %tobool = icmp eq i32 %call1, 0
219 br i1 %tobool, label %if.then, label %lor.lhs.false
221 lor.lhs.false: ; preds = %entry
222 %tmp4 = load i32* %d, align 4
; 67108864 == 1 << 26: isolate that single bit of the fourth result word.
223 %and6 = and i32 %tmp4, 67108864
224 %cmp = icmp eq i32 %and6, 0
225 br i1 %cmp, label %if.then, label %if.end
227 if.then: ; preds = %entry, %lor.lhs.false
228 call void @exit(i32 0) noreturn nounwind
231 if.end: ; preds = %lor.lhs.false
; Second signal() call passes a null handler for the same signal number.
232 %call7 = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* null) nounwind
; External @signal: takes a signal number and a handler pointer, and returns a
; handler pointer of the same type. Called twice by @check_vect.
236 declare void (i32)* @signal(i32, void (i32)*) nounwind
; @sig_ill_handler -- handler registered by @check_vect; it ignores %sig and
; calls exit(0).
; NOTE(review): the `entry:` label, the terminator after the exit call and the
; contents of the `return` block are not visible in this view.
238 define internal void @sig_ill_handler(i32 %sig) nounwind uwtable {
240 call void @exit(i32 0) noreturn nounwind
243 return: ; No predecessors!
; @__get_cpuid -- runs the `cpuid` instruction for %__level and stores the four
; result words through %__eax, %__ebx, %__ecx and %__edx.
; It first asks @__get_cpuid_max for a limit and skips the query when that
; limit is below %__level (unsigned compare). %retval.0 is 0 on the skipped
; path and 1 after a query.
; NOTE(review): the `entry:` label, the branches into `return` and the `ret`
; itself are not visible in this view -- presumably %retval.0 is what gets
; returned; confirm against the complete file.
247 define internal i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx) nounwind uwtable inlinehint {
; -2147483648 is 0x80000000: keep only the top bit of the requested level.
249 %and = and i32 %__level, -2147483648
250 %call = call i32 @__get_cpuid_max(i32 %and, i32* null)
251 %cmp = icmp ult i32 %call, %__level
252 br i1 %cmp, label %if.then, label %if.end
254 if.then: ; preds = %entry
257 if.end: ; preds = %entry
; Inline asm: four i32 outputs constrained to ax/bx/cx/dx; the "0" constraint
; ties the %__level input to the first output's register.
258 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__level) nounwind, !srcloc !0
259 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
260 %asmresult8 = extractvalue { i32, i32, i32, i32 } %tmp, 1
261 %asmresult9 = extractvalue { i32, i32, i32, i32 } %tmp, 2
262 %asmresult10 = extractvalue { i32, i32, i32, i32 } %tmp, 3
263 store i32 %asmresult, i32* %__eax, align 4
264 store i32 %asmresult8, i32* %__ebx, align 4
265 store i32 %asmresult9, i32* %__ecx, align 4
266 store i32 %asmresult10, i32* %__edx, align 4
269 return: ; preds = %if.end, %if.then
270 %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
; External @exit, declared noreturn; called with 0 by @check_vect and
; @sig_ill_handler.
274 declare void @exit(i32) noreturn
; @__get_cpuid_max -- runs the `cpuid` instruction with %__ext as input and
; extracts the first result word as %asmresult. When %__sig is non-null, the
; second result word is also stored through it.
; NOTE(review): the `entry:` label, the branch out of if.then and the `ret` are
; not visible in this view -- presumably %asmresult is the return value;
; confirm against the complete file.
276 define internal i32 @__get_cpuid_max(i32 %__ext, i32* %__sig) nounwind uwtable inlinehint {
278 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__ext) nounwind, !srcloc !1
279 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
; A null %__sig skips the optional store and goes directly to if.end.
280 %tobool = icmp eq i32* %__sig, null
281 br i1 %tobool, label %if.end, label %if.then
283 if.then: ; preds = %entry
284 %asmresult1 = extractvalue { i32, i32, i32, i32 } %tmp, 1
285 store i32 %asmresult1, i32* %__sig, align 4
288 if.end: ; preds = %entry, %if.then
; Metadata nodes attached as !srcloc to the two `cpuid` inline-asm calls:
; !0 in @__get_cpuid and !1 in @__get_cpuid_max.
292 !0 = metadata !{i32 -2147342975, i32 -2147342967}
293 !1 = metadata !{i32 -2147343098, i32 -2147343090}