1 ; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
2 ; Obtained from C source as:
3 ; clang -S -emit-llvm -O0 vect-strided-u8-i8-gap4.c
4 ; opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars -instnamer vect-strided-u8-i8-gap4.s > vect-strided-u8-i8-gap4.ll
6 ; ModuleID = 'vect-strided-u8-i8-gap4.s'
; Target description this module was generated for (the triple names x86_64 Linux).
7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
8 target triple = "x86_64-unknown-linux-gnu"
; %struct.s is a record of eight i8 fields, addressed below by field index 0-7.
10 %struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8 }
; @main1(%arr): fills a local 16-element array %res of %struct.s from the
; fields of %arr, re-reads %res and calls @abort on any mismatch, then
; overwrites fields 0 and 1 of %res and verifies those as well.
; Fields are referred to by their struct index (0-7) in the comments below.
12 define i32 @main1(%struct.s* %arr) nounwind uwtable noinline {
14 %res = alloca [16 x %struct.s], align 16
; Loop 1 header: %indvar24 starts at 0 and the body runs while it is != 16.
; All per-iteration element addresses into %res and %arr are formed here.
17 for.cond: ; preds = %for.inc, %entry
18 %indvar24 = phi i64 [ %indvar.next25, %for.inc ], [ 0, %entry ]
19 %g = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar24, i32 6
20 %h = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar24, i32 7
21 %e78 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar24, i32 4
22 %f64 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar24, i32 5
23 %b51 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar24, i32 1
24 %d = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar24, i32 3
25 %scevgep33 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar24
; Field 0 of the element is addressed by casting the element pointer to i8*.
26 %a = bitcast %struct.s* %scevgep33 to i8*
27 %c11 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar24, i32 2
28 %c = getelementptr %struct.s* %arr, i64 %indvar24, i32 2
29 %b = getelementptr %struct.s* %arr, i64 %indvar24, i32 1
30 %e = getelementptr %struct.s* %arr, i64 %indvar24, i32 4
31 %f = getelementptr %struct.s* %arr, i64 %indvar24, i32 5
32 %exitcond26 = icmp ne i64 %indvar24, 16
33 br i1 %exitcond26, label %for.body, label %for.end
; Loop 1 body: every %res field is computed from %arr fields 1, 2, 4 and 5.
; The additions are done in i8, so they wrap at 8 bits.
35 for.body: ; preds = %for.cond
; res[2] = arr[1] + arr[2]
36 %tmp5 = load i8* %b, align 1
37 %tmp7 = load i8* %c, align 1
38 %add = add i8 %tmp5, %tmp7
39 store i8 %add, i8* %c11, align 2
; res[0] = arr[2] + arr[5] + arr[1]
40 %tmp14 = load i8* %c, align 1
41 %tmp17 = load i8* %f, align 1
42 %add19 = add i8 %tmp14, %tmp17
43 %tmp25 = load i8* %b, align 1
44 %add27 = add i8 %add19, %tmp25
45 store i8 %add27, i8* %a, align 8
; res[3] = arr[1] + arr[2]
46 %tmp34 = load i8* %b, align 1
47 %tmp38 = load i8* %c, align 1
48 %add40 = add i8 %tmp34, %tmp38
49 store i8 %add40, i8* %d, align 1
; res[1] = arr[2]
50 %tmp47 = load i8* %c, align 1
51 store i8 %tmp47, i8* %b51, align 1
; res[5] = arr[5] + arr[4]
52 %tmp54 = load i8* %f, align 1
53 %tmp57 = load i8* %e, align 1
54 %add59 = add i8 %tmp54, %tmp57
55 store i8 %add59, i8* %f64, align 1
; res[4] = arr[1] + arr[4]
56 %tmp67 = load i8* %b, align 1
57 %tmp71 = load i8* %e, align 1
58 %add73 = add i8 %tmp67, %tmp71
59 store i8 %add73, i8* %e78, align 4
; res[7] = arr[2]
60 %tmp81 = load i8* %c, align 1
61 store i8 %tmp81, i8* %h, align 1
; res[6] = arr[1] + arr[2]
62 %tmp87 = load i8* %b, align 1
63 %tmp91 = load i8* %c, align 1
64 %add93 = add i8 %tmp87, %tmp91
65 store i8 %add93, i8* %g, align 2
68 for.inc: ; preds = %for.body
69 %indvar.next25 = add i64 %indvar24, 1
72 for.end: ; preds = %for.cond
; Loop 2 header: %indvar9 starts at 0; its value truncated to i32 is compared
; (signed) against 16 to decide whether another element is checked.
75 for.cond100: ; preds = %for.inc287, %for.end
76 %indvar9 = phi i64 [ %indvar.next10, %for.inc287 ], [ 0, %for.end ]
77 %c122 = getelementptr %struct.s* %arr, i64 %indvar9, i32 2
78 %b115 = getelementptr %struct.s* %arr, i64 %indvar9, i32 1
79 %g267 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar9, i32 6
80 %h251 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar9, i32 7
81 %e217 = getelementptr %struct.s* %arr, i64 %indvar9, i32 4
82 %e227 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar9, i32 4
83 %f145 = getelementptr %struct.s* %arr, i64 %indvar9, i32 5
84 %f203 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar9, i32 5
85 %b187 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar9, i32 1
86 %d163 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar9, i32 3
87 %scevgep21 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar9
88 %a131 = bitcast %struct.s* %scevgep21 to i8*
89 %c108 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar9, i32 2
90 %i.1 = trunc i64 %indvar9 to i32
91 %cmp102 = icmp slt i32 %i.1, 16
92 br i1 %cmp102, label %for.body104, label %for.end290
; Loop 2 body: a chain of checks, one per %res field. Each check recomputes
; the expected value from %arr; a failed comparison branches to %if.then.
; Where a sum is involved, operands are zero-extended to i32 before adding,
; whereas loop 1 added in i8: the two agree only if the sum fits in 8 bits.
; Check res[2] == arr[1] + arr[2]
94 for.body104: ; preds = %for.cond100
95 %tmp109 = load i8* %c108, align 2
96 %conv110 = zext i8 %tmp109 to i32
97 %tmp116 = load i8* %b115, align 1
98 %conv117 = zext i8 %tmp116 to i32
99 %tmp123 = load i8* %c122, align 1
100 %conv124 = zext i8 %tmp123 to i32
101 %add125 = add nsw i32 %conv117, %conv124
102 %cmp126 = icmp eq i32 %conv110, %add125
103 br i1 %cmp126, label %lor.lhs.false, label %if.then
; Check res[0] == arr[2] + arr[5] + arr[1]
105 lor.lhs.false: ; preds = %for.body104
106 %tmp132 = load i8* %a131, align 8
107 %conv133 = zext i8 %tmp132 to i32
108 %tmp139 = load i8* %c122, align 1
109 %conv140 = zext i8 %tmp139 to i32
110 %tmp146 = load i8* %f145, align 1
111 %conv147 = zext i8 %tmp146 to i32
112 %add148 = add nsw i32 %conv140, %conv147
113 %tmp154 = load i8* %b115, align 1
114 %conv155 = zext i8 %tmp154 to i32
115 %add156 = add nsw i32 %add148, %conv155
116 %cmp157 = icmp eq i32 %conv133, %add156
117 br i1 %cmp157, label %lor.lhs.false159, label %if.then
; Check res[3] == arr[1] + arr[2]
119 lor.lhs.false159: ; preds = %lor.lhs.false
120 %tmp164 = load i8* %d163, align 1
121 %conv165 = zext i8 %tmp164 to i32
122 %tmp171 = load i8* %b115, align 1
123 %conv172 = zext i8 %tmp171 to i32
124 %tmp178 = load i8* %c122, align 1
125 %conv179 = zext i8 %tmp178 to i32
126 %add180 = add nsw i32 %conv172, %conv179
127 %cmp181 = icmp eq i32 %conv165, %add180
128 br i1 %cmp181, label %lor.lhs.false183, label %if.then
; Check res[1] == arr[2] (plain copy, compared directly as i8)
130 lor.lhs.false183: ; preds = %lor.lhs.false159
131 %tmp188 = load i8* %b187, align 1
132 %tmp195 = load i8* %c122, align 1
133 %cmp197 = icmp eq i8 %tmp188, %tmp195
134 br i1 %cmp197, label %lor.lhs.false199, label %if.then
; Check res[5] == arr[5] + arr[4]
136 lor.lhs.false199: ; preds = %lor.lhs.false183
137 %tmp204 = load i8* %f203, align 1
138 %conv205 = zext i8 %tmp204 to i32
139 %tmp211 = load i8* %f145, align 1
140 %conv212 = zext i8 %tmp211 to i32
141 %tmp218 = load i8* %e217, align 1
142 %conv219 = zext i8 %tmp218 to i32
143 %add220 = add nsw i32 %conv212, %conv219
144 %cmp221 = icmp eq i32 %conv205, %add220
145 br i1 %cmp221, label %lor.lhs.false223, label %if.then
; Check res[4] == arr[1] + arr[4]
147 lor.lhs.false223: ; preds = %lor.lhs.false199
148 %tmp228 = load i8* %e227, align 4
149 %conv229 = zext i8 %tmp228 to i32
150 %tmp235 = load i8* %b115, align 1
151 %conv236 = zext i8 %tmp235 to i32
152 %tmp242 = load i8* %e217, align 1
153 %conv243 = zext i8 %tmp242 to i32
154 %add244 = add nsw i32 %conv236, %conv243
155 %cmp245 = icmp eq i32 %conv229, %add244
156 br i1 %cmp245, label %lor.lhs.false247, label %if.then
; Check res[7] == arr[2] (plain copy, compared directly as i8)
158 lor.lhs.false247: ; preds = %lor.lhs.false223
159 %tmp252 = load i8* %h251, align 1
160 %tmp259 = load i8* %c122, align 1
161 %cmp261 = icmp eq i8 %tmp252, %tmp259
162 br i1 %cmp261, label %lor.lhs.false263, label %if.then
; Check res[6] == arr[1] + arr[2]; success continues at %if.end
164 lor.lhs.false263: ; preds = %lor.lhs.false247
165 %tmp268 = load i8* %g267, align 2
166 %conv269 = zext i8 %tmp268 to i32
167 %tmp275 = load i8* %b115, align 1
168 %conv276 = zext i8 %tmp275 to i32
169 %tmp282 = load i8* %c122, align 1
170 %conv283 = zext i8 %tmp282 to i32
171 %add284 = add nsw i32 %conv276, %conv283
172 %cmp285 = icmp eq i32 %conv269, %add284
173 br i1 %cmp285, label %if.end, label %if.then
; Shared failure target for all eight checks above.
175 if.then: ; preds = %lor.lhs.false263, %lor.lhs.false247, %lor.lhs.false223, %lor.lhs.false199, %lor.lhs.false183, %lor.lhs.false159, %lor.lhs.false, %for.body104
176 call void @abort() noreturn nounwind
179 if.end: ; preds = %lor.lhs.false263
182 for.inc287: ; preds = %if.end
183 %indvar.next10 = add i64 %indvar9, 1
184 br label %for.cond100
186 for.end290: ; preds = %for.cond100
187 br label %for.cond292
; Loop 3 header: %indvar2 starts at 0 and the body runs while it is != 16.
189 for.cond292: ; preds = %for.inc313, %for.end290
190 %indvar2 = phi i64 [ %indvar.next3, %for.inc313 ], [ 0, %for.end290 ]
191 %b310 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar2, i32 1
192 %scevgep5 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar2
193 %a303 = bitcast %struct.s* %scevgep5 to i8*
194 %c305 = getelementptr %struct.s* %arr, i64 %indvar2, i32 2
195 %b298 = getelementptr %struct.s* %arr, i64 %indvar2, i32 1
196 %exitcond = icmp ne i64 %indvar2, 16
197 br i1 %exitcond, label %for.body296, label %for.end316
; Loop 3 body: only two fields are rewritten:
;   res[0] = arr[1]; res[1] = arr[2]
199 for.body296: ; preds = %for.cond292
200 %tmp299 = load i8* %b298, align 1
201 store i8 %tmp299, i8* %a303, align 8
202 %tmp306 = load i8* %c305, align 1
203 store i8 %tmp306, i8* %b310, align 1
206 for.inc313: ; preds = %for.body296
207 %indvar.next3 = add i64 %indvar2, 1
208 br label %for.cond292
210 for.end316: ; preds = %for.cond292
211 br label %for.cond317
; Loop 4 header: %indvar starts at 0; its value truncated to i32 is compared
; (signed) against 16.
213 for.cond317: ; preds = %for.inc355, %for.end316
214 %indvar = phi i64 [ %indvar.next, %for.inc355 ], [ 0, %for.end316 ]
215 %c348 = getelementptr %struct.s* %arr, i64 %indvar, i32 2
216 %b341 = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar, i32 1
217 %b332 = getelementptr %struct.s* %arr, i64 %indvar, i32 1
218 %scevgep = getelementptr [16 x %struct.s]* %res, i64 0, i64 %indvar
219 %a325 = bitcast %struct.s* %scevgep to i8*
220 %i.3 = trunc i64 %indvar to i32
221 %cmp319 = icmp slt i32 %i.3, 16
222 br i1 %cmp319, label %for.body321, label %for.end358
; Loop 4 body: check res[0] == arr[1], then res[1] == arr[2]; either
; mismatch branches to %if.then353.
224 for.body321: ; preds = %for.cond317
225 %tmp326 = load i8* %a325, align 8
226 %tmp333 = load i8* %b332, align 1
227 %cmp335 = icmp eq i8 %tmp326, %tmp333
228 br i1 %cmp335, label %lor.lhs.false337, label %if.then353
230 lor.lhs.false337: ; preds = %for.body321
231 %tmp342 = load i8* %b341, align 1
232 %tmp349 = load i8* %c348, align 1
233 %cmp351 = icmp eq i8 %tmp342, %tmp349
234 br i1 %cmp351, label %if.end354, label %if.then353
; Failure target for loop 4.
236 if.then353: ; preds = %lor.lhs.false337, %for.body321
237 call void @abort() noreturn nounwind
240 if.end354: ; preds = %lor.lhs.false337
243 for.inc355: ; preds = %if.end354
244 %indvar.next = add i64 %indvar, 1
245 br label %for.cond317
247 for.end358: ; preds = %for.cond317
; External @abort, declared noreturn; called from the mismatch paths of @main1 and @main.
251 declare void @abort() noreturn
; @main: calls @check_vect, initialises a local 16-element array %arr of
; %struct.s with per-index values, and passes a pointer to its first element
; to @main1.
253 define i32 @main() nounwind uwtable {
255 %arr = alloca [16 x %struct.s], align 16
256 call void @check_vect()
; Init loop header: %indvar starts at 0. The values to store (all truncated to
; i8) and the field addresses for element %indvar are computed here.
259 for.cond: ; preds = %for.inc, %entry
260 %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
; %conv30 = (i8)(i - 3), stored to field 6
261 %tmp = add i64 %indvar, -3
262 %conv30 = trunc i64 %tmp to i8
; %conv4 = (i8)(i * 2), stored to field 1
263 %tmp2 = mul i64 %indvar, 2
264 %conv4 = trunc i64 %tmp2 to i8
265 %scevgep = getelementptr [16 x %struct.s]* %arr, i64 0, i64 %indvar
; Field 0 is addressed by casting the element pointer to i8*.
266 %a = bitcast %struct.s* %scevgep to i8*
267 %h = getelementptr [16 x %struct.s]* %arr, i64 0, i64 %indvar, i32 7
268 %g = getelementptr [16 x %struct.s]* %arr, i64 0, i64 %indvar, i32 6
269 %f = getelementptr [16 x %struct.s]* %arr, i64 0, i64 %indvar, i32 5
; %conv25 = (i8)(i * 5), stored to field 5
270 %tmp8 = mul i64 %indvar, 5
271 %conv25 = trunc i64 %tmp8 to i8
272 %e = getelementptr [16 x %struct.s]* %arr, i64 0, i64 %indvar, i32 4
; %conv19 = (i8)(i * 3 + 5), stored to field 4
273 %tmp11 = mul i64 %indvar, 3
274 %tmp12 = add i64 %tmp11, 5
275 %conv19 = trunc i64 %tmp12 to i8
276 %d = getelementptr [16 x %struct.s]* %arr, i64 0, i64 %indvar, i32 3
; %conv12 = (i8)(i + 34), stored to field 3
277 %tmp15 = add i64 %indvar, 34
278 %conv12 = trunc i64 %tmp15 to i8
279 %c = getelementptr [16 x %struct.s]* %arr, i64 0, i64 %indvar, i32 2
280 %b = getelementptr [16 x %struct.s]* %arr, i64 0, i64 %indvar, i32 1
; %conv = (i8)i, stored to field 0
281 %conv = trunc i64 %indvar to i8
; The loop continues while the i32-truncated index is (signed) less than 16.
282 %i.0 = trunc i64 %indvar to i32
283 %cmp = icmp slt i32 %i.0, 16
284 br i1 %cmp, label %for.body, label %for.end
; Init loop body: write all eight fields of element %indvar; fields 2 and 7
; receive the constants 17 and 56.
286 for.body: ; preds = %for.cond
287 store i8 %conv, i8* %a, align 8
288 store i8 %conv4, i8* %b, align 1
289 store i8 17, i8* %c, align 2
290 store i8 %conv12, i8* %d, align 1
291 store i8 %conv19, i8* %e, align 4
292 store i8 %conv25, i8* %f, align 1
293 store i8 %conv30, i8* %g, align 2
294 store i8 56, i8* %h, align 1
; Field 0 is read back and compared with i8 -78 (bit pattern 0xB2, i.e. 178
; unsigned); equality branches to the @abort call in %if.then.
295 %tmp41 = load i8* %a, align 8
296 %cmp43 = icmp eq i8 %tmp41, -78
297 br i1 %cmp43, label %if.then, label %if.end
299 if.then: ; preds = %for.body
300 call void @abort() noreturn nounwind
303 if.end: ; preds = %for.body
306 for.inc: ; preds = %if.end
307 %indvar.next = add i64 %indvar, 1
; After the loop: pass a pointer to element 0 of %arr to @main1.
310 for.end: ; preds = %for.cond
311 %arraydecay = getelementptr inbounds [16 x %struct.s]* %arr, i64 0, i64 0
312 %call = call i32 @main1(%struct.s* %arraydecay)
; @check_vect: installs @sig_ill_handler for signal 4, queries cpuid level 1
; through @__get_cpuid, and calls exit(0) when the query fails or when bit 26
; of the fourth result word is clear. Otherwise it resets the handler for
; signal 4 to null.
; NOTE(review): signal 4 is presumably SIGILL and bit 26 of the dx result is
; presumably the SSE2 feature flag -- confirm against the target's headers.
316 define internal void @check_vect() nounwind uwtable noinline {
; Four i32 slots that receive the cpuid result words.
318 %a = alloca i32, align 4
319 %b = alloca i32, align 4
320 %c = alloca i32, align 4
321 %d = alloca i32, align 4
322 %call = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* @sig_ill_handler) nounwind
323 %call1 = call i32 @__get_cpuid(i32 1, i32* %a, i32* %b, i32* %c, i32* %d)
; A zero result from @__get_cpuid goes straight to the exit(0) path.
324 %tobool = icmp eq i32 %call1, 0
325 br i1 %tobool, label %if.then, label %lor.lhs.false
327 lor.lhs.false: ; preds = %entry
328 %tmp4 = load i32* %d, align 4
; 67108864 == 1 << 26: test a single bit of the word stored through %d.
329 %and6 = and i32 %tmp4, 67108864
330 %cmp = icmp eq i32 %and6, 0
331 br i1 %cmp, label %if.then, label %if.end
333 if.then: ; preds = %entry, %lor.lhs.false
334 call void @exit(i32 0) noreturn nounwind
; Bit is set: replace the signal-4 handler with a null function pointer.
337 if.end: ; preds = %lor.lhs.false
338 %call7 = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* null) nounwind
; External @signal: takes a signal number and a handler pointer and returns a handler pointer; used by @check_vect.
342 declare void (i32)* @signal(i32, void (i32)*) nounwind
; @sig_ill_handler(%sig): signal handler installed by @check_vect; it ignores
; %sig and calls exit(0).
344 define internal void @sig_ill_handler(i32 %sig) nounwind uwtable {
346 call void @exit(i32 0) noreturn nounwind
349 return: ; No predecessors!
; @__get_cpuid(%__level, %__eax, %__ebx, %__ecx, %__edx): executes the cpuid
; instruction for %__level when that level is supported and stores the four
; result words through the four pointer arguments.
; %retval.0 is 0 when the level is not supported and 1 after the results have
; been stored.
353 define internal i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx) nounwind uwtable inlinehint {
; Keep only the top bit (0x80000000) of the level and ask @__get_cpuid_max
; for the highest level supported in that range.
355 %and = and i32 %__level, -2147483648
356 %call = call i32 @__get_cpuid_max(i32 %and, i32* null)
; Unsigned compare: a maximum below the requested level takes the %if.then path.
357 %cmp = icmp ult i32 %call, %__level
358 br i1 %cmp, label %if.then, label %if.end
360 if.then: ; preds = %entry
363 if.end: ; preds = %entry
; Inline cpuid: %__level is tied to the first output (ax); the outputs are
; the ax, bx, cx and dx registers, in that order.
364 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__level) nounwind, !srcloc !0
365 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
366 %asmresult8 = extractvalue { i32, i32, i32, i32 } %tmp, 1
367 %asmresult9 = extractvalue { i32, i32, i32, i32 } %tmp, 2
368 %asmresult10 = extractvalue { i32, i32, i32, i32 } %tmp, 3
369 store i32 %asmresult, i32* %__eax, align 4
370 store i32 %asmresult8, i32* %__ebx, align 4
371 store i32 %asmresult9, i32* %__ecx, align 4
372 store i32 %asmresult10, i32* %__edx, align 4
375 return: ; preds = %if.end, %if.then
376 %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
; External @exit, declared noreturn; called with status 0 by @check_vect and @sig_ill_handler.
380 declare void @exit(i32) noreturn
; @__get_cpuid_max(%__ext, %__sig): executes the cpuid instruction with %__ext
; as input and extracts the ax result. When %__sig is non-null, the bx result
; is also stored through it.
382 define internal i32 @__get_cpuid_max(i32 %__ext, i32* %__sig) nounwind uwtable inlinehint {
384 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__ext) nounwind, !srcloc !1
385 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
; A null %__sig skips the store below.
386 %tobool = icmp eq i32* %__sig, null
387 br i1 %tobool, label %if.end, label %if.then
389 if.then: ; preds = %entry
390 %asmresult1 = extractvalue { i32, i32, i32, i32 } %tmp, 1
391 store i32 %asmresult1, i32* %__sig, align 4
394 if.end: ; preds = %entry, %if.then
; Metadata nodes used as the !srcloc operands of the two inline-asm cpuid
; calls (!0 in @__get_cpuid, !1 in @__get_cpuid_max).
398 !0 = metadata !{i32 -2147342341, i32 -2147342333}
399 !1 = metadata !{i32 -2147342464, i32 -2147342456}