1 ; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
2 ; Obtained from C source as:
3 ; clang -S -emit-llvm -O0 vect-reduc-dot-s8a.c
4 ; opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars -instnamer vect-reduc-dot-s8a.s > vect-reduc-dot-s8a.ll
6 ; ModuleID = 'vect-reduc-dot-s8a.s'
7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
8 target triple = "x86_64-unknown-linux-gnu"
; Two 64-byte, 16-byte-aligned, zero-initialized common globals. In this file
; @main stores into both and @foo1 loads from both.
10 @X = common global [64 x i8] zeroinitializer, align 16
11 @Y = common global [64 x i8] zeroinitializer, align 16
; i32 @foo1(i32 %len)
; Reduction loop: for i = 0 .. max(%len, 0) - 1, multiplies the sign-extended
; bytes @X[i] and @Y[i], keeps the low 16 bits of the product (sign-extended
; back to i32) and adds it to a running i32 sum, which is returned.
; NOTE(review): the entry label, the terminators of for.body / for.inc and the
; closing brace are not visible in this listing - confirm against the full file.
13 define i32 @foo1(i32 %len) nounwind uwtable noinline {
; Trip count: signed max(%len, 0), widened to i64 to match the i64 induction
; variable.
15 %tmp = icmp sgt i32 %len, 0
16 %smax = select i1 %tmp, i32 %len, i32 0
17 %tmp1 = zext i32 %smax to i64
; Loop header: %indvar counts up from 0, %result.0 carries the partial sum.
20 for.cond: ; preds = %for.inc, %entry
21 %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
22 %result.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
; Element addresses are formed in the header, before the exit test; the loads
; themselves only happen in for.inc.
23 %arrayidx6 = getelementptr [64 x i8]* @Y, i64 0, i64 %indvar
24 %arrayidx = getelementptr [64 x i8]* @X, i64 0, i64 %indvar
; Keep iterating while %indvar has not reached the trip count.
25 %exitcond = icmp ne i64 %indvar, %tmp1
26 br i1 %exitcond, label %for.body, label %for.end
28 for.body: ; preds = %for.cond
31 for.inc: ; preds = %for.body
32 %tmp7 = load i8* %arrayidx6, align 1
33 %tmp3 = load i8* %arrayidx, align 1
34 %conv8 = sext i8 %tmp7 to i32
35 %conv = sext i8 %tmp3 to i32
36 %mul = mul i32 %conv, %conv8
; shl 16 followed by ashr 16 sign-extends the low 16 bits of the product.
37 %sext = shl i32 %mul, 16
38 %conv11 = ashr exact i32 %sext, 16
39 %add = add nsw i32 %result.0, %conv11
40 %indvar.next = add i64 %indvar, 1
; Exit block: single-entry phi for the final sum, which is the return value.
43 for.end: ; preds = %for.cond
44 %result.0.lcssa = phi i32 [ %result.0, %for.cond ]
45 ret i32 %result.0.lcssa
; i32 @main()
; Test driver: calls @check_vect, fills @X[i] = i and @Y[i] = 64 - i for
; i = 0 .. 63, calls @foo1(64) and aborts unless the result equals 43680.
; NOTE(review): the entry label, several `br` terminators, the instruction
; after the abort call, the contents of if.end (including the return) and the
; closing brace are not visible in this listing.
48 define i32 @main() nounwind uwtable {
50 call void @check_vect()
; Initialization loop header: all values to be stored are derived from %indvar
; here, the stores happen in for.body.
53 for.cond: ; preds = %for.inc, %entry
54 %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
55 %arrayidx7 = getelementptr [64 x i8]* @Y, i64 0, i64 %indvar
; %conv4 = (i8)(64 - %indvar), the value stored to @Y.
56 %tmp = mul i64 %indvar, -1
57 %tmp1 = add i64 %tmp, 64
58 %conv4 = trunc i64 %tmp1 to i8
59 %arrayidx = getelementptr [64 x i8]* @X, i64 0, i64 %indvar
; %conv = (i8)%indvar, the value stored to @X.
60 %conv = trunc i64 %indvar to i8
61 %exitcond = icmp ne i64 %indvar, 64
62 br i1 %exitcond, label %for.body, label %for.end
64 for.body: ; preds = %for.cond
65 store i8 %conv, i8* %arrayidx, align 1
66 store i8 %conv4, i8* %arrayidx7, align 1
; Empty inline asm marked sideeffect. NOTE(review): presumably a barrier in
; the C source to keep this init loop from being optimized - confirm.
67 call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
70 for.inc: ; preds = %for.body
71 %indvar.next = add i64 %indvar, 1
; Run the reduction over all 64 elements and check it against the expected
; value: the sum over i = 0 .. 63 of i * (64 - i) is 43680.
74 for.end: ; preds = %for.cond
75 %call = call i32 @foo1(i32 64)
76 %cmp10 = icmp eq i32 %call, 43680
77 br i1 %cmp10, label %if.end, label %if.then
; Mismatch path.
79 if.then: ; preds = %for.end
80 call void @abort() noreturn nounwind
; Match path.
83 if.end: ; preds = %for.end
; void @check_vect()
; Guard run at the start of @main: installs @sig_ill_handler for signal 4,
; queries @__get_cpuid with level 1 and calls exit(0) if the query reports
; failure or if bit 26 of the fourth result word is clear. Otherwise it sets
; the handler argument for signal 4 to null and continues.
; NOTE(review): signal 4 is presumably SIGILL (going by the handler's name) and
; bit 26 of that word presumably the SSE2 feature flag - confirm against the
; platform headers / CPUID documentation.
; NOTE(review): the entry label, the instruction after the exit call, the
; final return and the closing brace are not visible in this listing.
87 define internal void @check_vect() nounwind uwtable noinline {
; Stack slots that receive the four result words from @__get_cpuid.
89 %a = alloca i32, align 4
90 %b = alloca i32, align 4
91 %c = alloca i32, align 4
92 %d = alloca i32, align 4
93 %call = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* @sig_ill_handler) nounwind
94 %call1 = call i32 @__get_cpuid(i32 1, i32* %a, i32* %b, i32* %c, i32* %d)
; A zero return from @__get_cpuid means the level was not supported.
95 %tobool = icmp eq i32 %call1, 0
96 br i1 %tobool, label %if.then, label %lor.lhs.false
; Query succeeded: test bit 26 (67108864 == 1 << 26) of the word stored in %d.
98 lor.lhs.false: ; preds = %entry
99 %tmp4 = load i32* %d, align 4
100 %and6 = and i32 %tmp4, 67108864
101 %cmp = icmp eq i32 %and6, 0
102 br i1 %cmp, label %if.then, label %if.end
; Query failed or bit clear: leave the process with exit status 0.
104 if.then: ; preds = %entry, %lor.lhs.false
105 call void @exit(i32 0) noreturn nounwind
; Bit set: call signal again for signal 4, this time with a null handler.
108 if.end: ; preds = %lor.lhs.false
109 %call7 = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* null) nounwind
; External functions: @abort is called from @main when the result check fails,
; @signal is called twice from @check_vect. @signal takes an i32 and a pointer
; to a void(i32) function and returns a pointer of that same function type.
113 declare void @abort() noreturn
115 declare void (i32)* @signal(i32, void (i32)*) nounwind
; void @sig_ill_handler(i32 %sig)
; Handler that @check_vect registers through @signal: it ignores %sig and
; calls exit(0).
; NOTE(review): the entry label, the instruction after the exit call, the body
; of the return block and the closing brace are not visible in this listing.
117 define internal void @sig_ill_handler(i32 %sig) nounwind uwtable {
119 call void @exit(i32 0) noreturn nounwind
; Block with no predecessors, as its own annotation says.
122 return: ; No predecessors!
; i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx)
; Executes the cpuid instruction for %__level and stores the four result words
; through the pointer arguments, provided that the maximum level reported by
; @__get_cpuid_max is not below %__level. %retval.0 is 1 when the query was
; performed and 0 when it was skipped.
; NOTE(review): the entry label, the `br` terminators into the return block,
; the `ret` and the closing brace are not visible in this listing.
126 define internal i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx) nounwind uwtable inlinehint {
; Keep only the top bit of the level (-2147483648 == 0x80000000) and pass it
; to @__get_cpuid_max with a null signature pointer.
128 %and = and i32 %__level, -2147483648
129 %call = call i32 @__get_cpuid_max(i32 %and, i32* null)
; Unsigned compare: a reported maximum below the requested level takes the
; if.then path, which yields 0.
130 %cmp = icmp ult i32 %call, %__level
131 br i1 %cmp, label %if.then, label %if.end
133 if.then: ; preds = %entry
; Level accepted: run cpuid. The "0" constraint ties the %__level input to
; the first output ({ax}); the four outputs come back as one struct.
136 if.end: ; preds = %entry
137 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__level) nounwind, !srcloc !1
138 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
139 %asmresult8 = extractvalue { i32, i32, i32, i32 } %tmp, 1
140 %asmresult9 = extractvalue { i32, i32, i32, i32 } %tmp, 2
141 %asmresult10 = extractvalue { i32, i32, i32, i32 } %tmp, 3
; Write the {ax}, {bx}, {cx}, {dx} outputs to the caller-provided locations.
142 store i32 %asmresult, i32* %__eax, align 4
143 store i32 %asmresult8, i32* %__ebx, align 4
144 store i32 %asmresult9, i32* %__ecx, align 4
145 store i32 %asmresult10, i32* %__edx, align 4
; Merge point: 0 when coming from if.then, 1 when coming from if.end.
148 return: ; preds = %if.end, %if.then
149 %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
; External @exit, called with status 0 from @check_vect and @sig_ill_handler.
153 declare void @exit(i32) noreturn
; i32 @__get_cpuid_max(i32 %__ext, i32* %__sig)
; Executes the cpuid instruction with %__ext as input and, when %__sig is not
; null, stores the second result word ({bx}) through it.
; NOTE(review): the `ret` is not visible in this listing; presumably the first
; result word (%asmresult) is returned - confirm against the full file. The
; entry label, the terminator of if.then and the closing brace are also
; missing from this listing.
155 define internal i32 @__get_cpuid_max(i32 %__ext, i32* %__sig) nounwind uwtable inlinehint {
; The "0" constraint ties the %__ext input to the first output ({ax}).
157 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__ext) nounwind, !srcloc !2
158 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
; Skip the store when the caller passed a null %__sig.
159 %tobool = icmp eq i32* %__sig, null
160 br i1 %tobool, label %if.end, label %if.then
162 if.then: ; preds = %entry
163 %asmresult1 = extractvalue { i32, i32, i32, i32 } %tmp, 1
164 store i32 %asmresult1, i32* %__sig, align 4
167 if.end: ; preds = %entry, %if.then
; Metadata nodes referenced through !srcloc by the inline asm calls in this
; file: !0 by the empty asm in @main, !1 by the cpuid asm in @__get_cpuid and
; !2 by the cpuid asm in @__get_cpuid_max.
171 !0 = metadata !{i32 881}
172 !1 = metadata !{i32 -2147342888, i32 -2147342880}
173 !2 = metadata !{i32 -2147343011, i32 -2147343003}