docs/experiments/matmul/matmul.s

   1 ; ModuleID = 'matmul.c'
   2 source_filename = "matmul.c"
   3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
   4 target triple = "x86_64-unknown-linux-gnu"
   5
   6 %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
   7 %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
   8
   9 @A = common global [1536 x [1536 x float]] zeroinitializer, align 16 ; 1536 x 1536 float matrix; written by @init_array, read by @main
  10 @B = common global [1536 x [1536 x float]] zeroinitializer, align 16 ; 1536 x 1536 float matrix; written by @init_array, read by @main
  11 @stdout = external global %struct._IO_FILE*, align 8 ; external: only declared in this module; loaded by @print_array
  12 @.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1 ; per-element format string used by @print_array
  13 @C = common global [1536 x [1536 x float]] zeroinitializer, align 16 ; 1536 x 1536 float matrix; written by @main, read by @print_array
  14 @.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 ; a single newline, used by @print_array
  15
  16 ; Function Attrs: nounwind uwtable
  17 define void @init_array() #0 { ; stores (1 + (i*j) % 1024) / 2.0, truncated to float, into A[i][j] and B[i][j] for all i, j in [0, 1536)
  18 entry:
  19   %i = alloca i32, align 4 ; stack slot for the outer loop counter
  20   %j = alloca i32, align 4 ; stack slot for the inner loop counter
  21   store i32 0, i32* %i, align 4 ; i = 0
  22   br label %for.cond
  23
  24 for.cond:                                         ; preds = %for.inc17, %entry
  25   %0 = load i32, i32* %i, align 4
  26   %cmp = icmp slt i32 %0, 1536 ; outer loop continues while i < 1536
  27   br i1 %cmp, label %for.body, label %for.end19
  28
  29 for.body:                                         ; preds = %for.cond
  30   store i32 0, i32* %j, align 4 ; j = 0 at the start of every outer iteration
  31   br label %for.cond1
  32
  33 for.cond1:                                        ; preds = %for.inc, %for.body
  34   %1 = load i32, i32* %j, align 4
  35   %cmp2 = icmp slt i32 %1, 1536 ; inner loop continues while j < 1536
  36   br i1 %cmp2, label %for.body3, label %for.end
  37
  38 for.body3:                                        ; preds = %for.cond1
  39   %2 = load i32, i32* %i, align 4
  40   %3 = load i32, i32* %j, align 4
  41   %mul = mul nsw i32 %2, %3 ; i * j
  42   %rem = srem i32 %mul, 1024 ; signed remainder: (i * j) % 1024
  43   %add = add nsw i32 1, %rem ; 1 + (i * j) % 1024
  44   %conv = sitofp i32 %add to double ; the division below is carried out in double precision
  45   %div = fdiv double %conv, 2.000000e+00
  46   %conv4 = fptrunc double %div to float ; narrowed to float, the element type of @A
  47   %4 = load i32, i32* %j, align 4
  48   %idxprom = sext i32 %4 to i64 ; j widened to i64 for use as a GEP index
  49   %5 = load i32, i32* %i, align 4
  50   %idxprom5 = sext i32 %5 to i64 ; i widened to i64 for use as a GEP index
  51   %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom5 ; &A[i]
  52   %arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom ; &A[i][j]
  53   store float %conv4, float* %arrayidx6, align 4 ; A[i][j] = value
  54   %6 = load i32, i32* %i, align 4 ; from here on the same expression is recomputed from scratch for @B
  55   %7 = load i32, i32* %j, align 4
  56   %mul7 = mul nsw i32 %6, %7
  57   %rem8 = srem i32 %mul7, 1024
  58   %add9 = add nsw i32 1, %rem8
  59   %conv10 = sitofp i32 %add9 to double
  60   %div11 = fdiv double %conv10, 2.000000e+00
  61   %conv12 = fptrunc double %div11 to float
  62   %8 = load i32, i32* %j, align 4
  63   %idxprom13 = sext i32 %8 to i64
  64   %9 = load i32, i32* %i, align 4
  65   %idxprom14 = sext i32 %9 to i64
  66   %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom14 ; &B[i]
  67   %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13 ; &B[i][j]
  68   store float %conv12, float* %arrayidx16, align 4 ; B[i][j] = value
  69   br label %for.inc
  70
  71 for.inc:                                          ; preds = %for.body3
  72   %10 = load i32, i32* %j, align 4
  73   %inc = add nsw i32 %10, 1
  74   store i32 %inc, i32* %j, align 4 ; j = j + 1
  75   br label %for.cond1
  76
  77 for.end:                                          ; preds = %for.cond1
  78   br label %for.inc17
  79
  80 for.inc17:                                        ; preds = %for.end
  81   %11 = load i32, i32* %i, align 4
  82   %inc18 = add nsw i32 %11, 1
  83   store i32 %inc18, i32* %i, align 4 ; i = i + 1
  84   br label %for.cond
  85
  86 for.end19:                                        ; preds = %for.cond
  87   ret void
  88 }
  89
  90 ; Function Attrs: nounwind uwtable
  91 define void @print_array() #0 { ; writes every C[i][j] to @stdout using the "%lf " format, with a newline after every 80th value in a row and after each row
  92 entry:
  93   %i = alloca i32, align 4 ; stack slot for the row counter
  94   %j = alloca i32, align 4 ; stack slot for the column counter
  95   store i32 0, i32* %i, align 4 ; i = 0
  96   br label %for.cond
  97
  98 for.cond:                                         ; preds = %for.inc10, %entry
  99   %0 = load i32, i32* %i, align 4
 100   %cmp = icmp slt i32 %0, 1536 ; outer loop continues while i < 1536
 101   br i1 %cmp, label %for.body, label %for.end12
 102
 103 for.body:                                         ; preds = %for.cond
 104   store i32 0, i32* %j, align 4 ; j = 0 at the start of every row
 105   br label %for.cond1
 106
 107 for.cond1:                                        ; preds = %for.inc, %for.body
 108   %1 = load i32, i32* %j, align 4
 109   %cmp2 = icmp slt i32 %1, 1536 ; inner loop continues while j < 1536
 110   br i1 %cmp2, label %for.body3, label %for.end
 111
 112 for.body3:                                        ; preds = %for.cond1
 113   %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8 ; @stdout is reloaded before each fprintf call
 114   %3 = load i32, i32* %j, align 4
 115   %idxprom = sext i32 %3 to i64 ; j widened to i64 for use as a GEP index
 116   %4 = load i32, i32* %i, align 4
 117   %idxprom4 = sext i32 %4 to i64 ; i widened to i64 for use as a GEP index
 118   %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4 ; &C[i]
 119   %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom ; &C[i][j]
 120   %5 = load float, float* %arrayidx5, align 4
 121   %conv = fpext float %5 to double ; the element is handed to the variadic fprintf as a double
 122   %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %conv)
 123   %6 = load i32, i32* %j, align 4
 124   %rem = srem i32 %6, 80
 125   %cmp6 = icmp eq i32 %rem, 79 ; true when j % 80 == 79, i.e. after every 80th value of the row
 126   br i1 %cmp6, label %if.then, label %if.end
 127
 128 if.then:                                          ; preds = %for.body3
 129   %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
 130   %call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) ; emit the newline string @.str.1
 131   br label %if.end
 132
 133 if.end:                                           ; preds = %if.then, %for.body3
 134   br label %for.inc
 135
 136 for.inc:                                          ; preds = %if.end
 137   %8 = load i32, i32* %j, align 4
 138   %inc = add nsw i32 %8, 1
 139   store i32 %inc, i32* %j, align 4 ; j = j + 1
 140   br label %for.cond1
 141
 142 for.end:                                          ; preds = %for.cond1
 143   %9 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
 144   %call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) ; newline once the inner loop over j has finished
 145   br label %for.inc10
 146
 147 for.inc10:                                        ; preds = %for.end
 148   %10 = load i32, i32* %i, align 4
 149   %inc11 = add nsw i32 %10, 1
 150   store i32 %inc11, i32* %i, align 4 ; i = i + 1
 151   br label %for.cond
 152
 153 for.end12:                                        ; preds = %for.cond
 154   ret void
 155 }
 156
 157 declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1 ; external variadic function; its only call sites in this module are in @print_array
 158
 159 ; Function Attrs: nounwind uwtable
 160 define i32 @main() #0 { ; calls @init_array, then computes C[i][j] = sum over k of A[i][k] * B[k][j] for all i, j, k in [0, 1536); returns 0. @print_array is not called from here
 161 entry:
 162   %retval = alloca i32, align 4 ; written once below and never read; the function returns the constant 0
 163   %i = alloca i32, align 4 ; stack slot for the row counter of C
 164   %j = alloca i32, align 4 ; stack slot for the column counter of C
 165   %k = alloca i32, align 4 ; stack slot for the reduction counter
 166   %t_start = alloca double, align 8 ; allocated but never referenced again in this function
 167   %t_end = alloca double, align 8 ; allocated but never referenced again in this function
 168   store i32 0, i32* %retval, align 4
 169   call void @init_array() ; fill @A and @B before multiplying
 170   store i32 0, i32* %i, align 4 ; i = 0
 171   br label %for.cond
 172
 173 for.cond:                                         ; preds = %for.inc28, %entry
 174   %0 = load i32, i32* %i, align 4
 175   %cmp = icmp slt i32 %0, 1536 ; outer loop continues while i < 1536
 176   br i1 %cmp, label %for.body, label %for.end30
 177
 178 for.body:                                         ; preds = %for.cond
 179   store i32 0, i32* %j, align 4 ; j = 0 at the start of every row
 180   br label %for.cond1
 181
 182 for.cond1:                                        ; preds = %for.inc25, %for.body
 183   %1 = load i32, i32* %j, align 4
 184   %cmp2 = icmp slt i32 %1, 1536 ; middle loop continues while j < 1536
 185   br i1 %cmp2, label %for.body3, label %for.end27
 186
 187 for.body3:                                        ; preds = %for.cond1
 188   %2 = load i32, i32* %j, align 4
 189   %idxprom = sext i32 %2 to i64 ; j widened to i64 for use as a GEP index
 190   %3 = load i32, i32* %i, align 4
 191   %idxprom4 = sext i32 %3 to i64 ; i widened to i64 for use as a GEP index
 192   %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4 ; &C[i]
 193   %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom ; &C[i][j]
 194   store float 0.000000e+00, float* %arrayidx5, align 4 ; C[i][j] = 0 before accumulating over k
 195   store i32 0, i32* %k, align 4 ; k = 0
 196   br label %for.cond6
 197
 198 for.cond6:                                        ; preds = %for.inc, %for.body3
 199   %4 = load i32, i32* %k, align 4
 200   %cmp7 = icmp slt i32 %4, 1536 ; innermost loop continues while k < 1536
 201   br i1 %cmp7, label %for.body8, label %for.end
 202
 203 for.body8:                                        ; preds = %for.cond6
 204   %5 = load i32, i32* %j, align 4
 205   %idxprom9 = sext i32 %5 to i64
 206   %6 = load i32, i32* %i, align 4
 207   %idxprom10 = sext i32 %6 to i64
 208   %arrayidx11 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom10 ; &C[i]
 209   %arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx11, i64 0, i64 %idxprom9 ; &C[i][j]
 210   %7 = load float, float* %arrayidx12, align 4 ; running sum, reloaded from memory on every k iteration
 211   %8 = load i32, i32* %k, align 4
 212   %idxprom13 = sext i32 %8 to i64
 213   %9 = load i32, i32* %i, align 4
 214   %idxprom14 = sext i32 %9 to i64
 215   %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom14 ; &A[i]
 216   %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13 ; &A[i][k]
 217   %10 = load float, float* %arrayidx16, align 4 ; A[i][k]
 218   %11 = load i32, i32* %j, align 4
 219   %idxprom17 = sext i32 %11 to i64
 220   %12 = load i32, i32* %k, align 4
 221   %idxprom18 = sext i32 %12 to i64
 222   %arrayidx19 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom18 ; &B[k]
 223   %arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx19, i64 0, i64 %idxprom17 ; &B[k][j]
 224   %13 = load float, float* %arrayidx20, align 4 ; B[k][j]
 225   %mul = fmul float %10, %13 ; A[i][k] * B[k][j], in single precision
 226   %add = fadd float %7, %mul ; C[i][j] + A[i][k] * B[k][j]
 227   %14 = load i32, i32* %j, align 4
 228   %idxprom21 = sext i32 %14 to i64
 229   %15 = load i32, i32* %i, align 4
 230   %idxprom22 = sext i32 %15 to i64
 231   %arrayidx23 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom22 ; &C[i]
 232   %arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx23, i64 0, i64 %idxprom21 ; &C[i][j]
 233   store float %add, float* %arrayidx24, align 4 ; write the updated sum back to C[i][j]
 234   br label %for.inc
 235
 236 for.inc:                                          ; preds = %for.body8
 237   %16 = load i32, i32* %k, align 4
 238   %inc = add nsw i32 %16, 1
 239   store i32 %inc, i32* %k, align 4 ; k = k + 1
 240   br label %for.cond6
 241
 242 for.end:                                          ; preds = %for.cond6
 243   br label %for.inc25
 244
 245 for.inc25:                                        ; preds = %for.end
 246   %17 = load i32, i32* %j, align 4
 247   %inc26 = add nsw i32 %17, 1
 248   store i32 %inc26, i32* %j, align 4 ; j = j + 1
 249   br label %for.cond1
 250
 251 for.end27:                                        ; preds = %for.cond1
 252   br label %for.inc28
 253
 254 for.inc28:                                        ; preds = %for.end27
 255   %18 = load i32, i32* %i, align 4
 256   %inc29 = add nsw i32 %18, 1
 257   store i32 %inc29, i32* %i, align 4 ; i = i + 1
 258   br label %for.cond
 259
 260 for.end30:                                        ; preds = %for.cond
 261   ret i32 0
 262 }
 263
 264 attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 265 attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 266
 267 !llvm.ident = !{!0}
 268
 269 !0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}