test/GPGPU/phi-nodes-in-kernel.ll

   1 ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
   2 ; RUN: -disable-output < %s | \
   3 ; RUN: FileCheck -check-prefix=CODE %s
   4
   5 ; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
   6 ; RUN: FileCheck %s -check-prefix=IR
   7
   8 ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
   9 ; RUN: -disable-output < %s | \
  10 ; RUN: FileCheck %s -check-prefix=KERNEL-IR
  11
  12 ; REQUIRES: pollyacc
  13
   14 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      ; Target description for this module: the data layout string above and
      ; the x86-64 Linux triple below.
   15 target triple = "x86_64-unknown-linux-gnu"
  16
  17 ; CODE: # host
  18 ; CODE-NEXT: {
  19 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_out_l_055__phi, &MemRef_out_l_055__phi, sizeof(i32), cudaMemcpyHostToDevice));
  20 ; CODE-NEXT:   {
  21 ; CODE-NEXT:     dim3 k0_dimBlock(32);
  22 ; CODE-NEXT:     dim3 k0_dimGrid(2);
  23 ; CODE-NEXT:     kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_out_l_055__phi, dev_MemRef_out_l_055, dev_MemRef_c);
  24 ; CODE-NEXT:     cudaCheckKernel();
  25 ; CODE-NEXT:   }
  26
  27 ; CODE:   cudaCheckReturn(cudaMemcpy(&MemRef_out_l_055__phi, dev_MemRef_out_l_055__phi, sizeof(i32), cudaMemcpyDeviceToHost));
  28 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(&MemRef_out_l_055, dev_MemRef_out_l_055, sizeof(i32), cudaMemcpyDeviceToHost));
  29 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
  30 ; CODE-NEXT: }
  31
  32 ; CODE: # kernel0
  33 ; CODE-NEXT: if (32 * b0 + t0 <= 48) {
  34 ; CODE-NEXT:   if (b0 == 1 && t0 == 16)
  35 ; CODE-NEXT:     Stmt_for_cond1_preheader(0);
  36 ; CODE-NEXT:   Stmt_for_body17(0, 32 * b0 + t0);
  37 ; CODE-NEXT:   if (b0 == 1 && t0 == 16)
  38 ; CODE-NEXT:     Stmt_for_cond15_for_cond12_loopexit_crit_edge(0);
  39 ; CODE-NEXT: }
  40
  41 ; IR:      [[REGA:%.+]] = bitcast i32* %out_l.055.phiops to i8*
  42 ; IR-NEXT: call void @polly_copyFromHostToDevice(i8* [[REGA]], i8* %p_dev_array_MemRef_out_l_055__phi, i64 4)
  43
  44 ; IR:      [[REGB:%.+]] = bitcast i32* %out_l.055.phiops to i8*
  45 ; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_out_l_055__phi, i8* [[REGB]], i64 4)
  46 ; IR-NEXT: [[REGC:%.+]] = bitcast i32* %out_l.055.s2a to i8*
  47 ; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_out_l_055, i8* [[REGC]], i64 4)
  48
  49 ; KERNEL-IR: entry:
  50 ; KERNEL-IR-NEXT:   %out_l.055.s2a = alloca i32
  51 ; KERNEL-IR-NEXT:   %out_l.055.phiops = alloca i32
  52 ; KERNEL-IR-NEXT:   %1 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055__phi to i32*
  53 ; KERNEL-IR-NEXT:   %2 = load i32, i32* %1
  54 ; KERNEL-IR-NEXT:   store i32 %2, i32* %out_l.055.phiops
  55 ; KERNEL-IR-NEXT:   %3 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055 to i32*
  56 ; KERNEL-IR-NEXT:   %4 = load i32, i32* %3
  57 ; KERNEL-IR-NEXT:   store i32 %4, i32* %out_l.055.s2a
  58
  59
   60 define void @kernel_dynprog([50 x i32]* %c) {
      ; Test input function. %c points to an array of 50 i32 values.
      ; The outer loop header (for.cond1.preheader) carries two PHI nodes,
      ; %out_l.055 and %iter.054; the inner loop (for.body17) writes
      ; %c[1] through %c[49].
      ; The expectations at the top of this file refer to names derived from
      ; the blocks and values below (for example Stmt_for_body17,
      ; out_l.055.phiops, out_l.055.s2a), so renaming anything here would
      ; require updating those expectations as well.
   61 entry:
        ; Address of the last array element, %c[49]; it is loaded once the
        ; inner loop has finished.
   62   %arrayidx77 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 49
   63   br label %for.cond1.preheader
   64
      ; Outer loop header. Both PHI nodes start at 0 when coming from entry and
      ; take the values computed in the loop-exit critical-edge block otherwise.
   65 for.cond1.preheader:                              ; preds = %for.cond15.for.cond12.loopexit_crit_edge, %entry
   66   %out_l.055 = phi i32 [ 0, %entry ], [ %add78, %for.cond15.for.cond12.loopexit_crit_edge ]
   67   %iter.054 = phi i32 [ 0, %entry ], [ %inc80, %for.cond15.for.cond12.loopexit_crit_edge ]
   68   br label %for.body17
   69
      ; Runs after the inner loop: reads %c[49], adds it to %out_l.055 and
      ; increments %iter.054.
   70 for.cond15.for.cond12.loopexit_crit_edge:         ; preds = %for.body17
   71   %tmp = load i32, i32* %arrayidx77, align 4
   72   %add78 = add nsw i32 %tmp, %out_l.055
   73   %inc80 = add nuw nsw i32 %iter.054, 1
        ; The branch condition is the constant false, so control always
        ; continues to for.end81; the edge back to the outer header is present
        ; in the CFG but is never taken.
   74   br i1 false, label %for.cond1.preheader, label %for.end81
   75
      ; Inner loop: %indvars.iv71 starts at 1 and the loop exits once the
      ; incremented value, truncated to i32, equals 50, i.e. the stores cover
      ; indices 1 through 49.
   76 for.body17:                                       ; preds = %for.body17, %for.cond1.preheader
   77   %indvars.iv71 = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next72, %for.body17 ]
   78   %arrayidx69 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 %indvars.iv71
        ; The stored value is undef; only the write access itself is present.
   79   store i32 undef, i32* %arrayidx69, align 4
   80   %indvars.iv.next72 = add nuw nsw i64 %indvars.iv71, 1
   81   %lftr.wideiv74 = trunc i64 %indvars.iv.next72 to i32
   82   %exitcond75 = icmp ne i32 %lftr.wideiv74, 50
   83   br i1 %exitcond75, label %for.body17, label %for.cond15.for.cond12.loopexit_crit_edge
   84
      ; Function exit; nothing is returned.
   85 for.end81:                                        ; preds = %for.cond15.for.cond12.loopexit_crit_edge
   86   ret void
   87 }