sdk/build-tools/26.0.0/renderscript/clang-include/cuda_builtin_vars.h

   1 /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===
   2  *
   3  * Permission is hereby granted, free of charge, to any person obtaining a copy
   4  * of this software and associated documentation files (the "Software"), to deal
   5  * in the Software without restriction, including without limitation the rights
   6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   7  * copies of the Software, and to permit persons to whom the Software is
   8  * furnished to do so, subject to the following conditions:
   9  *
  10  * The above copyright notice and this permission notice shall be included in
  11  * all copies or substantial portions of the Software.
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19  * THE SOFTWARE.
  20  *
  21  *===-----------------------------------------------------------------------===
  22  */
  23
  24 #ifndef __CUDA_BUILTIN_VARS_H
  25 #define __CUDA_BUILTIN_VARS_H
  26
  27 // Forward declares from vector_types.h.
  28 struct uint3;
  29 struct dim3;
  30
  31 // The file implements built-in CUDA variables using __declspec(property).
  32 // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
  33 // All read accesses of built-in variable fields get converted into calls to a
  34 // getter function which in turn calls the appropriate builtin to fetch the
  35 // value.
  36 //
  37 // Example:
  38 //    int x = threadIdx.x;
  39 // IR output:
  40 //  %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3
  41 // PTX output:
  42 //  mov.u32     %r2, %tid.x;
  43
  44 #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC)                                \
  45   __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD;      \
  46   static inline __attribute__((always_inline))                                 \
  47       __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) {     \
  48     return INTRINSIC;                                                          \
  49   }
  50
  51 #if __cplusplus >= 201103L
  52 #define __DELETE =delete
  53 #else
  54 #define __DELETE
  55 #endif
  56
  57 // Make sure nobody can create instances of the special varible types.  nvcc
  58 // also disallows taking address of special variables, so we disable address-of
  59 // operator as well.
  60 #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName)                            \
  61   __attribute__((device)) TypeName() __DELETE;                                 \
  62   __attribute__((device)) TypeName(const TypeName &) __DELETE;                 \
  63   __attribute__((device)) void operator=(const TypeName &) const __DELETE;     \
  64   __attribute__((device)) TypeName *operator&() const __DELETE
  65
  66 struct __cuda_builtin_threadIdx_t {
  67   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
  68   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
  69   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
  70   // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
  71   // uint3).  This function is defined after we pull in vector_types.h.
  72   __attribute__((device)) operator uint3() const;
  73 private:
  74   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
  75 };
  76
  77 struct __cuda_builtin_blockIdx_t {
  78   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
  79   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
  80   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
  81   // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
  82   // uint3).  This function is defined after we pull in vector_types.h.
  83   __attribute__((device)) operator uint3() const;
  84 private:
  85   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
  86 };
  87
  88 struct __cuda_builtin_blockDim_t {
  89   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
  90   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
  91   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
  92   // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
  93   // dim3).  This function is defined after we pull in vector_types.h.
  94   __attribute__((device)) operator dim3() const;
  95 private:
  96   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
  97 };
  98
  99 struct __cuda_builtin_gridDim_t {
 100   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
 101   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
 102   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
 103   // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
 104   // dim3).  This function is defined after we pull in vector_types.h.
 105   __attribute__((device)) operator dim3() const;
 106 private:
 107   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
 108 };
 109
 110 #define __CUDA_BUILTIN_VAR                                                     \
 111   extern const __attribute__((device)) __attribute__((weak))
 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
 113 __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;
 114 __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;
 115 __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;
 116
 117 // warpSize should translate to read of %WARP_SZ but there's currently no
 118 // builtin to do so. According to PTX v4.2 docs 'to date, all target
 119 // architectures have a WARP_SZ value of 32'.
 120 __attribute__((device)) const int warpSize = 32;
 121
 122 #undef __CUDA_DEVICE_BUILTIN
 123 #undef __CUDA_BUILTIN_VAR
 124 #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
 125
 126 #endif /* __CUDA_BUILTIN_VARS_H */