/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 * SPDX-License-Identifier: Apache-2.0 */

# include "device/hip/device_impl.h"

# include "util/debug.h"
# include "util/foreach.h"
# include "util/log.h"
# include "util/map.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/string.h"
# include "util/system.h"
# include "util/time.h"
# include "util/types.h"
# include "util/windows.h"

# include "kernel/device/hip/globals.h"

bool HIPDevice::have_precompiled_kernels()
{
  string fatbins_path = path_get("lib");
  return path_exists(fatbins_path);
}

BVHLayoutMask HIPDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
{
  return BVH_LAYOUT_BVH2;
}

void HIPDevice::set_error(const string &error)
{
  Device::set_error(error);

  if (first_error) {
    fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
    fprintf(stderr,
            "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
    first_error = false;
  }
}

HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
    : GPUDevice(info, stats, profiler)
{
  /* Verify that base class types can be used with specific backend types. */
  static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
  static_assert(sizeof(arrayMemObject) == sizeof(hArray));

  first_error = true;

  hipDevId = info.num;
  hipDevice = 0;
  hipContext = 0;

  hipModule = 0;

  need_texture_info = false;

  /* Initialize HIP. */
  hipError_t result = hipInit(0);
  if (result != hipSuccess) {
    set_error(string_printf("Failed to initialize HIP runtime (%s)", hipewErrorString(result)));
    return;
  }

  /* Setup device and context. */
  result = hipDeviceGet(&hipDevice, hipDevId);
  if (result != hipSuccess) {
    set_error(string_printf("Failed to get HIP device handle from ordinal (%s)",
                            hipewErrorString(result)));
    return;
  }

  /* hipDeviceMapHost for mapping host memory when out of device memory.
   * hipDeviceLmemResizeToMax for reserving local memory ahead of render,
   * so we can predict which memory to map to host. */
  int value;
  hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
  can_map_host = value != 0;

  hip_assert(
      hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));

  unsigned int ctx_flags = hipDeviceLmemResizeToMax;
  if (can_map_host) {
    ctx_flags |= hipDeviceMapHost;
    init_host_memory();
  }

  /* Create context. */
  result = hipCtxCreate(&hipContext, ctx_flags, hipDevice);

  if (result != hipSuccess) {
    set_error(string_printf("Failed to create HIP context (%s)", hipewErrorString(result)));
    return;
  }

  int major, minor;
  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
  hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
  hipDevArchitecture = major * 100 + minor * 10;

  /* Get HIP runtime version, needed for memory types. */
  hip_assert(hipRuntimeGetVersion(&hipRuntimeVersion));

  /* Pop context set by hipCtxCreate. */
  hipCtxPopCurrent(NULL);
}

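/* Every entry point below re-binds the context created above through an RAII
 * HIPContextScope. A minimal sketch of that helper, assuming the push/pop
 * semantics of the HIP driver API (the actual implementation lives in the
 * HIP utility header, device/hip/util.h):
 *
 *   class HIPContextScope {
 *    public:
 *     explicit HIPContextScope(HIPDevice *device)
 *     {
 *       hip_assert(hipCtxPushCurrent(device->hipContext));
 *     }
 *     ~HIPContextScope()
 *     {
 *       hip_assert(hipCtxPopCurrent(NULL));
 *     }
 *   };
 */
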
HIPDevice::~HIPDevice()
{
  texture_info.free();

  hip_assert(hipModuleUnload(hipModule));
  hip_assert(hipCtxDestroy(hipContext));
}

bool HIPDevice::support_device(const uint /*kernel_features*/)
{
  if (hipSupportsDevice(hipDevId)) {
    return true;
  }
  else {
    /* We only support Navi and above. */
    hipDeviceProp_t props;
    hipGetDeviceProperties(&props, hipDevId);

    set_error(string_printf("HIP backend requires AMD RDNA graphics card or up, but found %s.",
                            props.name));
    return false;
  }
}

bool HIPDevice::check_peer_access(Device *peer_device)
{
  if (peer_device == this) {
    return false;
  }
  if (peer_device->info.type != DEVICE_HIP && peer_device->info.type != DEVICE_OPTIX) {
    return false;
  }

  HIPDevice *const peer_device_hip = static_cast<HIPDevice *>(peer_device);

  int can_access = 0;
  hip_assert(hipDeviceCanAccessPeer(&can_access, hipDevice, peer_device_hip->hipDevice));
  if (can_access == 0) {
    return false;
  }

  // Ensure array access over the link is possible as well (for 3D textures)
  hip_assert(hipDeviceGetP2PAttribute(
      &can_access, hipDevP2PAttrHipArrayAccessSupported, hipDevice, peer_device_hip->hipDevice));
  if (can_access == 0) {
    return false;
  }

  // Enable peer access in both directions
  {
    const HIPContextScope scope(this);
    hipError_t result = hipCtxEnablePeerAccess(peer_device_hip->hipContext, 0);
    if (result != hipSuccess) {
      set_error(string_printf("Failed to enable peer access on HIP context (%s)",
                              hipewErrorString(result)));
      return false;
    }
  }
  {
    const HIPContextScope scope(peer_device_hip);
    hipError_t result = hipCtxEnablePeerAccess(hipContext, 0);
    if (result != hipSuccess) {
      set_error(string_printf("Failed to enable peer access on HIP context (%s)",
                              hipewErrorString(result)));
      return false;
    }
  }

  return true;
}

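/* Peer access is directional in the HIP API, which is why the two scoped
 * blocks above enable it twice: once from this device's context to the peer,
 * and once from the peer's context back to this device. */
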
bool HIPDevice::use_adaptive_compilation()
{
  return DebugFlags().hip.adaptive_compile;
}

/* Common HIPCC flags which stay the same regardless of shading model or
 * kernel sources md5, and depend only on the compiler and compilation settings. */
string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
{
  const int machine = system_cpu_bits();
  const string source_path = path_get("source");
  const string include_path = source_path;
  string cflags = string_printf(
      "-m%d "
      "--use_fast_math "
      "-DHIPCC "
      "-I\"%s\"",
      machine,
      include_path.c_str());
  if (use_adaptive_compilation()) {
    cflags += " -D__KERNEL_FEATURES__=" + to_string(kernel_features);
  }
  return cflags;
}

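/* These flags deliberately contain nothing kernel-source specific: they are
 * hashed together with the source md5 in compile_kernel() below, so changing
 * either the sources or the compiler settings yields a new fatbin cache entry. */
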
string HIPDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
{
  /* Compute kernel name. */
  int major, minor;
  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
  hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
  const std::string arch = hipDeviceArch(hipDevId);

  /* Attempt to use kernel provided with Blender. */
  if (!use_adaptive_compilation()) {
    const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch.c_str()));
    VLOG_INFO << "Testing for pre-compiled kernel " << fatbin << ".";
    if (path_exists(fatbin)) {
      VLOG_INFO << "Using precompiled kernel.";
      return fatbin;
    }
  }

  /* Try to use locally compiled kernel. */
  string source_path = path_get("source");
  const string source_md5 = path_files_md5_hash(source_path);

  /* We include cflags into md5 so changing hip toolkit or changing other
   * compiler command line arguments makes sure fatbin gets re-built. */
  string common_cflags = compile_kernel_get_common_cflags(kernel_features);
  const string kernel_md5 = util_md5_string(source_md5 + common_cflags);

  const char *const kernel_ext = "genco";
  std::string options;
# ifdef _WIN32
  options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -ffast-math");
# else
  options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math");
# endif
# ifdef WITH_CYCLES_DEBUG
  options.append(" -save-temps");
# endif
  options.append(" --amdgpu-target=").append(arch.c_str());

  const string include_path = source_path;
  const string fatbin_file = string_printf(
      "cycles_%s_%s_%s", name, arch.c_str(), kernel_md5.c_str());
  const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
  VLOG_INFO << "Testing for locally compiled kernel " << fatbin << ".";
  if (path_exists(fatbin)) {
    VLOG_INFO << "Using locally compiled kernel.";
    return fatbin;
  }

# ifdef _WIN32
  if (!use_adaptive_compilation() && have_precompiled_kernels()) {
    if (!hipSupportsDevice(hipDevId)) {
      set_error(
          string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
                        "Your GPU is not supported.",
                        major,
                        minor));
    }
    else {
      set_error(
          string_printf("HIP binary kernel for this graphics card compute "
                        "capability (%d.%d) not found.",
                        major,
                        minor));
    }
    return string();
  }
# endif

  /* Compile. */
  const char *const hipcc = hipewCompilerPath();
  if (hipcc == NULL) {
    set_error(
        "HIP hipcc compiler not found. "
        "Install HIP toolkit in default location.");
    return string();
  }

  const int hipcc_hip_version = hipewCompilerVersion();
  VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << ".";
  if (hipcc_hip_version < 40) {
    printf(
        "Unsupported HIP version %d.%d detected, "
        "you need HIP 4.0 or newer.\n",
        hipcc_hip_version / 10,
        hipcc_hip_version % 10);
    return string();
  }

  double starttime = time_dt();

  path_create_directories(fatbin);

  source_path = path_join(path_join(source_path, "kernel"),
                          path_join("device", path_join(base, string_printf("%s.cpp", name))));

  string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"",
                                 hipcc,
                                 options.c_str(),
                                 include_path.c_str(),
                                 kernel_ext,
                                 source_path.c_str(),
                                 fatbin.c_str());

  printf("Compiling %sHIP kernel ...\n%s\n",
         (use_adaptive_compilation()) ? "adaptive " : "",
         command.c_str());

# ifdef _WIN32
  command = "call " + command;
# endif
  if (system(command.c_str()) != 0) {
    set_error(
        "Failed to execute compilation command, "
        "see console for details.");
    return string();
  }

  /* Verify that compilation succeeded. */
  if (!path_exists(fatbin)) {
    set_error(
        "HIP kernel compilation failed, "
        "see console for details.");
    return string();
  }

  printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);

  return fatbin;
}

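/* Summary of the kernel lookup order implemented above:
 *  1. pre-compiled fatbin shipped with Blender (lib/<name>_<arch>.fatbin),
 *     skipped when adaptive compilation is enabled;
 *  2. previously compiled fatbin from the user cache directory, keyed on the
 *     md5 of the kernel sources plus the common cflags;
 *  3. a fresh hipcc invocation, whose output is stored in that same cache. */
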
bool HIPDevice::load_kernels(const uint kernel_features)
{
  /* TODO(sergey): Support kernels re-load for HIP devices adaptive compile.
   *
   * Currently re-loading kernels will invalidate memory pointers.
   */
  if (hipModule) {
    if (use_adaptive_compilation()) {
      VLOG_INFO << "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
    }
    return true;
  }

  /* check if hip init succeeded */
  if (hipContext == 0) {
    return false;
  }

  /* check if GPU is supported */
  if (!support_device(kernel_features)) {
    return false;
  }

  /* get kernel */
  const char *kernel_name = "kernel";
  string fatbin = compile_kernel(kernel_features, kernel_name);
  if (fatbin.empty()) {
    return false;
  }

  /* open module */
  HIPContextScope scope(this);

  string fatbin_data;
  hipError_t result;

  if (path_read_text(fatbin, fatbin_data))
    result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
  else
    result = hipErrorFileNotFound;

  if (result != hipSuccess)
    set_error(string_printf(
        "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(result)));

  if (result == hipSuccess) {
    kernels.load(this);
    reserve_local_memory(kernel_features);
  }

  return (result == hipSuccess);
}

void HIPDevice::reserve_local_memory(const uint kernel_features)
{
  /* Together with hipDeviceLmemResizeToMax, this reserves local memory
   * needed for kernel launches, so that we can reliably figure out when
   * to allocate scene data in mapped host memory. */
  size_t total = 0, free_before = 0, free_after = 0;

  {
    HIPContextScope scope(this);
    hipMemGetInfo(&free_before, &total);
  }

  {
    /* Use the biggest kernel for estimation. */
    const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
                                     (kernel_features & KERNEL_FEATURE_MNEE) ?
                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;

    /* Launch kernel, using just 1 block appears sufficient to reserve memory for all
     * multiprocessors. It would be good to do this in parallel for the multi GPU case
     * still to make it faster. */
    HIPDeviceQueue queue(this);

    device_ptr d_path_index = 0;
    device_ptr d_render_buffer = 0;
    int d_work_size = 0;
    DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);

    queue.init_execution();
    queue.enqueue(test_kernel, 1, args);
    queue.synchronize();
  }

  {
    HIPContextScope scope(this);
    hipMemGetInfo(&free_after, &total);
  }

  VLOG_INFO << "Local memory reserved " << string_human_readable_number(free_before - free_after)
            << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";

# if 0
  /* For testing mapped host memory, fill up device memory. */
  const size_t keep_mb = 1024;

  while (free_after > keep_mb * 1024 * 1024LL) {
    hipDeviceptr_t tmp;
    hip_assert(hipMalloc(&tmp, 10 * 1024 * 1024LL));
    hipMemGetInfo(&free_after, &total);
  }
# endif
}

void HIPDevice::get_device_memory_info(size_t &total, size_t &free)
{
  HIPContextScope scope(this);

  hipMemGetInfo(&free, &total);
}

bool HIPDevice::alloc_device(void *&device_pointer, size_t size)
{
  HIPContextScope scope(this);

  hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
  return mem_alloc_result == hipSuccess;
}

void HIPDevice::free_device(void *device_pointer)
{
  HIPContextScope scope(this);

  hip_assert(hipFree((hipDeviceptr_t)device_pointer));
}

bool HIPDevice::alloc_host(void *&shared_pointer, size_t size)
{
  HIPContextScope scope(this);

  hipError_t mem_alloc_result = hipHostMalloc(
      &shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);

  return mem_alloc_result == hipSuccess;
}

void HIPDevice::free_host(void *shared_pointer)
{
  HIPContextScope scope(this);

  hipHostFree(shared_pointer);
}

void HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
{
  HIPContextScope scope(this);

  hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
}

void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
{
  const HIPContextScope scope(this);

  hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
}

void HIPDevice::mem_alloc(device_memory &mem)
{
  if (mem.type == MEM_TEXTURE) {
    assert(!"mem_alloc not supported for textures.");
  }
  else if (mem.type == MEM_GLOBAL) {
    assert(!"mem_alloc not supported for global memory.");
  }
  else {
    generic_alloc(mem);
  }
}

void HIPDevice::mem_copy_to(device_memory &mem)
{
  if (mem.type == MEM_GLOBAL) {
    global_free(mem);
    global_alloc(mem);
  }
  else if (mem.type == MEM_TEXTURE) {
    tex_free((device_texture &)mem);
    tex_alloc((device_texture &)mem);
  }
  else {
    if (!mem.device_pointer) {
      generic_alloc(mem);
    }
    generic_copy_to(mem);
  }
}

void HIPDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
{
  if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
    assert(!"mem_copy_from not supported for textures.");
  }
  else if (mem.host_pointer) {
    const size_t size = elem * w * h;
    const size_t offset = elem * y * w;

    if (mem.device_pointer) {
      const HIPContextScope scope(this);
      hip_assert(hipMemcpyDtoH(
          (char *)mem.host_pointer + offset, (hipDeviceptr_t)mem.device_pointer + offset, size));
    }
    else {
      memset((char *)mem.host_pointer + offset, 0, size);
    }
  }
}

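/* The copy is row-based: for element size `elem`, `h` rows of `w` elements
 * starting at row `y` map to `offset = elem * y * w` bytes into both buffers
 * and `size = elem * w * h` bytes in total. */
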
void HIPDevice::mem_zero(device_memory &mem)
{
  if (!mem.device_pointer) {
    mem_alloc(mem);
  }
  if (!mem.device_pointer) {
    return;
  }

  /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
   * regardless of mem.host_pointer and mem.shared_pointer. */
  thread_scoped_lock lock(device_mem_map_mutex);
  if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
    const HIPContextScope scope(this);
    hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
  }
  else if (mem.host_pointer) {
    memset(mem.host_pointer, 0, mem.memory_size());
  }
}

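/* When use_mapped_host is set and host_pointer aliases shared_pointer, the
 * "device" allocation is really host memory mapped into the device address
 * space, so a host-side memset() is the cheaper equivalent of hipMemsetD8(). */
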
void HIPDevice::mem_free(device_memory &mem)
{
  if (mem.type == MEM_GLOBAL) {
    global_free(mem);
  }
  else if (mem.type == MEM_TEXTURE) {
    tex_free((device_texture &)mem);
  }
  else {
    generic_free(mem);
  }
}

device_ptr HIPDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
{
  return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
}

void HIPDevice::const_copy_to(const char *name, void *host, size_t size)
{
  HIPContextScope scope(this);
  hipDeviceptr_t mem;
  size_t bytes;

  hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, "kernel_params"));
  assert(bytes == sizeof(KernelParamsHIP));

  /* Update data storage pointers in launch parameters. */
# define KERNEL_DATA_ARRAY(data_type, data_name) \
    if (strcmp(name, #data_name) == 0) { \
      hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, data_name), host, size)); \
      return; \
    }
  KERNEL_DATA_ARRAY(KernelData, data)
  KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
# include "kernel/data_arrays.h"
# undef KERNEL_DATA_ARRAY
}

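/* KERNEL_DATA_ARRAY expands once per entry in kernel/data_arrays.h, turning
 * this function into a name -> offsetof(KernelParamsHIP, ...) lookup: the
 * matching entry copies `size` bytes into the kernel_params global at the
 * offset of the named member and returns. */
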
void HIPDevice::global_alloc(device_memory &mem)
{
  if (mem.is_resident(this)) {
    generic_alloc(mem);
    generic_copy_to(mem);
  }

  const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer));
}

void HIPDevice::global_free(device_memory &mem)
{
  if (mem.is_resident(this) && mem.device_pointer) {
    generic_free(mem);
  }
}

void HIPDevice::tex_alloc(device_texture &mem)
{
  HIPContextScope scope(this);

  size_t dsize = datatype_size(mem.data_type);
  size_t size = mem.memory_size();

  hipTextureAddressMode address_mode = hipAddressModeWrap;
  switch (mem.info.extension) {
    case EXTENSION_REPEAT:
      address_mode = hipAddressModeWrap;
      break;
    case EXTENSION_EXTEND:
      address_mode = hipAddressModeClamp;
      break;
    case EXTENSION_CLIP:
      address_mode = hipAddressModeBorder;
      break;
    case EXTENSION_MIRROR:
      address_mode = hipAddressModeMirror;
      break;
    default:
      assert(0);
      break;
  }

  hipTextureFilterMode filter_mode;
  if (mem.info.interpolation == INTERPOLATION_CLOSEST) {
    filter_mode = hipFilterModePoint;
  }
  else {
    filter_mode = hipFilterModeLinear;
  }

  /* Image Texture Storage */
  hipArray_Format format;
  switch (mem.data_type) {
    case TYPE_UCHAR:
      format = HIP_AD_FORMAT_UNSIGNED_INT8;
      break;
    case TYPE_UINT16:
      format = HIP_AD_FORMAT_UNSIGNED_INT16;
      break;
    case TYPE_UINT:
      format = HIP_AD_FORMAT_UNSIGNED_INT32;
      break;
    case TYPE_INT:
      format = HIP_AD_FORMAT_SIGNED_INT32;
      break;
    case TYPE_FLOAT:
      format = HIP_AD_FORMAT_FLOAT;
      break;
    case TYPE_HALF:
      format = HIP_AD_FORMAT_HALF;
      break;
    default:
      assert(0);
      return;
  }

  Mem *cmem = NULL;
  hArray array_3d = NULL;
  size_t src_pitch = mem.data_width * dsize * mem.data_elements;
  size_t dst_pitch = src_pitch;

  if (!mem.is_resident(this)) {
    thread_scoped_lock lock(device_mem_map_mutex);
    cmem = &device_mem_map[&mem];

    if (mem.data_depth > 1) {
      array_3d = (hArray)mem.device_pointer;
      cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
    }
    else if (mem.data_height > 0) {
      dst_pitch = align_up(src_pitch, pitch_alignment);
    }
  }
  else if (mem.data_depth > 1) {
    /* 3D texture using array, there is no API for linear memory. */
    HIP_ARRAY3D_DESCRIPTOR desc;

    desc.Width = mem.data_width;
    desc.Height = mem.data_height;
    desc.Depth = mem.data_depth;
    desc.Format = format;
    desc.NumChannels = mem.data_elements;
    desc.Flags = 0;

    VLOG_WORK << "Array 3D allocate: " << mem.name << ", "
              << string_human_readable_number(mem.memory_size()) << " bytes. ("
              << string_human_readable_size(mem.memory_size()) << ")";

    hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));

    if (!array_3d) {
      return;
    }

    HIP_MEMCPY3D param;
    memset(&param, 0, sizeof(HIP_MEMCPY3D));
    param.dstMemoryType = get_memory_type(hipMemoryTypeArray);
    param.dstArray = array_3d;
    param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
    param.srcHost = mem.host_pointer;
    param.srcPitch = src_pitch;
    param.WidthInBytes = param.srcPitch;
    param.Height = mem.data_height;
    param.Depth = mem.data_depth;

    hip_assert(hipDrvMemcpy3D(&param));

    mem.device_pointer = (device_ptr)array_3d;
    mem.device_size = size;
    stats.mem_alloc(size);

    thread_scoped_lock lock(device_mem_map_mutex);
    cmem = &device_mem_map[&mem];
    cmem->texobject = 0;
    cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
  }
  else if (mem.data_height > 0) {
    /* 2D texture, using pitch aligned linear memory. */
    dst_pitch = align_up(src_pitch, pitch_alignment);
    size_t dst_size = dst_pitch * mem.data_height;

    cmem = generic_alloc(mem, dst_size - mem.memory_size());
    if (!cmem) {
      return;
    }

    HIP_MEMCPY2D param;
    memset(&param, 0, sizeof(param));
    param.dstMemoryType = get_memory_type(hipMemoryTypeDevice);
    param.dstDevice = mem.device_pointer;
    param.dstPitch = dst_pitch;
    param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
    param.srcHost = mem.host_pointer;
    param.srcPitch = src_pitch;
    param.WidthInBytes = param.srcPitch;
    param.Height = mem.data_height;

    hip_assert(hipDrvMemcpy2DUnaligned(&param));
  }
  else {
    /* 1D texture, using linear memory. */
    cmem = generic_alloc(mem);
    if (!cmem) {
      return;
    }

    hip_assert(hipMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
  }

  const uint slot = mem.slot;
  if (slot >= texture_info.size()) {
    /* Allocate some slots in advance, to reduce amount
     * of re-allocations. */
    texture_info.resize(slot + 128);
  }

  /* Set Mapping and tag that we need to (re-)upload to device */
  texture_info[slot] = mem.info;
  need_texture_info = true;

  if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
      mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 &&
      mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FPN &&
      mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FP16)
  {
    /* Bindless textures. */
    hipResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));

    if (array_3d) {
      resDesc.resType = hipResourceTypeArray;
      resDesc.res.array.h_Array = array_3d;
    }
    else if (mem.data_height > 0) {
      resDesc.resType = hipResourceTypePitch2D;
      resDesc.res.pitch2D.devPtr = mem.device_pointer;
      resDesc.res.pitch2D.format = format;
      resDesc.res.pitch2D.numChannels = mem.data_elements;
      resDesc.res.pitch2D.height = mem.data_height;
      resDesc.res.pitch2D.width = mem.data_width;
      resDesc.res.pitch2D.pitchInBytes = dst_pitch;
    }
    else {
      resDesc.resType = hipResourceTypeLinear;
      resDesc.res.linear.devPtr = mem.device_pointer;
      resDesc.res.linear.format = format;
      resDesc.res.linear.numChannels = mem.data_elements;
      resDesc.res.linear.sizeInBytes = mem.device_size;
    }

    hipTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0] = address_mode;
    texDesc.addressMode[1] = address_mode;
    texDesc.addressMode[2] = address_mode;
    texDesc.filterMode = filter_mode;
    texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;

    thread_scoped_lock lock(device_mem_map_mutex);
    cmem = &device_mem_map[&mem];

    if (hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL) != hipSuccess) {
      set_error(
          "Failed to create texture. Maximum GPU texture size or available GPU memory was likely "
          "exceeded.");
    }

    texture_info[slot].data = (uint64_t)cmem->texobject;
  }
  else {
    texture_info[slot].data = (uint64_t)mem.device_pointer;
  }
}

void HIPDevice::tex_free(device_texture &mem)
{
  if (mem.device_pointer) {
    HIPContextScope scope(this);
    thread_scoped_lock lock(device_mem_map_mutex);
    DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
    const Mem &cmem = device_mem_map[&mem];

    if (cmem.texobject) {
      /* Free bindless texture. */
      hipTexObjectDestroy(cmem.texobject);
    }

    if (!mem.is_resident(this)) {
      /* Do not free memory here, since it was allocated on a different device. */
      device_mem_map.erase(device_mem_map.find(&mem));
    }
    else if (cmem.array) {
      /* Free array. */
      hipArrayDestroy(reinterpret_cast<hArray>(cmem.array));
      stats.mem_free(mem.device_size);
      mem.device_pointer = 0;
      mem.device_size = 0;

      device_mem_map.erase(device_mem_map.find(&mem));
    }
    else {
      lock.unlock();
      generic_free(mem);
    }
  }
}

unique_ptr<DeviceQueue> HIPDevice::gpu_queue_create()
{
  return make_unique<HIPDeviceQueue>(this);
}

bool HIPDevice::should_use_graphics_interop()
{
  /* Check whether this device is part of OpenGL context.
   *
   * Using HIP device for graphics interoperability which is not part of the OpenGL context is
   * possible, but from the empiric measurements it can be considerably slower than using naive
   * pixels copy. */

  /* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
  return false;

  HIPContextScope scope(this);

  int num_all_devices = 0;
  hip_assert(hipGetDeviceCount(&num_all_devices));

  if (num_all_devices == 0) {
    return false;
  }

  vector<hipDevice_t> gl_devices(num_all_devices);
  uint num_gl_devices = 0;
  hipGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, hipGLDeviceListAll);

  for (hipDevice_t gl_device : gl_devices) {
    if (gl_device == hipDevice) {
      return true;
    }
  }

  return false;
}

int HIPDevice::get_num_multiprocessors()
{
  return get_device_default_attribute(hipDeviceAttributeMultiprocessorCount, 0);
}

int HIPDevice::get_max_num_threads_per_multiprocessor()
{
  return get_device_default_attribute(hipDeviceAttributeMaxThreadsPerMultiProcessor, 0);
}

bool HIPDevice::get_device_attribute(hipDeviceAttribute_t attribute, int *value)
{
  HIPContextScope scope(this);

  return hipDeviceGetAttribute(value, attribute, hipDevice) == hipSuccess;
}

int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value)
{
  int value = 0;
  if (!get_device_attribute(attribute, &value)) {
    return default_value;
  }
  return value;
}

hipMemoryType HIPDevice::get_memory_type(hipMemoryType mem_type)
{
  return get_hip_memory_type(mem_type, hipRuntimeVersion);
}
