Fix #118709: Crash in OIDN GPU detection for unsupported HIP device
[blender.git] / intern / cycles / device / hiprt / device_impl.cpp
/* SPDX-FileCopyrightText: 2011-2023 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#ifdef WITH_HIPRT

#  include "device/hiprt/device_impl.h"

#  include "util/debug.h"
#  include "util/foreach.h"
#  include "util/log.h"
#  include "util/map.h"
#  include "util/md5.h"
#  include "util/path.h"
#  include "util/progress.h"
#  include "util/string.h"
#  include "util/system.h"
#  include "util/time.h"
#  include "util/types.h"
#  include "util/windows.h"

#  include "bvh/hiprt.h"

#  include "scene/hair.h"
#  include "scene/mesh.h"
#  include "scene/object.h"
#  include "scene/pointcloud.h"

CCL_NAMESPACE_BEGIN
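
/* Copy a Cycles affine Transform (three rows of four floats) into the
 * row-major matrix layout expected by HIP RT frame matrices. */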
static void get_hiprt_transform(float matrix[][4], Transform &tfm)
{
  int row = 0;
  int col = 0;
  matrix[row][col++] = tfm.x.x;
  matrix[row][col++] = tfm.x.y;
  matrix[row][col++] = tfm.x.z;
  matrix[row][col++] = tfm.x.w;
  row++;
  col = 0;
  matrix[row][col++] = tfm.y.x;
  matrix[row][col++] = tfm.y.y;
  matrix[row][col++] = tfm.y.z;
  matrix[row][col++] = tfm.y.w;
  row++;
  col = 0;
  matrix[row][col++] = tfm.z.x;
  matrix[row][col++] = tfm.z.y;
  matrix[row][col++] = tfm.z.z;
  matrix[row][col++] = tfm.z.w;
}

class HIPRTDevice;

BVHLayoutMask HIPRTDevice::get_bvh_layout_mask(const uint /* kernel_features */) const
{
  return BVH_LAYOUT_HIPRT;
}
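
/* Create the HIP RT context on top of the already initialized HIP context and
 * device, and allocate the function table used for custom primitive
 * intersection. Failures are reported through set_error() and abort setup. */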
HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
    : HIPDevice(info, stats, profiler),
      global_stack_buffer(this, "global_stack_buffer", MEM_DEVICE_ONLY),
      hiprt_context(NULL),
      scene(NULL),
      functions_table(NULL),
      scratch_buffer_size(0),
      scratch_buffer(this, "scratch_buffer", MEM_DEVICE_ONLY),
      prim_visibility(this, "prim_visibility", MEM_GLOBAL),
      instance_transform_matrix(this, "instance_transform_matrix", MEM_READ_ONLY),
      transform_headers(this, "transform_headers", MEM_READ_ONLY),
      user_instance_id(this, "user_instance_id", MEM_GLOBAL),
      hiprt_blas_ptr(this, "hiprt_blas_ptr", MEM_READ_WRITE),
      blas_ptr(this, "blas_ptr", MEM_GLOBAL),
      custom_prim_info(this, "custom_prim_info", MEM_GLOBAL),
      custom_prim_info_offset(this, "custom_prim_info_offset", MEM_GLOBAL),
      prims_time(this, "prims_time", MEM_GLOBAL),
      prim_time_offset(this, "prim_time_offset", MEM_GLOBAL)
{
  HIPContextScope scope(this);
  hiprtContextCreationInput hiprt_context_input = {0};
  hiprt_context_input.ctxt = hipContext;
  hiprt_context_input.device = hipDevice;
  hiprt_context_input.deviceType = hiprtDeviceAMD;
  hiprtError rt_result = hiprtCreateContext(
      HIPRT_API_VERSION, hiprt_context_input, &hiprt_context);

  if (rt_result != hiprtSuccess) {
    set_error(string_printf("Failed to create HIPRT context"));
    return;
  }

  rt_result = hiprtCreateFuncTable(
      hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, &functions_table);

  if (rt_result != hiprtSuccess) {
    set_error(string_printf("Failed to create HIPRT Function Table"));
    return;
  }

  hiprtSetLogLevel(hiprtLogLevelNone);
}

HIPRTDevice::~HIPRTDevice()
{
  HIPContextScope scope(this);
  user_instance_id.free();
  prim_visibility.free();
  hiprt_blas_ptr.free();
  blas_ptr.free();
  instance_transform_matrix.free();
  transform_headers.free();
  custom_prim_info_offset.free();
  custom_prim_info.free();
  prim_time_offset.free();
  prims_time.free();
  global_stack_buffer.free();
  hiprtDestroyFuncTable(hiprt_context, functions_table);
  hiprtDestroyScene(hiprt_context, scene);
  hiprtDestroyContext(hiprt_context);
}

unique_ptr<DeviceQueue> HIPRTDevice::gpu_queue_create()
{
  return make_unique<HIPRTDeviceQueue>(this);
}

string HIPRTDevice::compile_kernel_get_common_cflags(const uint kernel_features)
{
  string cflags = HIPDevice::compile_kernel_get_common_cflags(kernel_features);

  cflags += " -D __HIPRT__ ";

  return cflags;
}
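
/* Resolve the kernel binary to load: prefer the pre-compiled lib/*_rt_gfx.hipfb,
 * then a locally cached build keyed on the source MD5 and compile flags, and
 * otherwise compile in two stages (hipcc emits LLVM bitcode, which clang++ then
 * links against the HIP RT bitcode into the final fatbin). */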
string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
{
  int major, minor;
  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
  hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
  const std::string arch = hipDeviceArch(hipDevId);

  if (!use_adaptive_compilation()) {
    const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb", name));
    VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
    if (path_exists(fatbin)) {
      VLOG(1) << "Using precompiled kernel.";
      return fatbin;
    }
  }

  string source_path = path_get("source");
  const string source_md5 = path_files_md5_hash(source_path);

  string common_cflags = compile_kernel_get_common_cflags(kernel_features);
  const string kernel_md5 = util_md5_string(source_md5 + common_cflags);

  const string include_path = source_path;
  const string bitcode_file = string_printf(
      "cycles_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str());
  const string bitcode = path_cache_get(path_join("kernels", bitcode_file));
  const string fatbin_file = string_printf(
      "cycles_%s_%s_%s.hipfb", name, arch.c_str(), kernel_md5.c_str());
  const string fatbin = path_cache_get(path_join("kernels", fatbin_file));

  VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
  if (path_exists(fatbin)) {
    VLOG(1) << "Using locally compiled kernel.";
    return fatbin;
  }

#  ifdef _WIN32
  if (!use_adaptive_compilation() && have_precompiled_kernels()) {
    if (!hipSupportsDevice(hipDevId)) {
      set_error(
          string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
                        "Your GPU is not supported.",
                        major,
                        minor));
    }
    else {
      set_error(
          string_printf("HIP binary kernel for this graphics card compute "
                        "capability (%d.%d) not found.",
                        major,
                        minor));
    }
    return string();
  }
#  endif

  const char *const hipcc = hipewCompilerPath();
  if (hipcc == NULL) {
    set_error(
        "HIP hipcc compiler not found. "
        "Install HIP toolkit in default location.");
    return string();
  }

  const int hipcc_hip_version = hipewCompilerVersion();
  VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << ".";
  if (hipcc_hip_version < 40) {
    printf(
        "Unsupported HIP version %d.%d detected, "
        "you need HIP 4.0 or newer.\n",
        hipcc_hip_version / 10,
        hipcc_hip_version % 10);
    return string();
  }

  path_create_directories(fatbin);

  source_path = path_join(path_join(source_path, "kernel"),
                          path_join("device", path_join(base, string_printf("%s.cpp", name))));

  printf("Compiling %s and caching to %s", source_path.c_str(), fatbin.c_str());

  double starttime = time_dt();

  const string hiprt_path = getenv("HIPRT_ROOT_DIR");
  // First, app kernels are compiled into bitcode, without access to the implementation of
  // HIP RT functions.
  if (!path_exists(bitcode)) {
    std::string rtc_options;

    rtc_options.append(" --offload-arch=").append(arch.c_str());
    rtc_options.append(" -D __HIPRT__");
    rtc_options.append(" -ffast-math -O3 -std=c++17");
    rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm");

    string command = string_printf("%s %s -I %s -I %s %s -o \"%s\"",
                                   hipcc,
                                   rtc_options.c_str(),
                                   include_path.c_str(),
                                   hiprt_path.c_str(),
                                   source_path.c_str(),
                                   bitcode.c_str());

    printf("Compiling %sHIP kernel ...\n%s\n",
           (use_adaptive_compilation()) ? "adaptive " : "",
           command.c_str());

#  ifdef _WIN32
    command = "call " + command;
#  endif
    if (system(command.c_str()) != 0) {
      set_error(
          "Failed to execute compilation command, "
          "see console for details.");
      return string();
    }
  }

  // After compilation, the produced bitcode is linked with the HIP RT bitcode (containing
  // implementations of HIP RT functions, e.g. traversal) to produce the final executable code.
  string linker_options;
  linker_options.append(" --offload-arch=").append(arch.c_str());
  linker_options.append(" -fgpu-rdc --hip-link --cuda-device-only ");
  string hiprt_ver(HIPRT_VERSION_STR);
  string hiprt_bc = hiprt_path + "\\dist\\bin\\Release\\hiprt" + hiprt_ver + "_amd_lib_win.bc";

  string linker_command = string_printf("clang++ %s \"%s\" %s -o \"%s\"",
                                        linker_options.c_str(),
                                        bitcode.c_str(),
                                        hiprt_bc.c_str(),
                                        fatbin.c_str());

#  ifdef _WIN32
  linker_command = "call " + linker_command;
#  endif
  if (system(linker_command.c_str()) != 0) {
    set_error(
        "Failed to execute linking command, "
        "see console for details.");
    return string();
  }

  printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);

  return fatbin;
}
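
/* Load the fatbin produced by compile_kernel() as a HIP module and, on success,
 * enqueue the shade-surface kernel (the ray-trace or MNEE variant when those
 * features are requested) once with minimal arguments as a test launch. */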
bool HIPRTDevice::load_kernels(const uint kernel_features)
{
  if (hipModule) {
    if (use_adaptive_compilation()) {
      VLOG(1) << "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
    }
    return true;
  }

  if (hipContext == 0)
    return false;

  if (!support_device(kernel_features)) {
    return false;
  }

  /* get kernel */
  const char *kernel_name = "kernel";
  string fatbin = compile_kernel(kernel_features, kernel_name);
  if (fatbin.empty())
    return false;

  /* open module */
  HIPContextScope scope(this);

  string fatbin_data;
  hipError_t result;

  if (path_read_text(fatbin, fatbin_data)) {
    result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
  }
  else
    result = hipErrorFileNotFound;

  if (result != hipSuccess)
    set_error(string_printf(
        "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(result)));

  if (result == hipSuccess) {
    kernels.load(this);

    const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
                                     (kernel_features & KERNEL_FEATURE_MNEE) ?
                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;

    HIPRTDeviceQueue queue(this);

    device_ptr d_path_index = 0;
    device_ptr d_render_buffer = 0;
    int d_work_size = 0;
    DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);

    queue.init_execution();
    queue.enqueue(test_kernel, 1, args);
    queue.synchronize();
  }

  return (result == hipSuccess);
}
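
/* Copy host-side constants into the kernel_params global on the device. The
 * "data" block additionally carries the hiprtScene handle in device_bvh; all
 * arrays are matched by name via KERNEL_DATA_ARRAY and written at their offset
 * inside KernelParamsHIPRT. */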
void HIPRTDevice::const_copy_to(const char *name, void *host, size_t size)
{
  HIPContextScope scope(this);
  hipDeviceptr_t mem;
  size_t bytes;

  if (strcmp(name, "data") == 0) {
    assert(size <= sizeof(KernelData));
    KernelData *const data = (KernelData *)host;
    *(hiprtScene *)&data->device_bvh = scene;
  }

  hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, "kernel_params"));
  assert(bytes == sizeof(KernelParamsHIPRT));

#  define KERNEL_DATA_ARRAY(data_type, data_name) \
    if (strcmp(name, #data_name) == 0) { \
      hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIPRT, data_name), host, size)); \
      return; \
    }
  KERNEL_DATA_ARRAY(KernelData, data)
  KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
  KERNEL_DATA_ARRAY(int, user_instance_id)
  KERNEL_DATA_ARRAY(uint64_t, blas_ptr)
  KERNEL_DATA_ARRAY(int2, custom_prim_info_offset)
  KERNEL_DATA_ARRAY(int2, custom_prim_info)
  KERNEL_DATA_ARRAY(int, prim_time_offset)
  KERNEL_DATA_ARRAY(float2, prims_time)

#  include "kernel/data_arrays.h"
#  undef KERNEL_DATA_ARRAY
}
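
/* Geometry build input for triangle meshes. Static meshes map directly to a
 * HIP RT triangle mesh primitive; motion-blurred meshes are turned into a list
 * of custom AABB primitives, one bound per triangle or per BVH time step. */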
hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh *mesh)
{
  hiprtGeometryBuildInput geom_input;
  geom_input.geomType = Triangle;

  if (mesh->has_motion_blur()) {
    const Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    const float3 *vert_steps = attr_mP->data_float3();
    const size_t num_verts = mesh->get_verts().size();
    const size_t num_steps = mesh->get_motion_steps();
    const size_t num_triangles = mesh->num_triangles();
    const float3 *verts = mesh->get_verts().data();
    int num_bounds = 0;

    if (bvh->params.num_motion_triangle_steps == 0 || bvh->params.use_spatial_split) {
      bvh->custom_primitive_bound.alloc(num_triangles);
      bvh->custom_prim_info.resize(num_triangles);
      for (uint j = 0; j < num_triangles; j++) {
        Mesh::Triangle t = mesh->get_triangle(j);
        BoundBox bounds = BoundBox::empty;
        t.bounds_grow(verts, bounds);
        for (size_t step = 0; step < num_steps - 1; step++) {
          t.bounds_grow(vert_steps + step * num_verts, bounds);
        }

        if (bounds.valid()) {
          bvh->custom_primitive_bound[num_bounds] = bounds;
          bvh->custom_prim_info[num_bounds].x = j;
          bvh->custom_prim_info[num_bounds].y = mesh->primitive_type();
          num_bounds++;
        }
      }
    }
    else {
      const int num_bvh_steps = bvh->params.num_motion_triangle_steps * 2 + 1;
      const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);

      bvh->custom_primitive_bound.alloc(num_triangles * num_bvh_steps);
      bvh->custom_prim_info.resize(num_triangles * num_bvh_steps);

      for (uint j = 0; j < num_triangles; j++) {
        Mesh::Triangle t = mesh->get_triangle(j);
        float3 prev_verts[3];
        t.motion_verts(verts, vert_steps, num_verts, num_steps, 0.0f, prev_verts);
        BoundBox prev_bounds = BoundBox::empty;
        prev_bounds.grow(prev_verts[0]);
        prev_bounds.grow(prev_verts[1]);
        prev_bounds.grow(prev_verts[2]);

        for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
          const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
          float3 curr_verts[3];
          t.motion_verts(verts, vert_steps, num_verts, num_steps, curr_time, curr_verts);
          BoundBox curr_bounds = BoundBox::empty;
          curr_bounds.grow(curr_verts[0]);
          curr_bounds.grow(curr_verts[1]);
          curr_bounds.grow(curr_verts[2]);
          BoundBox bounds = prev_bounds;
          bounds.grow(curr_bounds);
          if (bounds.valid()) {
            const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
            bvh->custom_primitive_bound[num_bounds] = bounds;
            bvh->custom_prim_info[num_bounds].x = j;
            bvh->custom_prim_info[num_bounds].y = mesh->primitive_type();
            bvh->prims_time[num_bounds].x = curr_time;
            bvh->prims_time[num_bounds].y = prev_time;
            num_bounds++;
          }
          prev_bounds = curr_bounds;
        }
      }
    }

    bvh->custom_prim_aabb.aabbCount = bvh->custom_primitive_bound.size();
    bvh->custom_prim_aabb.aabbStride = sizeof(BoundBox);
    bvh->custom_primitive_bound.copy_to_device();
    bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;

    geom_input.type = hiprtPrimitiveTypeAABBList;
    geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
    geom_input.geomType = Motion_Triangle;
  }
  else {
    size_t triangle_size = mesh->get_triangles().size();
    void *triangle_data = mesh->get_triangles().data();

    size_t vertex_size = mesh->get_verts().size();
    void *vertex_data = mesh->get_verts().data();

    bvh->triangle_mesh.triangleCount = mesh->num_triangles();
    bvh->triangle_mesh.triangleStride = 3 * sizeof(int);
    bvh->triangle_mesh.vertexCount = vertex_size;
    bvh->triangle_mesh.vertexStride = sizeof(float3);

    bvh->triangle_index.host_pointer = triangle_data;
    bvh->triangle_index.data_elements = 1;
    bvh->triangle_index.data_type = TYPE_INT;
    bvh->triangle_index.data_size = triangle_size;
    bvh->triangle_index.copy_to_device();
    bvh->triangle_mesh.triangleIndices = (void *)(bvh->triangle_index.device_pointer);
    // either has to set the host pointer to zero, or increment the refcount on triangle_data
    bvh->triangle_index.host_pointer = 0;
    bvh->vertex_data.host_pointer = vertex_data;
    bvh->vertex_data.data_elements = 4;
    bvh->vertex_data.data_type = TYPE_FLOAT;
    bvh->vertex_data.data_size = vertex_size;
    bvh->vertex_data.copy_to_device();
    bvh->triangle_mesh.vertices = (void *)(bvh->vertex_data.device_pointer);
    bvh->vertex_data.host_pointer = 0;

    geom_input.type = hiprtPrimitiveTypeTriangleMesh;
    geom_input.triangleMesh.primitive = &(bvh->triangle_mesh);
  }

  return geom_input;
}
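
/* Geometry build input for hair. Curve segments are always represented as
 * custom AABB primitives; with motion blur, each segment gets one bound per
 * pair of adjacent BVH time steps and the time range is stored in prims_time. */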
hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh, Hair *hair)
{
  hiprtGeometryBuildInput geom_input;

  const PrimitiveType primitive_type = hair->primitive_type();
  const size_t num_curves = hair->num_curves();
  const size_t num_segments = hair->num_segments();
  const Attribute *curve_attr_mP = NULL;

  if (curve_attr_mP == NULL || bvh->params.num_motion_curve_steps == 0) {
    bvh->custom_prim_info.resize(num_segments);
    bvh->custom_primitive_bound.alloc(num_segments);
  }
  else {
    size_t num_boxes = bvh->params.num_motion_curve_steps * 2 * num_segments;
    bvh->custom_prim_info.resize(num_boxes);
    bvh->custom_primitive_bound.alloc(num_boxes);
    curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
  }

  int num_bounds = 0;
  float3 *curve_keys = hair->get_curve_keys().data();

  for (uint j = 0; j < num_curves; j++) {
    const Hair::Curve curve = hair->get_curve(j);
    const float *curve_radius = &hair->get_curve_radius()[0];
    int first_key = curve.first_key;
    for (int k = 0; k < curve.num_keys - 1; k++) {
      if (curve_attr_mP == NULL || bvh->params.num_motion_curve_steps == 0) {
        float3 current_keys[4];
        current_keys[0] = curve_keys[max(first_key + k - 1, first_key)];
        current_keys[1] = curve_keys[first_key + k];
        current_keys[2] = curve_keys[first_key + k + 1];
        current_keys[3] = curve_keys[min(first_key + k + 2, first_key + curve.num_keys - 1)];

        if (current_keys[0].x == current_keys[1].x && current_keys[1].x == current_keys[2].x &&
            current_keys[2].x == current_keys[3].x && current_keys[0].y == current_keys[1].y &&
            current_keys[1].y == current_keys[2].y && current_keys[2].y == current_keys[3].y &&
            current_keys[0].z == current_keys[1].z && current_keys[1].z == current_keys[2].z &&
            current_keys[2].z == current_keys[3].z)
          continue;

        BoundBox bounds = BoundBox::empty;
        curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds);
        if (bounds.valid()) {
          int type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
          bvh->custom_prim_info[num_bounds].x = j;
          bvh->custom_prim_info[num_bounds].y = type;
          bvh->custom_primitive_bound[num_bounds] = bounds;
          num_bounds++;
        }
      }
      else {
        const int num_bvh_steps = bvh->params.num_motion_curve_steps * 2 + 1;
        const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
        const size_t num_steps = hair->get_motion_steps();
        const float3 *curve_keys = &hair->get_curve_keys()[0];
        const float4 *key_steps = curve_attr_mP->data_float4();
        const size_t num_keys = hair->get_curve_keys().size();

        float4 prev_keys[4];
        curve.cardinal_motion_keys(curve_keys,
                                   curve_radius,
                                   key_steps,
                                   num_keys,
                                   num_steps,
                                   0.0f,
                                   k - 1,
                                   k,
                                   k + 1,
                                   k + 2,
                                   prev_keys);
        BoundBox prev_bounds = BoundBox::empty;
        curve.bounds_grow(prev_keys, prev_bounds);

        for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
          const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
          float4 curr_keys[4];
          curve.cardinal_motion_keys(curve_keys,
                                     curve_radius,
                                     key_steps,
                                     num_keys,
                                     num_steps,
                                     curr_time,
                                     k - 1,
                                     k,
                                     k + 1,
                                     k + 2,
                                     curr_keys);
          BoundBox curr_bounds = BoundBox::empty;
          curve.bounds_grow(curr_keys, curr_bounds);
          BoundBox bounds = prev_bounds;
          bounds.grow(curr_bounds);
          if (bounds.valid()) {
            const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
            int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
            bvh->custom_prim_info[num_bounds].x = j;
            bvh->custom_prim_info[num_bounds].y = packed_type; // k
            bvh->custom_primitive_bound[num_bounds] = bounds;
            bvh->prims_time[num_bounds].x = curr_time;
            bvh->prims_time[num_bounds].y = prev_time;
            num_bounds++;
          }
          prev_bounds = curr_bounds;
        }
      }
    }
  }

  bvh->custom_prim_aabb.aabbCount = num_bounds;
  bvh->custom_prim_aabb.aabbStride = sizeof(BoundBox);
  bvh->custom_primitive_bound.copy_to_device();
  bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;

  geom_input.type = hiprtPrimitiveTypeAABBList;
  geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
  geom_input.geomType = Curve;

  return geom_input;
}
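
/* Geometry build input for point clouds, again as custom AABB primitives: one
 * bound per point grown over all motion steps, or one bound per BVH time step
 * when per-step motion data is used. */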
hiprtGeometryBuildInput HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh, PointCloud *pointcloud)
{
  hiprtGeometryBuildInput geom_input;

  const Attribute *point_attr_mP = NULL;
  if (pointcloud->has_motion_blur()) {
    point_attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
  }

  const float3 *points_data = pointcloud->get_points().data();
  const float *radius_data = pointcloud->get_radius().data();
  const size_t num_points = pointcloud->num_points();
  const float4 *motion_data = (point_attr_mP) ? point_attr_mP->data_float4() : NULL;
  const size_t num_steps = pointcloud->get_motion_steps();

  int num_bounds = 0;

  if (point_attr_mP == NULL) {
    bvh->custom_primitive_bound.alloc(num_points);
    for (uint j = 0; j < num_points; j++) {
      const PointCloud::Point point = pointcloud->get_point(j);
      BoundBox bounds = BoundBox::empty;
      point.bounds_grow(points_data, radius_data, bounds);
      if (bounds.valid()) {
        bvh->custom_primitive_bound[num_bounds] = bounds;
        bvh->custom_prim_info[num_bounds].x = j;
        bvh->custom_prim_info[num_bounds].y = PRIMITIVE_POINT;
        num_bounds++;
      }
    }
  }
  else if (bvh->params.num_motion_point_steps == 0) {
    bvh->custom_primitive_bound.alloc(num_points * num_steps);

    for (uint j = 0; j < num_points; j++) {
      const PointCloud::Point point = pointcloud->get_point(j);
      BoundBox bounds = BoundBox::empty;
      point.bounds_grow(points_data, radius_data, bounds);
      for (size_t step = 0; step < num_steps - 1; step++) {
        point.bounds_grow(motion_data[step * num_points + j], bounds);
      }
      if (bounds.valid()) {
        bvh->custom_primitive_bound[num_bounds] = bounds;
        bvh->custom_prim_info[num_bounds].x = j;
        bvh->custom_prim_info[num_bounds].y = PRIMITIVE_POINT;
        num_bounds++;
      }
    }
  }
  else {
    const int num_bvh_steps = bvh->params.num_motion_point_steps * 2 + 1;
    const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);

    bvh->custom_primitive_bound.alloc(num_points * num_bvh_steps);

    for (uint j = 0; j < num_points; j++) {
      const PointCloud::Point point = pointcloud->get_point(j);
      const size_t num_steps = pointcloud->get_motion_steps();
      const float4 *point_steps = point_attr_mP->data_float4();

      float4 prev_key = point.motion_key(
          points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j);
      BoundBox prev_bounds = BoundBox::empty;
      point.bounds_grow(prev_key, prev_bounds);

      for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
        const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
        float4 curr_key = point.motion_key(
            points_data, radius_data, point_steps, num_points, num_steps, curr_time, j);
        BoundBox curr_bounds = BoundBox::empty;
        point.bounds_grow(curr_key, curr_bounds);
        BoundBox bounds = prev_bounds;
        bounds.grow(curr_bounds);
        if (bounds.valid()) {
          const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
          bvh->custom_primitive_bound[num_bounds] = bounds;
          bvh->custom_prim_info[num_bounds].x = j;
          bvh->custom_prim_info[num_bounds].y = PRIMITIVE_MOTION_POINT;
          bvh->prims_time[num_bounds].x = curr_time;
          bvh->prims_time[num_bounds].y = prev_time;
          num_bounds++;
        }
        prev_bounds = curr_bounds;
      }
    }
  }

  bvh->custom_prim_aabb.aabbCount = bvh->custom_primitive_bound.size();
  bvh->custom_prim_aabb.aabbStride = sizeof(BoundBox);
  bvh->custom_primitive_bound.copy_to_device();
  bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;

  geom_input.type = hiprtPrimitiveTypeAABBList;
  geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
  geom_input.geomType = Point;

  return geom_input;
}
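
/* Build the bottom-level acceleration structure for a single geometry. The
 * shared scratch buffer is grown under hiprt_mutex so it can be reused across
 * concurrent BLAS builds. */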
void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions options)
{
  hiprtGeometryBuildInput geom_input = {};

  switch (geom->geometry_type) {
    case Geometry::MESH:
    case Geometry::VOLUME: {
      Mesh *mesh = static_cast<Mesh *>(geom);

      if (mesh->num_triangles() == 0)
        return;

      geom_input = prepare_triangle_blas(bvh, mesh);
      break;
    }
    case Geometry::HAIR: {
      Hair *const hair = static_cast<Hair *const>(geom);

      if (hair->num_segments() == 0)
        return;

      geom_input = prepare_curve_blas(bvh, hair);
      break;
    }
    case Geometry::POINTCLOUD: {
      PointCloud *pointcloud = static_cast<PointCloud *>(geom);
      if (pointcloud->num_points() == 0)
        return;

      geom_input = prepare_point_blas(bvh, pointcloud);
      break;
    }
    default:
      assert(geom_input.geomType != hiprtInvalidValue);
  }

  size_t blas_scratch_buffer_size = 0;
  hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize(
      hiprt_context, &geom_input, &options, &blas_scratch_buffer_size);

  if (rt_err != hiprtSuccess) {
    set_error(string_printf("Failed to get scratch buffer size for BLAS!"));
  }

  rt_err = hiprtCreateGeometry(hiprt_context, &geom_input, &options, &bvh->hiprt_geom);

  if (rt_err != hiprtSuccess) {
    set_error(string_printf("Failed to create BLAS!"));
  }
  bvh->geom_input = geom_input;

  thread_scoped_lock lock(hiprt_mutex);
  if (blas_scratch_buffer_size > scratch_buffer_size) {
    scratch_buffer.alloc(blas_scratch_buffer_size);
    scratch_buffer_size = blas_scratch_buffer_size;
    scratch_buffer.zero_to_device();
  }
  rt_err = hiprtBuildGeometry(hiprt_context,
                              hiprtBuildOperationBuild,
                              &bvh->geom_input,
                              &options,
                              (void *)(scratch_buffer.device_pointer),
                              0,
                              bvh->hiprt_geom);

  if (rt_err != hiprtSuccess) {
    set_error(string_printf("Failed to build BLAS"));
  }
}
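
/* Build the top-level acceleration structure over all scene objects: collect
 * per-instance transforms, visibility masks and BLAS handles, merge the
 * per-geometry custom primitive info and motion times, build the hiprtScene,
 * and finally point the intersection function table entries in kernel_params
 * at functions_table. */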
hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
                                   vector<Object *> objects,
                                   hiprtBuildOptions options,
                                   bool refit)
{
  size_t num_object = objects.size();
  if (num_object == 0) {
    return 0;
  }

  hiprtBuildOperation build_operation = refit ? hiprtBuildOperationUpdate :
                                                hiprtBuildOperationBuild;

  array<hiprtFrameMatrix> transform_matrix;

  unordered_map<Geometry *, int2> prim_info_map;
  size_t custom_prim_offset = 0;

  unordered_map<Geometry *, int> prim_time_map;

  size_t num_instances = 0;
  int blender_instance_id = 0;

  user_instance_id.alloc(num_object);
  prim_visibility.alloc(num_object);
  hiprt_blas_ptr.alloc(num_object);
  blas_ptr.alloc(num_object);
  transform_headers.alloc(num_object);
  custom_prim_info_offset.alloc(num_object);
  prim_time_offset.alloc(num_object);

  foreach (Object *ob, objects) {
    uint32_t mask = 0;
    if (ob->is_traceable()) {
      mask = ob->visibility_for_tracing();
    }

    Transform current_transform = ob->get_tfm();
    Geometry *geom = ob->get_geometry();
    bool transform_applied = geom->transform_applied;

    BVHHIPRT *current_bvh = static_cast<BVHHIPRT *>(geom->bvh);
    bool is_valid_geometry = current_bvh->geom_input.geomType != hiprtInvalidValue;
    hiprtGeometry hiprt_geom_current = current_bvh->hiprt_geom;

    hiprtFrameMatrix hiprt_transform_matrix = {{{0}}};
    Transform identity_matrix = transform_identity();
    get_hiprt_transform(hiprt_transform_matrix.matrix, identity_matrix);

    if (is_valid_geometry) {
      bool is_custom_prim = current_bvh->custom_prim_info.size() > 0;

      if (is_custom_prim) {
        bool has_motion_blur = current_bvh->prims_time.size() > 0;

        unordered_map<Geometry *, int2>::iterator it = prim_info_map.find(geom);

        if (prim_info_map.find(geom) != prim_info_map.end()) {
          custom_prim_info_offset[blender_instance_id] = it->second;

          if (has_motion_blur) {
            prim_time_offset[blender_instance_id] = prim_time_map[geom];
          }
        }
        else {
          int offset = bvh->custom_prim_info.size();

          prim_info_map[geom].x = offset;
          prim_info_map[geom].y = custom_prim_offset;

          bvh->custom_prim_info.resize(offset + current_bvh->custom_prim_info.size());
          memcpy(bvh->custom_prim_info.data() + offset,
                 current_bvh->custom_prim_info.data(),
                 current_bvh->custom_prim_info.size() * sizeof(int2));

          custom_prim_info_offset[blender_instance_id].x = offset;
          custom_prim_info_offset[blender_instance_id].y = custom_prim_offset;

          if (geom->geometry_type == Geometry::HAIR) {
            custom_prim_offset += ((Hair *)geom)->num_curves();
          }
          else if (geom->geometry_type == Geometry::POINTCLOUD) {
            custom_prim_offset += ((PointCloud *)geom)->num_points();
          }
          else {
            custom_prim_offset += ((Mesh *)geom)->num_triangles();
          }

          if (has_motion_blur) {
            int time_offset = bvh->prims_time.size();
            prim_time_map[geom] = time_offset;

            memcpy(bvh->prims_time.data() + time_offset,
                   current_bvh->prims_time.data(),
                   current_bvh->prims_time.size() * sizeof(float2));

            prim_time_offset[blender_instance_id] = time_offset;
          }
          else
            prim_time_offset[blender_instance_id] = -1;
        }
      }
      else
        custom_prim_info_offset[blender_instance_id] = {-1, -1};

      hiprtTransformHeader current_header = {0};
      current_header.frameCount = 1;
      current_header.frameIndex = transform_matrix.size();
      if (ob->get_motion().size()) {
        int motion_size = ob->get_motion().size();
        assert(motion_size == 1);

        array<Transform> tfm_array = ob->get_motion();
        float time_interval = 1 / (float)(motion_size - 1);
        current_header.frameCount = motion_size;

        vector<hiprtFrameMatrix> tfm_hiprt_mb;
        tfm_hiprt_mb.resize(motion_size);
        for (int i = 0; i < motion_size; i++) {
          get_hiprt_transform(tfm_hiprt_mb[i].matrix, tfm_array[i]);
          tfm_hiprt_mb[i].time = (float)i * time_interval;
          transform_matrix.push_back_slow(tfm_hiprt_mb[i]);
        }
      }
      else {
        if (transform_applied)
          current_transform = identity_matrix;
        get_hiprt_transform(hiprt_transform_matrix.matrix, current_transform);
        transform_matrix.push_back_slow(hiprt_transform_matrix);
      }

      transform_headers[num_instances] = current_header;

      user_instance_id[num_instances] = blender_instance_id;
      prim_visibility[num_instances] = mask;
      hiprt_blas_ptr[num_instances] = (uint64_t)hiprt_geom_current;
      num_instances++;
    }

    blas_ptr[blender_instance_id] = (uint64_t)hiprt_geom_current;
    blender_instance_id++;
  }

  int frame_count = transform_matrix.size();
  hiprtSceneBuildInput scene_input_ptr = {0};
  scene_input_ptr.instanceCount = num_instances;
  scene_input_ptr.frameCount = frame_count;
  scene_input_ptr.frameType = hiprtFrameTypeMatrix;

  user_instance_id.copy_to_device();
  prim_visibility.copy_to_device();
  hiprt_blas_ptr.copy_to_device();
  blas_ptr.copy_to_device();
  transform_headers.copy_to_device();

  instance_transform_matrix.alloc(frame_count);
  instance_transform_matrix.host_pointer = transform_matrix.data();
  instance_transform_matrix.data_elements = sizeof(hiprtFrameMatrix);
  instance_transform_matrix.data_type = TYPE_UCHAR;
  instance_transform_matrix.data_size = frame_count;
  instance_transform_matrix.copy_to_device();
  instance_transform_matrix.host_pointer = 0;

  scene_input_ptr.instanceMasks = (void *)prim_visibility.device_pointer;
  scene_input_ptr.instanceGeometries = (void *)hiprt_blas_ptr.device_pointer;
  scene_input_ptr.instanceTransformHeaders = (void *)transform_headers.device_pointer;
  scene_input_ptr.instanceFrames = (void *)instance_transform_matrix.device_pointer;

  hiprtScene scene = 0;

  hiprtError rt_err = hiprtCreateScene(hiprt_context, &scene_input_ptr, &options, &scene);

  if (rt_err != hiprtSuccess) {
    set_error(string_printf("Failed to create TLAS"));
  }

  size_t tlas_scratch_buffer_size;
  rt_err = hiprtGetSceneBuildTemporaryBufferSize(
      hiprt_context, &scene_input_ptr, &options, &tlas_scratch_buffer_size);

  if (rt_err != hiprtSuccess) {
    set_error(string_printf("Failed to get scratch buffer size for TLAS"));
  }

  if (tlas_scratch_buffer_size > scratch_buffer_size) {
    scratch_buffer.alloc(tlas_scratch_buffer_size);
    scratch_buffer.zero_to_device();
  }

  rt_err = hiprtBuildScene(hiprt_context,
                           build_operation,
                           &scene_input_ptr,
                           &options,
                           (void *)scratch_buffer.device_pointer,
                           0,
                           scene);

  if (rt_err != hiprtSuccess) {
    set_error(string_printf("Failed to build TLAS"));
  }

  scratch_buffer.free();
  scratch_buffer_size = 0;

  if (bvh->custom_prim_info.size()) {
    size_t data_size = bvh->custom_prim_info.size();
    custom_prim_info.alloc(data_size);
    custom_prim_info.host_pointer = bvh->custom_prim_info.data();
    custom_prim_info.data_elements = 2;
    custom_prim_info.data_type = TYPE_INT;
    custom_prim_info.data_size = data_size;
    custom_prim_info.copy_to_device();
    custom_prim_info.host_pointer = 0;

    custom_prim_info_offset.copy_to_device();
  }

  if (bvh->prims_time.size()) {
    size_t data_size = bvh->prims_time.size();
    prims_time.alloc(data_size);
    prims_time.host_pointer = bvh->prims_time.data();
    prims_time.data_elements = 2;
    prims_time.data_type = TYPE_FLOAT;
    prims_time.data_size = data_size;
    prims_time.copy_to_device();
    prims_time.host_pointer = 0;

    prim_time_offset.copy_to_device();
  }

  size_t table_ptr_size = 0;
  hipDeviceptr_t table_device_ptr;

  hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule, "kernel_params"));

  size_t kernel_param_offset[4];
  int table_index = 0;
  kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_closest_intersect);
  kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_shadow_intersect);
  kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_local_intersect);
  kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_volume_intersect);

  for (int index = 0; index < table_index; index++) {
    hip_assert(hipMemcpyHtoD(
        table_device_ptr + kernel_param_offset[index], &functions_table, sizeof(device_ptr)));
  }

  return scene;
}
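
/* Entry point for BVH builds from the scene update: geometry BVHs go through
 * build_blas(), the object-level BVH through build_tlas(). */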
void HIPRTDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
  progress.set_substatus("Building HIPRT acceleration structure");

  hiprtBuildOptions options;
  options.buildFlags = hiprtBuildFlagBitPreferHighQualityBuild;

  BVHHIPRT *bvh_rt = static_cast<BVHHIPRT *>(bvh);
  HIPContextScope scope(this);

  if (!bvh_rt->is_tlas()) {
    vector<Geometry *> geometry = bvh_rt->geometry;
    assert(geometry.size() == 1);
    Geometry *geom = geometry[0];
    build_blas(bvh_rt, geom, options);
  }
  else {
    const vector<Object *> objects = bvh_rt->objects;
    scene = build_tlas(bvh_rt, objects, options, refit);
  }
}

CCL_NAMESPACE_END

#endif