/* SPDX-FileCopyrightText: 2011-2023 Blender Foundation
 * SPDX-License-Identifier: Apache-2.0 */
7 # include "device/hiprt/device_impl.h"
9 # include "util/debug.h"
10 # include "util/foreach.h"
11 # include "util/log.h"
12 # include "util/map.h"
13 # include "util/md5.h"
14 # include "util/path.h"
15 # include "util/progress.h"
16 # include "util/string.h"
17 # include "util/system.h"
18 # include "util/time.h"
19 # include "util/types.h"
20 # include "util/windows.h"
22 # include "bvh/hiprt.h"
24 # include "scene/hair.h"
25 # include "scene/mesh.h"
26 # include "scene/object.h"
27 # include "scene/pointcloud.h"
31 static void get_hiprt_transform(float matrix
[][4], Transform
&tfm
)
35 matrix
[row
][col
++] = tfm
.x
.x
;
36 matrix
[row
][col
++] = tfm
.x
.y
;
37 matrix
[row
][col
++] = tfm
.x
.z
;
38 matrix
[row
][col
++] = tfm
.x
.w
;
41 matrix
[row
][col
++] = tfm
.y
.x
;
42 matrix
[row
][col
++] = tfm
.y
.y
;
43 matrix
[row
][col
++] = tfm
.y
.z
;
44 matrix
[row
][col
++] = tfm
.y
.w
;
47 matrix
[row
][col
++] = tfm
.z
.x
;
48 matrix
[row
][col
++] = tfm
.z
.y
;
49 matrix
[row
][col
++] = tfm
.z
.z
;
50 matrix
[row
][col
++] = tfm
.z
.w
;
55 BVHLayoutMask
HIPRTDevice::get_bvh_layout_mask(const uint
/* kernel_features */) const
57 return BVH_LAYOUT_HIPRT
;
60 HIPRTDevice::HIPRTDevice(const DeviceInfo
&info
, Stats
&stats
, Profiler
&profiler
)
61 : HIPDevice(info
, stats
, profiler
),
62 global_stack_buffer(this, "global_stack_buffer", MEM_DEVICE_ONLY
),
65 functions_table(NULL
),
66 scratch_buffer_size(0),
67 scratch_buffer(this, "scratch_buffer", MEM_DEVICE_ONLY
),
68 prim_visibility(this, "prim_visibility", MEM_GLOBAL
),
69 instance_transform_matrix(this, "instance_transform_matrix", MEM_READ_ONLY
),
70 transform_headers(this, "transform_headers", MEM_READ_ONLY
),
71 user_instance_id(this, "user_instance_id", MEM_GLOBAL
),
72 hiprt_blas_ptr(this, "hiprt_blas_ptr", MEM_READ_WRITE
),
73 blas_ptr(this, "blas_ptr", MEM_GLOBAL
),
74 custom_prim_info(this, "custom_prim_info", MEM_GLOBAL
),
75 custom_prim_info_offset(this, "custom_prim_info_offset", MEM_GLOBAL
),
76 prims_time(this, "prims_time", MEM_GLOBAL
),
77 prim_time_offset(this, "prim_time_offset", MEM_GLOBAL
)
79 HIPContextScope
scope(this);
80 hiprtContextCreationInput hiprt_context_input
= {0};
81 hiprt_context_input
.ctxt
= hipContext
;
82 hiprt_context_input
.device
= hipDevice
;
83 hiprt_context_input
.deviceType
= hiprtDeviceAMD
;
84 hiprtError rt_result
= hiprtCreateContext(
85 HIPRT_API_VERSION
, hiprt_context_input
, &hiprt_context
);
87 if (rt_result
!= hiprtSuccess
) {
88 set_error(string_printf("Failed to create HIPRT context"));
92 rt_result
= hiprtCreateFuncTable(
93 hiprt_context
, Max_Primitive_Type
, Max_Intersect_Filter_Function
, &functions_table
);
95 if (rt_result
!= hiprtSuccess
) {
96 set_error(string_printf("Failed to create HIPRT Function Table"));
100 hiprtSetLogLevel(hiprtLogLevelNone
);
103 HIPRTDevice::~HIPRTDevice()
105 HIPContextScope
scope(this);
106 user_instance_id
.free();
107 prim_visibility
.free();
108 hiprt_blas_ptr
.free();
110 instance_transform_matrix
.free();
111 transform_headers
.free();
112 custom_prim_info_offset
.free();
113 custom_prim_info
.free();
114 prim_time_offset
.free();
116 global_stack_buffer
.free();
117 hiprtDestroyFuncTable(hiprt_context
, functions_table
);
118 hiprtDestroyScene(hiprt_context
, scene
);
119 hiprtDestroyContext(hiprt_context
);
122 unique_ptr
<DeviceQueue
> HIPRTDevice::gpu_queue_create()
124 return make_unique
<HIPRTDeviceQueue
>(this);
127 string
HIPRTDevice::compile_kernel_get_common_cflags(const uint kernel_features
)
129 string cflags
= HIPDevice::compile_kernel_get_common_cflags(kernel_features
);
131 cflags
+= " -D __HIPRT__ ";
136 string
HIPRTDevice::compile_kernel(const uint kernel_features
, const char *name
, const char *base
)
139 hipDeviceGetAttribute(&major
, hipDeviceAttributeComputeCapabilityMajor
, hipDevId
);
140 hipDeviceGetAttribute(&minor
, hipDeviceAttributeComputeCapabilityMinor
, hipDevId
);
141 const std::string arch
= hipDeviceArch(hipDevId
);
143 if (!use_adaptive_compilation()) {
144 const string fatbin
= path_get(string_printf("lib/%s_rt_gfx.hipfb", name
));
145 VLOG(1) << "Testing for pre-compiled kernel " << fatbin
<< ".";
146 if (path_exists(fatbin
)) {
147 VLOG(1) << "Using precompiled kernel.";
152 string source_path
= path_get("source");
153 const string source_md5
= path_files_md5_hash(source_path
);
155 string common_cflags
= compile_kernel_get_common_cflags(kernel_features
);
156 const string kernel_md5
= util_md5_string(source_md5
+ common_cflags
);
158 const string include_path
= source_path
;
159 const string bitcode_file
= string_printf(
160 "cycles_%s_%s_%s.bc", name
, arch
.c_str(), kernel_md5
.c_str());
161 const string bitcode
= path_cache_get(path_join("kernels", bitcode_file
));
162 const string fatbin_file
= string_printf(
163 "cycles_%s_%s_%s.hipfb", name
, arch
.c_str(), kernel_md5
.c_str());
164 const string fatbin
= path_cache_get(path_join("kernels", fatbin_file
));
166 VLOG(1) << "Testing for locally compiled kernel " << fatbin
<< ".";
167 if (path_exists(fatbin
)) {
168 VLOG(1) << "Using locally compiled kernel.";
173 if (!use_adaptive_compilation() && have_precompiled_kernels()) {
174 if (!hipSupportsDevice(hipDevId
)) {
176 string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
177 "Your GPU is not supported.",
183 string_printf("HIP binary kernel for this graphics card compute "
184 "capability (%d.%d) not found.",
192 const char *const hipcc
= hipewCompilerPath();
195 "HIP hipcc compiler not found. "
196 "Install HIP toolkit in default location.");
200 const int hipcc_hip_version
= hipewCompilerVersion();
201 VLOG_INFO
<< "Found hipcc " << hipcc
<< ", HIP version " << hipcc_hip_version
<< ".";
202 if (hipcc_hip_version
< 40) {
204 "Unsupported HIP version %d.%d detected, "
205 "you need HIP 4.0 or newer.\n",
206 hipcc_hip_version
/ 10,
207 hipcc_hip_version
% 10);
211 path_create_directories(fatbin
);
213 source_path
= path_join(path_join(source_path
, "kernel"),
214 path_join("device", path_join(base
, string_printf("%s.cpp", name
))));
216 printf("Compiling %s and caching to %s", source_path
.c_str(), fatbin
.c_str());
218 double starttime
= time_dt();
220 const string hiprt_path
= getenv("HIPRT_ROOT_DIR");
221 // First, app kernels are compiled into bitcode, without access to implementation of HIP RT
223 if (!path_exists(bitcode
)) {
225 std::string rtc_options
;
227 rtc_options
.append(" --offload-arch=").append(arch
.c_str());
228 rtc_options
.append(" -D __HIPRT__");
229 rtc_options
.append(" -ffast-math -O3 -std=c++17");
230 rtc_options
.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm");
232 string command
= string_printf("%s %s -I %s -I %s %s -o \"%s\"",
235 include_path
.c_str(),
240 printf("Compiling %sHIP kernel ...\n%s\n",
241 (use_adaptive_compilation()) ? "adaptive " : "",
245 command
= "call " + command
;
247 if (system(command
.c_str()) != 0) {
249 "Failed to execute compilation command, "
250 "see console for details.");
255 // After compilation, the bitcode produced is linked with HIP RT bitcode (containing
256 // implementations of HIP RT functions, e.g. traversal, to produce the final executable code
257 string linker_options
;
258 linker_options
.append(" --offload-arch=").append(arch
.c_str());
259 linker_options
.append(" -fgpu-rdc --hip-link --cuda-device-only ");
260 string
hiprt_ver(HIPRT_VERSION_STR
);
261 string hiprt_bc
= hiprt_path
+ "\\dist\\bin\\Release\\hiprt" + hiprt_ver
+ "_amd_lib_win.bc";
263 string linker_command
= string_printf("clang++ %s \"%s\" %s -o \"%s\"",
264 linker_options
.c_str(),
270 linker_command
= "call " + linker_command
;
272 if (system(linker_command
.c_str()) != 0) {
274 "Failed to execute linking command, "
275 "see console for details.");
279 printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime
);
284 bool HIPRTDevice::load_kernels(const uint kernel_features
)
287 if (use_adaptive_compilation()) {
288 VLOG(1) << "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
296 if (!support_device(kernel_features
)) {
301 const char *kernel_name
= "kernel";
302 string fatbin
= compile_kernel(kernel_features
, kernel_name
);
307 HIPContextScope
scope(this);
312 if (path_read_text(fatbin
, fatbin_data
)) {
314 result
= hipModuleLoadData(&hipModule
, fatbin_data
.c_str());
317 result
= hipErrorFileNotFound
;
319 if (result
!= hipSuccess
)
320 set_error(string_printf(
321 "Failed to load HIP kernel from '%s' (%s)", fatbin
.c_str(), hipewErrorString(result
)));
323 if (result
== hipSuccess
) {
326 const DeviceKernel test_kernel
= (kernel_features
& KERNEL_FEATURE_NODE_RAYTRACE
) ?
327 DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
:
328 (kernel_features
& KERNEL_FEATURE_MNEE
) ?
329 DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
:
330 DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
;
332 HIPRTDeviceQueue
queue(this);
334 device_ptr d_path_index
= 0;
335 device_ptr d_render_buffer
= 0;
337 DeviceKernelArguments
args(&d_path_index
, &d_render_buffer
, &d_work_size
);
339 queue
.init_execution();
340 queue
.enqueue(test_kernel
, 1, args
);
345 return (result
== hipSuccess
);
348 void HIPRTDevice::const_copy_to(const char *name
, void *host
, size_t size
)
350 HIPContextScope
scope(this);
354 if (strcmp(name
, "data") == 0) {
355 assert(size
<= sizeof(KernelData
));
356 KernelData
*const data
= (KernelData
*)host
;
357 *(hiprtScene
*)&data
->device_bvh
= scene
;
360 hip_assert(hipModuleGetGlobal(&mem
, &bytes
, hipModule
, "kernel_params"));
361 assert(bytes
== sizeof(KernelParamsHIPRT
));
363 # define KERNEL_DATA_ARRAY(data_type, data_name) \
364 if (strcmp(name, #data_name) == 0) { \
365 hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIPRT, data_name), host, size)); \
368 KERNEL_DATA_ARRAY(KernelData
, data
)
369 KERNEL_DATA_ARRAY(IntegratorStateGPU
, integrator_state
)
370 KERNEL_DATA_ARRAY(int, user_instance_id
)
371 KERNEL_DATA_ARRAY(uint64_t, blas_ptr
)
372 KERNEL_DATA_ARRAY(int2
, custom_prim_info_offset
)
373 KERNEL_DATA_ARRAY(int2
, custom_prim_info
)
374 KERNEL_DATA_ARRAY(int, prim_time_offset
)
375 KERNEL_DATA_ARRAY(float2
, prims_time
)
377 # include "kernel/data_arrays.h"
378 # undef KERNEL_DATA_ARRAY
381 hiprtGeometryBuildInput
HIPRTDevice::prepare_triangle_blas(BVHHIPRT
*bvh
, Mesh
*mesh
)
383 hiprtGeometryBuildInput geom_input
;
384 geom_input
.geomType
= Triangle
;
386 if (mesh
->has_motion_blur()) {
388 const Attribute
*attr_mP
= mesh
->attributes
.find(ATTR_STD_MOTION_VERTEX_POSITION
);
389 const float3
*vert_steps
= attr_mP
->data_float3();
390 const size_t num_verts
= mesh
->get_verts().size();
391 const size_t num_steps
= mesh
->get_motion_steps();
392 const size_t num_triangles
= mesh
->num_triangles();
393 const float3
*verts
= mesh
->get_verts().data();
396 if (bvh
->params
.num_motion_triangle_steps
== 0 || bvh
->params
.use_spatial_split
) {
397 bvh
->custom_primitive_bound
.alloc(num_triangles
);
398 bvh
->custom_prim_info
.resize(num_triangles
);
399 for (uint j
= 0; j
< num_triangles
; j
++) {
400 Mesh::Triangle t
= mesh
->get_triangle(j
);
401 BoundBox bounds
= BoundBox::empty
;
402 t
.bounds_grow(verts
, bounds
);
403 for (size_t step
= 0; step
< num_steps
- 1; step
++) {
404 t
.bounds_grow(vert_steps
+ step
* num_verts
, bounds
);
407 if (bounds
.valid()) {
408 bvh
->custom_primitive_bound
[num_bounds
] = bounds
;
409 bvh
->custom_prim_info
[num_bounds
].x
= j
;
410 bvh
->custom_prim_info
[num_bounds
].y
= mesh
->primitive_type();
416 const int num_bvh_steps
= bvh
->params
.num_motion_triangle_steps
* 2 + 1;
417 const float num_bvh_steps_inv_1
= 1.0f
/ (num_bvh_steps
- 1);
419 bvh
->custom_primitive_bound
.alloc(num_triangles
* num_bvh_steps
);
420 bvh
->custom_prim_info
.resize(num_triangles
* num_bvh_steps
);
422 for (uint j
= 0; j
< num_triangles
; j
++) {
423 Mesh::Triangle t
= mesh
->get_triangle(j
);
424 float3 prev_verts
[3];
425 t
.motion_verts(verts
, vert_steps
, num_verts
, num_steps
, 0.0f
, prev_verts
);
426 BoundBox prev_bounds
= BoundBox::empty
;
427 prev_bounds
.grow(prev_verts
[0]);
428 prev_bounds
.grow(prev_verts
[1]);
429 prev_bounds
.grow(prev_verts
[2]);
431 for (int bvh_step
= 1; bvh_step
< num_bvh_steps
; ++bvh_step
) {
432 const float curr_time
= (float)(bvh_step
)*num_bvh_steps_inv_1
;
433 float3 curr_verts
[3];
434 t
.motion_verts(verts
, vert_steps
, num_verts
, num_steps
, curr_time
, curr_verts
);
435 BoundBox curr_bounds
= BoundBox::empty
;
436 curr_bounds
.grow(curr_verts
[0]);
437 curr_bounds
.grow(curr_verts
[1]);
438 curr_bounds
.grow(curr_verts
[2]);
439 BoundBox bounds
= prev_bounds
;
440 bounds
.grow(curr_bounds
);
441 if (bounds
.valid()) {
442 const float prev_time
= (float)(bvh_step
- 1) * num_bvh_steps_inv_1
;
443 bvh
->custom_primitive_bound
[num_bounds
] = bounds
;
444 bvh
->custom_prim_info
[num_bounds
].x
= j
;
445 bvh
->custom_prim_info
[num_bounds
].y
= mesh
->primitive_type();
446 bvh
->prims_time
[num_bounds
].x
= curr_time
;
447 bvh
->prims_time
[num_bounds
].y
= prev_time
;
450 prev_bounds
= curr_bounds
;
455 bvh
->custom_prim_aabb
.aabbCount
= bvh
->custom_primitive_bound
.size();
456 bvh
->custom_prim_aabb
.aabbStride
= sizeof(BoundBox
);
457 bvh
->custom_primitive_bound
.copy_to_device();
458 bvh
->custom_prim_aabb
.aabbs
= (void *)bvh
->custom_primitive_bound
.device_pointer
;
460 geom_input
.type
= hiprtPrimitiveTypeAABBList
;
461 geom_input
.aabbList
.primitive
= &bvh
->custom_prim_aabb
;
462 geom_input
.geomType
= Motion_Triangle
;
465 size_t triangle_size
= mesh
->get_triangles().size();
466 void *triangle_data
= mesh
->get_triangles().data();
468 size_t vertex_size
= mesh
->get_verts().size();
469 void *vertex_data
= mesh
->get_verts().data();
471 bvh
->triangle_mesh
.triangleCount
= mesh
->num_triangles();
472 bvh
->triangle_mesh
.triangleStride
= 3 * sizeof(int);
473 bvh
->triangle_mesh
.vertexCount
= vertex_size
;
474 bvh
->triangle_mesh
.vertexStride
= sizeof(float3
);
476 bvh
->triangle_index
.host_pointer
= triangle_data
;
477 bvh
->triangle_index
.data_elements
= 1;
478 bvh
->triangle_index
.data_type
= TYPE_INT
;
479 bvh
->triangle_index
.data_size
= triangle_size
;
480 bvh
->triangle_index
.copy_to_device();
481 bvh
->triangle_mesh
.triangleIndices
= (void *)(bvh
->triangle_index
.device_pointer
);
482 // either has to set the host pointer to zero, or increment the refcount on triangle_data
483 bvh
->triangle_index
.host_pointer
= 0;
484 bvh
->vertex_data
.host_pointer
= vertex_data
;
485 bvh
->vertex_data
.data_elements
= 4;
486 bvh
->vertex_data
.data_type
= TYPE_FLOAT
;
487 bvh
->vertex_data
.data_size
= vertex_size
;
488 bvh
->vertex_data
.copy_to_device();
489 bvh
->triangle_mesh
.vertices
= (void *)(bvh
->vertex_data
.device_pointer
);
490 bvh
->vertex_data
.host_pointer
= 0;
492 geom_input
.type
= hiprtPrimitiveTypeTriangleMesh
;
493 geom_input
.triangleMesh
.primitive
= &(bvh
->triangle_mesh
);
499 hiprtGeometryBuildInput
HIPRTDevice::prepare_curve_blas(BVHHIPRT
*bvh
, Hair
*hair
)
501 hiprtGeometryBuildInput geom_input
;
503 const PrimitiveType primitive_type
= hair
->primitive_type();
504 const size_t num_curves
= hair
->num_curves();
505 const size_t num_segments
= hair
->num_segments();
506 const Attribute
*curve_attr_mP
= NULL
;
508 if (curve_attr_mP
== NULL
|| bvh
->params
.num_motion_curve_steps
== 0) {
510 bvh
->custom_prim_info
.resize(num_segments
);
511 bvh
->custom_primitive_bound
.alloc(num_segments
);
514 size_t num_boxes
= bvh
->params
.num_motion_curve_steps
* 2 * num_segments
;
515 bvh
->custom_prim_info
.resize(num_boxes
);
516 bvh
->custom_primitive_bound
.alloc(num_boxes
);
517 curve_attr_mP
= hair
->attributes
.find(ATTR_STD_MOTION_VERTEX_POSITION
);
521 float3
*curve_keys
= hair
->get_curve_keys().data();
523 for (uint j
= 0; j
< num_curves
; j
++) {
524 const Hair::Curve curve
= hair
->get_curve(j
);
525 const float *curve_radius
= &hair
->get_curve_radius()[0];
526 int first_key
= curve
.first_key
;
527 for (int k
= 0; k
< curve
.num_keys
- 1; k
++) {
528 if (curve_attr_mP
== NULL
|| bvh
->params
.num_motion_curve_steps
== 0) {
529 float3 current_keys
[4];
530 current_keys
[0] = curve_keys
[max(first_key
+ k
- 1, first_key
)];
531 current_keys
[1] = curve_keys
[first_key
+ k
];
532 current_keys
[2] = curve_keys
[first_key
+ k
+ 1];
533 current_keys
[3] = curve_keys
[min(first_key
+ k
+ 2, first_key
+ curve
.num_keys
- 1)];
535 if (current_keys
[0].x
== current_keys
[1].x
&& current_keys
[1].x
== current_keys
[2].x
&&
536 current_keys
[2].x
== current_keys
[3].x
&& current_keys
[0].y
== current_keys
[1].y
&&
537 current_keys
[1].y
== current_keys
[2].y
&& current_keys
[2].y
== current_keys
[3].y
&&
538 current_keys
[0].z
== current_keys
[1].z
&& current_keys
[1].z
== current_keys
[2].z
&&
539 current_keys
[2].z
== current_keys
[3].z
)
542 BoundBox bounds
= BoundBox::empty
;
543 curve
.bounds_grow(k
, &hair
->get_curve_keys()[0], curve_radius
, bounds
);
544 if (bounds
.valid()) {
545 int type
= PRIMITIVE_PACK_SEGMENT(primitive_type
, k
);
546 bvh
->custom_prim_info
[num_bounds
].x
= j
;
547 bvh
->custom_prim_info
[num_bounds
].y
= type
;
548 bvh
->custom_primitive_bound
[num_bounds
] = bounds
;
554 const int num_bvh_steps
= bvh
->params
.num_motion_curve_steps
* 2 + 1;
555 const float num_bvh_steps_inv_1
= 1.0f
/ (num_bvh_steps
- 1);
556 const size_t num_steps
= hair
->get_motion_steps();
557 const float3
*curve_keys
= &hair
->get_curve_keys()[0];
558 const float4
*key_steps
= curve_attr_mP
->data_float4();
559 const size_t num_keys
= hair
->get_curve_keys().size();
562 curve
.cardinal_motion_keys(curve_keys
,
573 BoundBox prev_bounds
= BoundBox::empty
;
574 curve
.bounds_grow(prev_keys
, prev_bounds
);
576 for (int bvh_step
= 1; bvh_step
< num_bvh_steps
; ++bvh_step
) {
577 const float curr_time
= (float)(bvh_step
)*num_bvh_steps_inv_1
;
579 curve
.cardinal_motion_keys(curve_keys
,
590 BoundBox curr_bounds
= BoundBox::empty
;
591 curve
.bounds_grow(curr_keys
, curr_bounds
);
592 BoundBox bounds
= prev_bounds
;
593 bounds
.grow(curr_bounds
);
594 if (bounds
.valid()) {
595 const float prev_time
= (float)(bvh_step
- 1) * num_bvh_steps_inv_1
;
596 int packed_type
= PRIMITIVE_PACK_SEGMENT(primitive_type
, k
);
597 bvh
->custom_prim_info
[num_bounds
].x
= j
;
598 bvh
->custom_prim_info
[num_bounds
].y
= packed_type
; // k
599 bvh
->custom_primitive_bound
[num_bounds
] = bounds
;
600 bvh
->prims_time
[num_bounds
].x
= curr_time
;
601 bvh
->prims_time
[num_bounds
].y
= prev_time
;
604 prev_bounds
= curr_bounds
;
610 bvh
->custom_prim_aabb
.aabbCount
= num_bounds
;
611 bvh
->custom_prim_aabb
.aabbStride
= sizeof(BoundBox
);
612 bvh
->custom_primitive_bound
.copy_to_device();
613 bvh
->custom_prim_aabb
.aabbs
= (void *)bvh
->custom_primitive_bound
.device_pointer
;
615 geom_input
.type
= hiprtPrimitiveTypeAABBList
;
616 geom_input
.aabbList
.primitive
= &bvh
->custom_prim_aabb
;
617 geom_input
.geomType
= Curve
;
622 hiprtGeometryBuildInput
HIPRTDevice::prepare_point_blas(BVHHIPRT
*bvh
, PointCloud
*pointcloud
)
624 hiprtGeometryBuildInput geom_input
;
626 const Attribute
*point_attr_mP
= NULL
;
627 if (pointcloud
->has_motion_blur()) {
628 point_attr_mP
= pointcloud
->attributes
.find(ATTR_STD_MOTION_VERTEX_POSITION
);
631 const float3
*points_data
= pointcloud
->get_points().data();
632 const float *radius_data
= pointcloud
->get_radius().data();
633 const size_t num_points
= pointcloud
->num_points();
634 const float4
*motion_data
= (point_attr_mP
) ? point_attr_mP
->data_float4() : NULL
;
635 const size_t num_steps
= pointcloud
->get_motion_steps();
639 if (point_attr_mP
== NULL
) {
640 bvh
->custom_primitive_bound
.alloc(num_points
);
641 for (uint j
= 0; j
< num_points
; j
++) {
642 const PointCloud::Point point
= pointcloud
->get_point(j
);
643 BoundBox bounds
= BoundBox::empty
;
644 point
.bounds_grow(points_data
, radius_data
, bounds
);
645 if (bounds
.valid()) {
646 bvh
->custom_primitive_bound
[num_bounds
] = bounds
;
647 bvh
->custom_prim_info
[num_bounds
].x
= j
;
648 bvh
->custom_prim_info
[num_bounds
].y
= PRIMITIVE_POINT
;
653 else if (bvh
->params
.num_motion_point_steps
== 0) {
655 bvh
->custom_primitive_bound
.alloc(num_points
* num_steps
);
657 for (uint j
= 0; j
< num_points
; j
++) {
658 const PointCloud::Point point
= pointcloud
->get_point(j
);
659 BoundBox bounds
= BoundBox::empty
;
660 point
.bounds_grow(points_data
, radius_data
, bounds
);
661 for (size_t step
= 0; step
< num_steps
- 1; step
++) {
662 point
.bounds_grow(motion_data
[step
* num_points
+ j
], bounds
);
664 if (bounds
.valid()) {
665 bvh
->custom_primitive_bound
[num_bounds
] = bounds
;
666 bvh
->custom_prim_info
[num_bounds
].x
= j
;
667 bvh
->custom_prim_info
[num_bounds
].y
= PRIMITIVE_POINT
;
674 const int num_bvh_steps
= bvh
->params
.num_motion_point_steps
* 2 + 1;
675 const float num_bvh_steps_inv_1
= 1.0f
/ (num_bvh_steps
- 1);
677 bvh
->custom_primitive_bound
.alloc(num_points
* num_bvh_steps
);
679 for (uint j
= 0; j
< num_points
; j
++) {
680 const PointCloud::Point point
= pointcloud
->get_point(j
);
681 const size_t num_steps
= pointcloud
->get_motion_steps();
682 const float4
*point_steps
= point_attr_mP
->data_float4();
684 float4 prev_key
= point
.motion_key(
685 points_data
, radius_data
, point_steps
, num_points
, num_steps
, 0.0f
, j
);
686 BoundBox prev_bounds
= BoundBox::empty
;
687 point
.bounds_grow(prev_key
, prev_bounds
);
689 for (int bvh_step
= 1; bvh_step
< num_bvh_steps
; ++bvh_step
) {
690 const float curr_time
= (float)(bvh_step
)*num_bvh_steps_inv_1
;
691 float4 curr_key
= point
.motion_key(
692 points_data
, radius_data
, point_steps
, num_points
, num_steps
, curr_time
, j
);
693 BoundBox curr_bounds
= BoundBox::empty
;
694 point
.bounds_grow(curr_key
, curr_bounds
);
695 BoundBox bounds
= prev_bounds
;
696 bounds
.grow(curr_bounds
);
697 if (bounds
.valid()) {
698 const float prev_time
= (float)(bvh_step
- 1) * num_bvh_steps_inv_1
;
699 bvh
->custom_primitive_bound
[num_bounds
] = bounds
;
700 bvh
->custom_prim_info
[num_bounds
].x
= j
;
701 bvh
->custom_prim_info
[num_bounds
].y
= PRIMITIVE_MOTION_POINT
;
702 bvh
->prims_time
[num_bounds
].x
= curr_time
;
703 bvh
->prims_time
[num_bounds
].y
= prev_time
;
706 prev_bounds
= curr_bounds
;
711 bvh
->custom_prim_aabb
.aabbCount
= bvh
->custom_primitive_bound
.size();
712 bvh
->custom_prim_aabb
.aabbStride
= sizeof(BoundBox
);
713 bvh
->custom_primitive_bound
.copy_to_device();
714 bvh
->custom_prim_aabb
.aabbs
= (void *)bvh
->custom_primitive_bound
.device_pointer
;
716 geom_input
.type
= hiprtPrimitiveTypeAABBList
;
717 geom_input
.aabbList
.primitive
= &bvh
->custom_prim_aabb
;
718 geom_input
.geomType
= Point
;
723 void HIPRTDevice::build_blas(BVHHIPRT
*bvh
, Geometry
*geom
, hiprtBuildOptions options
)
725 hiprtGeometryBuildInput geom_input
= {};
727 switch (geom
->geometry_type
) {
729 case Geometry::VOLUME
: {
730 Mesh
*mesh
= static_cast<Mesh
*>(geom
);
732 if (mesh
->num_triangles() == 0)
735 geom_input
= prepare_triangle_blas(bvh
, mesh
);
739 case Geometry::HAIR
: {
740 Hair
*const hair
= static_cast<Hair
*const>(geom
);
742 if (hair
->num_segments() == 0)
745 geom_input
= prepare_curve_blas(bvh
, hair
);
749 case Geometry::POINTCLOUD
: {
750 PointCloud
*pointcloud
= static_cast<PointCloud
*>(geom
);
751 if (pointcloud
->num_points() == 0)
754 geom_input
= prepare_point_blas(bvh
, pointcloud
);
759 assert(geom_input
.geomType
!= hiprtInvalidValue
);
762 size_t blas_scratch_buffer_size
= 0;
763 hiprtError rt_err
= hiprtGetGeometryBuildTemporaryBufferSize(
764 hiprt_context
, &geom_input
, &options
, &blas_scratch_buffer_size
);
766 if (rt_err
!= hiprtSuccess
) {
767 set_error(string_printf("Failed to get scratch buffer size for BLAS!"));
770 rt_err
= hiprtCreateGeometry(hiprt_context
, &geom_input
, &options
, &bvh
->hiprt_geom
);
772 if (rt_err
!= hiprtSuccess
) {
773 set_error(string_printf("Failed to create BLAS!"));
775 bvh
->geom_input
= geom_input
;
777 thread_scoped_lock
lock(hiprt_mutex
);
778 if (blas_scratch_buffer_size
> scratch_buffer_size
) {
779 scratch_buffer
.alloc(blas_scratch_buffer_size
);
780 scratch_buffer_size
= blas_scratch_buffer_size
;
781 scratch_buffer
.zero_to_device();
783 rt_err
= hiprtBuildGeometry(hiprt_context
,
784 hiprtBuildOperationBuild
,
787 (void *)(scratch_buffer
.device_pointer
),
791 if (rt_err
!= hiprtSuccess
) {
792 set_error(string_printf("Failed to build BLAS"));
796 hiprtScene
HIPRTDevice::build_tlas(BVHHIPRT
*bvh
,
797 vector
<Object
*> objects
,
798 hiprtBuildOptions options
,
802 size_t num_object
= objects
.size();
803 if (num_object
== 0) {
807 hiprtBuildOperation build_operation
= refit
? hiprtBuildOperationUpdate
:
808 hiprtBuildOperationBuild
;
810 array
<hiprtFrameMatrix
> transform_matrix
;
812 unordered_map
<Geometry
*, int2
> prim_info_map
;
813 size_t custom_prim_offset
= 0;
815 unordered_map
<Geometry
*, int> prim_time_map
;
817 size_t num_instances
= 0;
818 int blender_instance_id
= 0;
820 user_instance_id
.alloc(num_object
);
821 prim_visibility
.alloc(num_object
);
822 hiprt_blas_ptr
.alloc(num_object
);
823 blas_ptr
.alloc(num_object
);
824 transform_headers
.alloc(num_object
);
825 custom_prim_info_offset
.alloc(num_object
);
826 prim_time_offset
.alloc(num_object
);
828 foreach (Object
*ob
, objects
) {
830 if (ob
->is_traceable()) {
831 mask
= ob
->visibility_for_tracing();
834 Transform current_transform
= ob
->get_tfm();
835 Geometry
*geom
= ob
->get_geometry();
836 bool transform_applied
= geom
->transform_applied
;
838 BVHHIPRT
*current_bvh
= static_cast<BVHHIPRT
*>(geom
->bvh
);
839 bool is_valid_geometry
= current_bvh
->geom_input
.geomType
!= hiprtInvalidValue
;
840 hiprtGeometry hiprt_geom_current
= current_bvh
->hiprt_geom
;
842 hiprtFrameMatrix hiprt_transform_matrix
= {{{0}}};
843 Transform identity_matrix
= transform_identity();
844 get_hiprt_transform(hiprt_transform_matrix
.matrix
, identity_matrix
);
846 if (is_valid_geometry
) {
847 bool is_custom_prim
= current_bvh
->custom_prim_info
.size() > 0;
849 if (is_custom_prim
) {
851 bool has_motion_blur
= current_bvh
->prims_time
.size() > 0;
853 unordered_map
<Geometry
*, int2
>::iterator it
= prim_info_map
.find(geom
);
855 if (prim_info_map
.find(geom
) != prim_info_map
.end()) {
857 custom_prim_info_offset
[blender_instance_id
] = it
->second
;
859 if (has_motion_blur
) {
861 prim_time_offset
[blender_instance_id
] = prim_time_map
[geom
];
865 int offset
= bvh
->custom_prim_info
.size();
867 prim_info_map
[geom
].x
= offset
;
868 prim_info_map
[geom
].y
= custom_prim_offset
;
870 bvh
->custom_prim_info
.resize(offset
+ current_bvh
->custom_prim_info
.size());
871 memcpy(bvh
->custom_prim_info
.data() + offset
,
872 current_bvh
->custom_prim_info
.data(),
873 current_bvh
->custom_prim_info
.size() * sizeof(int2
));
875 custom_prim_info_offset
[blender_instance_id
].x
= offset
;
876 custom_prim_info_offset
[blender_instance_id
].y
= custom_prim_offset
;
878 if (geom
->geometry_type
== Geometry::HAIR
) {
879 custom_prim_offset
+= ((Hair
*)geom
)->num_curves();
881 else if (geom
->geometry_type
== Geometry::POINTCLOUD
) {
882 custom_prim_offset
+= ((PointCloud
*)geom
)->num_points();
885 custom_prim_offset
+= ((Mesh
*)geom
)->num_triangles();
888 if (has_motion_blur
) {
889 int time_offset
= bvh
->prims_time
.size();
890 prim_time_map
[geom
] = time_offset
;
892 memcpy(bvh
->prims_time
.data() + time_offset
,
893 current_bvh
->prims_time
.data(),
894 current_bvh
->prims_time
.size() * sizeof(float2
));
896 prim_time_offset
[blender_instance_id
] = time_offset
;
899 prim_time_offset
[blender_instance_id
] = -1;
903 custom_prim_info_offset
[blender_instance_id
] = {-1, -1};
905 hiprtTransformHeader current_header
= {0};
906 current_header
.frameCount
= 1;
907 current_header
.frameIndex
= transform_matrix
.size();
908 if (ob
->get_motion().size()) {
909 int motion_size
= ob
->get_motion().size();
910 assert(motion_size
== 1);
912 array
<Transform
> tfm_array
= ob
->get_motion();
913 float time_iternval
= 1 / (float)(motion_size
- 1);
914 current_header
.frameCount
= motion_size
;
916 vector
<hiprtFrameMatrix
> tfm_hiprt_mb
;
917 tfm_hiprt_mb
.resize(motion_size
);
918 for (int i
= 0; i
< motion_size
; i
++) {
919 get_hiprt_transform(tfm_hiprt_mb
[i
].matrix
, tfm_array
[i
]);
920 tfm_hiprt_mb
[i
].time
= (float)i
* time_iternval
;
921 transform_matrix
.push_back_slow(tfm_hiprt_mb
[i
]);
925 if (transform_applied
)
926 current_transform
= identity_matrix
;
927 get_hiprt_transform(hiprt_transform_matrix
.matrix
, current_transform
);
928 transform_matrix
.push_back_slow(hiprt_transform_matrix
);
931 transform_headers
[num_instances
] = current_header
;
933 user_instance_id
[num_instances
] = blender_instance_id
;
934 prim_visibility
[num_instances
] = mask
;
935 hiprt_blas_ptr
[num_instances
] = (uint64_t)hiprt_geom_current
;
938 blas_ptr
[blender_instance_id
] = (uint64_t)hiprt_geom_current
;
939 blender_instance_id
++;
942 int frame_count
= transform_matrix
.size();
943 hiprtSceneBuildInput scene_input_ptr
= {0};
944 scene_input_ptr
.instanceCount
= num_instances
;
945 scene_input_ptr
.frameCount
= frame_count
;
946 scene_input_ptr
.frameType
= hiprtFrameTypeMatrix
;
948 user_instance_id
.copy_to_device();
949 prim_visibility
.copy_to_device();
950 hiprt_blas_ptr
.copy_to_device();
951 blas_ptr
.copy_to_device();
952 transform_headers
.copy_to_device();
954 instance_transform_matrix
.alloc(frame_count
);
955 instance_transform_matrix
.host_pointer
= transform_matrix
.data();
956 instance_transform_matrix
.data_elements
= sizeof(hiprtFrameMatrix
);
957 instance_transform_matrix
.data_type
= TYPE_UCHAR
;
958 instance_transform_matrix
.data_size
= frame_count
;
959 instance_transform_matrix
.copy_to_device();
960 instance_transform_matrix
.host_pointer
= 0;
963 scene_input_ptr
.instanceMasks
= (void *)prim_visibility
.device_pointer
;
964 scene_input_ptr
.instanceGeometries
= (void *)hiprt_blas_ptr
.device_pointer
;
965 scene_input_ptr
.instanceTransformHeaders
= (void *)transform_headers
.device_pointer
;
966 scene_input_ptr
.instanceFrames
= (void *)instance_transform_matrix
.device_pointer
;
968 hiprtScene scene
= 0;
970 hiprtError rt_err
= hiprtCreateScene(hiprt_context
, &scene_input_ptr
, &options
, &scene
);
972 if (rt_err
!= hiprtSuccess
) {
973 set_error(string_printf("Failed to create TLAS"));
976 size_t tlas_scratch_buffer_size
;
977 rt_err
= hiprtGetSceneBuildTemporaryBufferSize(
978 hiprt_context
, &scene_input_ptr
, &options
, &tlas_scratch_buffer_size
);
980 if (rt_err
!= hiprtSuccess
) {
981 set_error(string_printf("Failed to get scratch buffer size for TLAS"));
984 if (tlas_scratch_buffer_size
> scratch_buffer_size
) {
985 scratch_buffer
.alloc(tlas_scratch_buffer_size
);
986 scratch_buffer
.zero_to_device();
989 rt_err
= hiprtBuildScene(hiprt_context
,
993 (void *)scratch_buffer
.device_pointer
,
997 if (rt_err
!= hiprtSuccess
) {
998 set_error(string_printf("Failed to build TLAS"));
1001 scratch_buffer
.free();
1002 scratch_buffer_size
= 0;
1004 if (bvh
->custom_prim_info
.size()) {
1005 size_t data_size
= bvh
->custom_prim_info
.size();
1006 custom_prim_info
.alloc(data_size
);
1007 custom_prim_info
.host_pointer
= bvh
->custom_prim_info
.data();
1008 custom_prim_info
.data_elements
= 2;
1009 custom_prim_info
.data_type
= TYPE_INT
;
1010 custom_prim_info
.data_size
= data_size
;
1011 custom_prim_info
.copy_to_device();
1012 custom_prim_info
.host_pointer
= 0;
1014 custom_prim_info_offset
.copy_to_device();
1017 if (bvh
->prims_time
.size()) {
1018 size_t data_size
= bvh
->prims_time
.size();
1019 prims_time
.alloc(data_size
);
1020 prims_time
.host_pointer
= bvh
->prims_time
.data();
1021 prims_time
.data_elements
= 2;
1022 prims_time
.data_type
= TYPE_FLOAT
;
1023 prims_time
.data_size
= data_size
;
1024 prims_time
.copy_to_device();
1025 prims_time
.host_pointer
= 0;
1027 prim_time_offset
.copy_to_device();
1030 size_t table_ptr_size
= 0;
1031 hipDeviceptr_t table_device_ptr
;
1033 hip_assert(hipModuleGetGlobal(&table_device_ptr
, &table_ptr_size
, hipModule
, "kernel_params"));
1035 size_t kernel_param_offset
[4];
1036 int table_index
= 0;
1037 kernel_param_offset
[table_index
++] = offsetof(KernelParamsHIPRT
, table_closest_intersect
);
1038 kernel_param_offset
[table_index
++] = offsetof(KernelParamsHIPRT
, table_shadow_intersect
);
1039 kernel_param_offset
[table_index
++] = offsetof(KernelParamsHIPRT
, table_local_intersect
);
1040 kernel_param_offset
[table_index
++] = offsetof(KernelParamsHIPRT
, table_volume_intersect
);
1042 for (int index
= 0; index
< table_index
; index
++) {
1044 hip_assert(hipMemcpyHtoD(
1045 table_device_ptr
+ kernel_param_offset
[index
], &functions_table
, sizeof(device_ptr
)));
1051 void HIPRTDevice::build_bvh(BVH
*bvh
, Progress
&progress
, bool refit
)
1053 progress
.set_substatus("Building HIPRT acceleration structure");
1055 hiprtBuildOptions options
;
1056 options
.buildFlags
= hiprtBuildFlagBitPreferHighQualityBuild
;
1058 BVHHIPRT
*bvh_rt
= static_cast<BVHHIPRT
*>(bvh
);
1059 HIPContextScope
scope(this);
1061 if (!bvh_rt
->is_tlas()) {
1062 vector
<Geometry
*> geometry
= bvh_rt
->geometry
;
1063 assert(geometry
.size() == 1);
1064 Geometry
*geom
= geometry
[0];
1065 build_blas(bvh_rt
, geom
, options
);
1069 const vector
<Object
*> objects
= bvh_rt
->objects
;
1070 scene
= build_tlas(bvh_rt
, objects
, options
, refit
);