Initial commit.
This commit is contained in:
commit
d3bb49b3f5
1073 changed files with 484757 additions and 0 deletions
168
Framework/external/embree/kernels/sycl/rtcore_sycl.cpp
vendored
Normal file
168
Framework/external/embree/kernels/sycl/rtcore_sycl.cpp
vendored
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#define RTC_EXPORT_API
|
||||
|
||||
#include "../common/default.h"
|
||||
#include "../common/device.h"
|
||||
#include "../common/scene.h"
|
||||
#include "../common/context.h"
|
||||
#include "../geometry/filter.h"
|
||||
#include "rthwif_embree.h"
|
||||
using namespace embree;
|
||||
|
||||
#define DBG(x)
|
||||
|
||||
RTC_NAMESPACE_BEGIN;
|
||||
|
||||
RTC_API_EXTERN_C RTCDevice rtcNewSYCLDeviceInternal(sycl::context context, const char* config);
|
||||
|
||||
void use_rthwif_embree();
|
||||
void use_rthwif_production();
|
||||
|
||||
/* we define rtcNewSYCLDevice in libembree_sycl.a to avoid drop of rtcore_sycl.o during linking of libembree_sycl.a file */
|
||||
RTC_API_EXTERN_C RTCDevice rtcNewSYCLDevice(sycl::context context, const char* config)
|
||||
{
|
||||
use_rthwif_embree(); // to avoid drop of rthwif_embree.o during linking of libembree_sycl.a file
|
||||
#if defined(EMBREE_SYCL_RT_VALIDATION_API)
|
||||
use_rthwif_production(); // to avoid drop of rthwif_production.o during linking of libembree_sycl.a file
|
||||
#endif
|
||||
return rtcNewSYCLDeviceInternal(context, config);
|
||||
}
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && (defined(__SYCL_DEVICE_ONLY__) || defined(EMBREE_SYCL_RT_SIMULATION))
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersect1(RTCScene hscene, struct RTCRayHit* rayhit, struct RTCIntersectArguments* args)
|
||||
{
|
||||
RTCIntersectArguments default_args;
|
||||
if (args == nullptr) {
|
||||
rtcInitIntersectArguments(&default_args);
|
||||
args = &default_args;
|
||||
}
|
||||
RTCRayQueryContext* context = args->context;
|
||||
|
||||
RTCRayQueryContext defaultContext;
|
||||
if (unlikely(context == nullptr)) {
|
||||
rtcInitRayQueryContext(&defaultContext);
|
||||
context = &defaultContext;
|
||||
}
|
||||
|
||||
rtcIntersectRTHW(hscene, context, rayhit, args);
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcForwardIntersect1(const RTCIntersectFunctionNArguments* args_, RTCScene scene, struct RTCRay* iray, unsigned int instID)
|
||||
{
|
||||
return rtcForwardIntersect1Ex(args_, scene, iray, instID, 0);
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcForwardIntersect1Ex(const RTCIntersectFunctionNArguments* args_, RTCScene scene, struct RTCRay* iray, unsigned int instID, unsigned int instPrimID)
|
||||
{
|
||||
IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_;
|
||||
assert(args->N == 1);
|
||||
assert(args->forward_scene == nullptr);
|
||||
|
||||
Ray* oray = (Ray*)args->rayhit;
|
||||
oray->org.x = iray->org_x;
|
||||
oray->org.y = iray->org_y;
|
||||
oray->org.z = iray->org_z;
|
||||
oray->dir.x = iray->dir_x;
|
||||
oray->dir.y = iray->dir_y;
|
||||
oray->dir.z = iray->dir_z;
|
||||
args->forward_scene = scene;
|
||||
instance_id_stack::push(args->context, instID, instPrimID);
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcOccluded1(RTCScene hscene, struct RTCRay* ray, struct RTCOccludedArguments* args)
|
||||
{
|
||||
RTCOccludedArguments default_args;
|
||||
if (args == nullptr) {
|
||||
rtcInitOccludedArguments(&default_args);
|
||||
args = &default_args;
|
||||
}
|
||||
RTCRayQueryContext* context = args->context;
|
||||
|
||||
RTCRayQueryContext defaultContext;
|
||||
if (unlikely(context == nullptr)) {
|
||||
rtcInitRayQueryContext(&defaultContext);
|
||||
context = &defaultContext;
|
||||
}
|
||||
|
||||
rtcOccludedRTHW(hscene, context, ray, args);
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcForwardOccluded1(const RTCOccludedFunctionNArguments *args_, RTCScene scene, struct RTCRay *iray, unsigned int instID){
|
||||
return rtcForwardOccluded1Ex(args_, scene, iray, instID, 0);
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcForwardOccluded1Ex(const RTCOccludedFunctionNArguments *args_, RTCScene scene, struct RTCRay *iray, unsigned int instID, unsigned int instPrimID)
|
||||
{
|
||||
OccludedFunctionNArguments* args = (OccludedFunctionNArguments*) args_;
|
||||
assert(args->N == 1);
|
||||
assert(args->forward_scene == nullptr);
|
||||
|
||||
Ray* oray = (Ray*)args->ray;
|
||||
oray->org.x = iray->org_x;
|
||||
oray->org.y = iray->org_y;
|
||||
oray->org.z = iray->org_z;
|
||||
oray->dir.x = iray->dir_x;
|
||||
oray->dir.y = iray->dir_y;
|
||||
oray->dir.z = iray->dir_z;
|
||||
args->forward_scene = scene;
|
||||
instance_id_stack::push(args->context, instID, instPrimID);
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void* rtcGetGeometryUserDataFromScene (RTCScene hscene, unsigned int geomID)
|
||||
{
|
||||
Scene* scene = (Scene*) hscene;
|
||||
//RTC_CATCH_BEGIN;
|
||||
//RTC_TRACE(rtcGetGeometryUserDataFromScene);
|
||||
#if defined(DEBUG)
|
||||
//RTC_VERIFY_HANDLE(hscene);
|
||||
//RTC_VERIFY_GEOMID(geomID);
|
||||
#endif
|
||||
//RTC_ENTER_DEVICE(hscene); // do not enable for performance reasons
|
||||
return scene->get(geomID)->getUserData();
|
||||
//RTC_CATCH_END2(scene);
|
||||
//return nullptr;
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcGetGeometryTransformFromScene(RTCScene hscene, unsigned int geomID, float time, enum RTCFormat format, void* xfm)
|
||||
{
|
||||
Scene* scene = (Scene*) hscene;
|
||||
//RTC_CATCH_BEGIN;
|
||||
//RTC_TRACE(rtcGetGeometryTransformFromScene);
|
||||
//RTC_ENTER_DEVICE(hscene);
|
||||
AffineSpace3fa transform = one;
|
||||
Geometry* geom = scene->get(geomID);
|
||||
if (geom->getTypeMask() & Geometry::MTY_INSTANCE) {
|
||||
Instance* instance = (Instance*) geom;
|
||||
if (likely(instance->numTimeSteps <= 1))
|
||||
transform = instance->getLocal2World();
|
||||
else
|
||||
transform = instance->getLocal2World(time);
|
||||
}
|
||||
storeTransform(transform, format, (float*)xfm);
|
||||
//RTC_CATCH_END2(geometry);
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcInvokeIntersectFilterFromGeometry(const RTCIntersectFunctionNArguments* args_i, const RTCFilterFunctionNArguments* filter_args)
|
||||
{
|
||||
#if EMBREE_SYCL_GEOMETRY_CALLBACK
|
||||
IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_i;
|
||||
if (args->geometry->intersectionFilterN)
|
||||
args->geometry->intersectionFilterN(filter_args);
|
||||
#endif
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcInvokeOccludedFilterFromGeometry(const RTCOccludedFunctionNArguments* args_i, const RTCFilterFunctionNArguments* filter_args)
|
||||
{
|
||||
#if EMBREE_SYCL_GEOMETRY_CALLBACK
|
||||
OccludedFunctionNArguments* args = (OccludedFunctionNArguments*) args_i;
|
||||
if (args->geometry->occlusionFilterN)
|
||||
args->geometry->occlusionFilterN(filter_args);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
RTC_NAMESPACE_END;
|
||||
866
Framework/external/embree/kernels/sycl/rthwif_embree.cpp
vendored
Normal file
866
Framework/external/embree/kernels/sycl/rthwif_embree.cpp
vendored
Normal file
|
|
@ -0,0 +1,866 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "../common/default.h"
|
||||
#include "../common/device.h"
|
||||
#include "../common/scene.h"
|
||||
#include "../common/context.h"
|
||||
#include "../../include/embree4/rtcore.h"
|
||||
|
||||
#include "../geometry/disc_intersector.h"
|
||||
#include "../geometry/sphere_intersector.h"
|
||||
#include "../geometry/roundline_intersector.h"
|
||||
#include "../geometry/curve_intersector_ribbon.h"
|
||||
#include "../geometry/curve_intersector_oriented.h"
|
||||
#include "../geometry/curve_intersector_sweep.h"
|
||||
#include "../geometry/curve_intersector_distance.h"
|
||||
#include "../geometry/curve_intersector_ribbon.h"
|
||||
#include "../geometry/curveNi_intersector.h"
|
||||
#include "../geometry/instance_intersector.h"
|
||||
#include "../geometry/instance_array_intersector.h"
|
||||
#include "../geometry/intersector_epilog_sycl.h"
|
||||
#include "../geometry/triangle_intersector_moeller.h"
|
||||
#include "../geometry/triangle_intersector_pluecker.h"
|
||||
|
||||
#include "rthwif_embree.h"
|
||||
#include "../rthwif/rttrace/rttrace.h"
|
||||
|
||||
using namespace embree;
|
||||
|
||||
intel_ray_flags_t operator |(intel_ray_flags_t a, intel_ray_flags_t b) {
|
||||
return (intel_ray_flags_t) ((uint32_t)a | (uint32_t)b);
|
||||
}
|
||||
|
||||
intel_ray_flags_t operator |= (intel_ray_flags_t& a, intel_ray_flags_t b) {
|
||||
return a = a | b;
|
||||
}
|
||||
|
||||
RTC_NAMESPACE_BEGIN;
|
||||
|
||||
//#if defined(EMBREE_SYCL_ROBUST)
|
||||
#define TriangleIntersector isa::PlueckerIntersector1<1,true>
|
||||
#define ROBUST_MODE true
|
||||
//#else
|
||||
//#define TriangleIntersector isa::MoellerTrumboreIntersector1<1,true>
|
||||
//#define ROBUST_MODE false
|
||||
//#endif
|
||||
|
||||
const constexpr uint32_t TRAV_LOOP_FEATURES =
|
||||
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
|
||||
RTC_FEATURE_FLAG_TRIANGLE | // filter function enforced for triangles and quads in this case
|
||||
RTC_FEATURE_FLAG_QUAD |
|
||||
#endif
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
RTC_FEATURE_FLAG_32_BIT_RAY_MASK |
|
||||
#endif
|
||||
RTC_FEATURE_FLAG_MOTION_BLUR |
|
||||
RTC_FEATURE_FLAG_CURVES |
|
||||
RTC_FEATURE_FLAG_GRID |
|
||||
RTC_FEATURE_FLAG_POINT |
|
||||
RTC_FEATURE_FLAG_USER_GEOMETRY |
|
||||
RTC_FEATURE_FLAG_INSTANCE |
|
||||
RTC_FEATURE_FLAG_INSTANCE_ARRAY |
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION;
|
||||
|
||||
void use_rthwif_embree() {
|
||||
}
|
||||
|
||||
__forceinline Vec3f intel_get_hit_triangle_normal(intel_ray_query_t& query, intel_hit_type_t hit_type)
|
||||
{
|
||||
intel_float3 v[3]; intel_get_hit_triangle_vertices(query, v, hit_type);
|
||||
const Vec3f v0(v[0].x, v[0].y, v[0].z);
|
||||
const Vec3f v1(v[1].x, v[1].y, v[1].z);
|
||||
const Vec3f v2(v[2].x, v[2].y, v[2].z);
|
||||
return cross(v1-v0, v2-v0);
|
||||
}
|
||||
|
||||
__forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray, UserGeometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
|
||||
{
|
||||
/* perform ray mask test */
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
if ((ray.mask & geom->mask) == 0)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
RTCScene forward_scene = nullptr;
|
||||
const bool ishit = geom->intersect(ray,geomID,primID,context,forward_scene);
|
||||
if (!forward_scene) return ishit;
|
||||
|
||||
/* forward ray to instanced scene */
|
||||
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
|
||||
Scene* scene = (Scene*) forward_scene;
|
||||
scenes[bvh_level] = scene;
|
||||
|
||||
intel_ray_desc_t raydesc;
|
||||
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
|
||||
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
|
||||
raydesc.tmin = ray.tnear();
|
||||
raydesc.tmax = inf; // unused
|
||||
raydesc.mask = mask32_to_mask8(ray.mask);
|
||||
raydesc.flags = intel_ray_flags_force_non_opaque;
|
||||
|
||||
//if (context.enforceArgumentFilterFunction())
|
||||
// raydesc.flags |= intel_ray_flags_force_non_opaque;
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING)
|
||||
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
|
||||
#endif
|
||||
|
||||
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) scene->hwaccel.data();
|
||||
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
|
||||
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[0];
|
||||
|
||||
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
|
||||
return false;
|
||||
}
|
||||
|
||||
__forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, UserGeometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
|
||||
{
|
||||
/* perform ray mask test */
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
if ((ray.mask & geom->mask) == 0)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
RTCScene forward_scene = nullptr;
|
||||
const bool ishit = geom->occluded(ray,geomID,primID,context,forward_scene);
|
||||
if (!forward_scene) return ishit;
|
||||
|
||||
/* forward ray to instanced scene */
|
||||
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
|
||||
Scene* scene = (Scene*) forward_scene;
|
||||
scenes[bvh_level] = scene;
|
||||
|
||||
intel_ray_desc_t raydesc;
|
||||
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
|
||||
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
|
||||
raydesc.tmin = ray.tnear();
|
||||
raydesc.tmax = inf; // unused
|
||||
raydesc.mask = mask32_to_mask8(ray.mask);
|
||||
raydesc.flags = intel_ray_flags_force_non_opaque | intel_ray_flags_accept_first_hit_and_end_search;
|
||||
|
||||
//if (context.enforceArgumentFilterFunction())
|
||||
// raydesc.flags |= intel_ray_flags_force_non_opaque;
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING)
|
||||
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
|
||||
#endif
|
||||
|
||||
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) scene->hwaccel.data();
|
||||
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
|
||||
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[0];
|
||||
|
||||
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename Ray>
|
||||
__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Instance* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID);
|
||||
|
||||
template<>
|
||||
__forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Instance* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
|
||||
{
|
||||
/* perform ray mask test */
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
if ((ray.mask & instance->mask) == 0)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
if (!instance_id_stack::push(context->user, geomID, 0))
|
||||
return false;
|
||||
|
||||
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
|
||||
|
||||
Scene* object = (Scene*) instance->object;
|
||||
const AffineSpace3fa world2local = instance->getWorld2Local(ray.time());
|
||||
const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
|
||||
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
|
||||
scenes[bvh_level] = object;
|
||||
|
||||
intel_ray_desc_t raydesc;
|
||||
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
|
||||
raydesc.direction = float3(ray_dir.x, ray_dir.y, ray_dir.z);
|
||||
raydesc.tmin = ray.tnear();
|
||||
raydesc.tmax = inf; // unused
|
||||
raydesc.mask = mask32_to_mask8(ray.mask);
|
||||
raydesc.flags = intel_ray_flags_force_non_opaque;
|
||||
|
||||
//if (context.enforceArgumentFilterFunction())
|
||||
// raydesc.flags |= intel_ray_flags_force_non_opaque;
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING)
|
||||
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
|
||||
#endif
|
||||
|
||||
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) object->hwaccel.data();
|
||||
|
||||
uint32_t bvh_id = 0;
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
|
||||
if (context->args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
|
||||
float time = clamp(ray.time(),0.0f,1.0f);
|
||||
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
|
||||
}
|
||||
|
||||
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
|
||||
|
||||
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<>
|
||||
__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Instance* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
|
||||
{
|
||||
/* perform ray mask test */
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
if ((ray.mask & instance->mask) == 0)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
if (!instance_id_stack::push(context->user, geomID, 0))
|
||||
return false;
|
||||
|
||||
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
|
||||
|
||||
Scene* object = (Scene*) instance->object;
|
||||
const AffineSpace3fa world2local = instance->getWorld2Local(ray.time());
|
||||
const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
|
||||
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
|
||||
scenes[bvh_level] = object;
|
||||
|
||||
intel_ray_desc_t raydesc;
|
||||
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
|
||||
raydesc.direction = float3(ray_dir.x, ray_dir.y, ray_dir.z);
|
||||
raydesc.tmin = ray.tnear();
|
||||
raydesc.tmax = inf; // unused
|
||||
raydesc.mask = mask32_to_mask8(ray.mask);
|
||||
raydesc.flags = intel_ray_flags_accept_first_hit_and_end_search;
|
||||
|
||||
if (context->enforceArgumentFilterFunction())
|
||||
raydesc.flags |= intel_ray_flags_force_non_opaque;
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING)
|
||||
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
|
||||
#endif
|
||||
|
||||
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) object->hwaccel.data();
|
||||
|
||||
uint32_t bvh_id = 0;
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
|
||||
if (context->args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
|
||||
float time = clamp(ray.time(),0.0f,1.0f);
|
||||
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
|
||||
}
|
||||
|
||||
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
|
||||
|
||||
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename Ray>
|
||||
__forceinline bool intersect_instance_array(intel_ray_query_t& query, Ray& ray, InstanceArray* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID);
|
||||
|
||||
template<>
|
||||
__forceinline bool intersect_instance_array(intel_ray_query_t& query, RayHit& ray, InstanceArray* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
|
||||
{
|
||||
Scene* object = (Scene*) instance->getObject(primID);
|
||||
if (!object) return false;
|
||||
|
||||
/* perform ray mask test */
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
if ((ray.mask & instance->mask) == 0)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
if (!instance_id_stack::push(context->user, geomID, primID))
|
||||
return false;
|
||||
|
||||
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
|
||||
|
||||
const AffineSpace3fa world2local = instance->getWorld2Local(primID, ray.time());
|
||||
const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
|
||||
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
|
||||
scenes[bvh_level] = object;
|
||||
|
||||
intel_ray_desc_t raydesc;
|
||||
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
|
||||
raydesc.direction = float3(ray_dir.x, ray_dir.y, ray_dir.z);
|
||||
raydesc.tmin = ray.tnear();
|
||||
raydesc.tmax = inf; // unused
|
||||
raydesc.mask = mask32_to_mask8(ray.mask);
|
||||
raydesc.flags = intel_ray_flags_force_non_opaque;
|
||||
|
||||
//if (context.enforceArgumentFilterFunction())
|
||||
// raydesc.flags |= intel_ray_flags_force_non_opaque;
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING)
|
||||
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
|
||||
#endif
|
||||
|
||||
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) object->hwaccel.data();
|
||||
|
||||
uint32_t bvh_id = 0;
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
|
||||
if (context->args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
|
||||
float time = clamp(ray.time(),0.0f,1.0f);
|
||||
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
|
||||
}
|
||||
|
||||
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
|
||||
|
||||
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<>
|
||||
__forceinline bool intersect_instance_array(intel_ray_query_t& query, Ray& ray, InstanceArray* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
|
||||
{
|
||||
Scene* object = (Scene*) instance->getObject(primID);
|
||||
if (!object) return false;
|
||||
|
||||
/* perform ray mask test */
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
if ((ray.mask & instance->mask) == 0)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
if (!instance_id_stack::push(context->user, geomID, primID))
|
||||
return false;
|
||||
|
||||
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
|
||||
|
||||
const AffineSpace3fa world2local = instance->getWorld2Local(primID, ray.time());
|
||||
const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
|
||||
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
|
||||
scenes[bvh_level] = object;
|
||||
|
||||
intel_ray_desc_t raydesc;
|
||||
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
|
||||
raydesc.direction = float3(ray_dir.x, ray_dir.y, ray_dir.z);
|
||||
raydesc.tmin = ray.tnear();
|
||||
raydesc.tmax = inf; // unused
|
||||
raydesc.mask = mask32_to_mask8(ray.mask);
|
||||
raydesc.flags = intel_ray_flags_accept_first_hit_and_end_search;
|
||||
|
||||
if (context->enforceArgumentFilterFunction())
|
||||
raydesc.flags |= intel_ray_flags_force_non_opaque;
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING)
|
||||
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
|
||||
#endif
|
||||
|
||||
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) object->hwaccel.data();
|
||||
|
||||
uint32_t bvh_id = 0;
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
|
||||
if (context->args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
|
||||
float time = clamp(ray.time(),0.0f,1.0f);
|
||||
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
|
||||
}
|
||||
|
||||
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
|
||||
|
||||
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename Ray>
|
||||
__forceinline bool intersect_primitive(intel_ray_query_t& query, Ray& ray, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], Geometry* geom, sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, const RTCFeatureFlags feature_mask)
|
||||
{
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
bool filter = feature_mask & (RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY);
|
||||
if (feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
|
||||
if (ray.time() < geom->time_range.lower || geom->time_range.upper < ray.time())
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
if ((feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY) && (geom->getType() == Geometry::GTY_USER_GEOMETRY)) {
|
||||
return intersect_user_geometry(query,ray,(UserGeometry*)geom, scenes, context, geomID, primID);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
|
||||
if ((feature_mask & RTC_FEATURE_FLAG_INSTANCE) && (geom->getTypeMask() & Geometry::MTY_INSTANCE)) {
|
||||
return intersect_instance(query,ray,(Instance*)geom, scenes, context, geomID, primID);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
if ((feature_mask & RTC_FEATURE_FLAG_INSTANCE_ARRAY) && (geom->getTypeMask() & Geometry::MTY_INSTANCE_ARRAY)) {
|
||||
return intersect_instance_array(query,ray,(InstanceArray*)geom, scenes, context, geomID, primID);
|
||||
}
|
||||
#endif
|
||||
|
||||
isa::CurvePrecalculations1 pre(ray,context->scene);
|
||||
|
||||
const Geometry::GType gtype MAYBE_UNUSED = geom->getType();
|
||||
const Geometry::GType stype MAYBE_UNUSED = (Geometry::GType)(gtype & Geometry::GTY_SUBTYPE_MASK);
|
||||
const Geometry::GType basis MAYBE_UNUSED = (Geometry::GType)(gtype & Geometry::GTY_BASIS_MASK);
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
if (gtype == Geometry::GTY_TRIANGLE_MESH && (feature_mask & RTC_FEATURE_FLAG_TRIANGLE) && (feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR))
|
||||
{
|
||||
const TriangleMesh* geom = context->scene->get<TriangleMesh>(geomID);
|
||||
const TriangleMesh::Triangle triangle = geom->triangle(primID);
|
||||
Vec3fa v0 = geom->vertex(triangle.v[0], ray.time());
|
||||
Vec3fa v1 = geom->vertex(triangle.v[1], ray.time());
|
||||
Vec3fa v2 = geom->vertex(triangle.v[2], ray.time());
|
||||
return TriangleIntersector().intersect(ray,v0,v1,v2,Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, primID, filter));
|
||||
} else
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
if (gtype == Geometry::GTY_QUAD_MESH && (feature_mask & RTC_FEATURE_FLAG_QUAD) && (feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR))
|
||||
{
|
||||
const QuadMesh* geom = context->scene->get<QuadMesh>(geomID);
|
||||
const QuadMesh::Quad quad = geom->quad(primID);
|
||||
Vec3fa v0 = geom->vertex(quad.v[0], ray.time());
|
||||
Vec3fa v1 = geom->vertex(quad.v[1], ray.time());
|
||||
Vec3fa v2 = geom->vertex(quad.v[2], ray.time());
|
||||
Vec3fa v3 = geom->vertex(quad.v[3], ray.time());
|
||||
bool ishit0 = TriangleIntersector().intersect(ray,v0,v1,v3,Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, primID, filter));
|
||||
bool ishit1 = TriangleIntersector().intersect(ray,v2,v3,v1,[&](float &u, float &v, Vec3f& Ng) { u = 1.f - u; v = 1.f - v; }, Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, primID, filter));
|
||||
return ishit0 || ishit1;
|
||||
} else
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_GRID)
|
||||
if (gtype == Geometry::GTY_GRID_MESH && (feature_mask & RTC_FEATURE_FLAG_GRID))
|
||||
{
|
||||
const GridMesh* mesh = context->scene->get<GridMesh>(geomID);
|
||||
const GridMesh::PrimID_XY c = mesh->quadID_to_primID_xy[primID];
|
||||
const GridMesh::Grid& g = mesh->grid(c.primID);
|
||||
|
||||
auto map_uv0 = [&](float &u, float &v, Vec3f& Ng) {
|
||||
const Vec2f uv(u,v);
|
||||
u = (c.x + uv.x)/(g.resX-1);
|
||||
v = (c.y + uv.y)/(g.resY-1);
|
||||
};
|
||||
|
||||
auto map_uv1 = [&](float &u, float &v, Vec3f& Ng) {
|
||||
const Vec2f uv(1.0f-u, 1.0f-v);
|
||||
u = (c.x + uv.x)/(g.resX-1);
|
||||
v = (c.y + uv.y)/(g.resY-1);
|
||||
};
|
||||
|
||||
Vec3fa v0,v1,v2,v3;
|
||||
mesh->gather_quad_vertices_safe(v0,v1,v2,v3,g,c.x,c.y,ray.time());
|
||||
|
||||
bool ishit0 = TriangleIntersector().intersect(ray,v0,v1,v3,map_uv0,Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, c.primID, filter));
|
||||
bool ishit1 = TriangleIntersector().intersect(ray,v2,v3,v1,map_uv1,Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, c.primID, filter));
|
||||
return ishit0 || ishit1;
|
||||
} else
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_POINT)
|
||||
|
||||
if (gtype == Geometry::GTY_SPHERE_POINT && (feature_mask & RTC_FEATURE_FLAG_SPHERE_POINT))
|
||||
{
|
||||
const Points* geom = context->scene->get<Points>(geomID);
|
||||
const Vec3ff xyzr = geom->vertex_safe(primID, ray.time());
|
||||
const Vec4f vr(xyzr.x,xyzr.y,xyzr.z,xyzr.w);
|
||||
return isa::SphereIntersector1<1>::intersect(true, ray, pre, vr, Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
}
|
||||
else if (gtype == Geometry::GTY_DISC_POINT && (feature_mask & RTC_FEATURE_FLAG_DISC_POINT))
|
||||
{
|
||||
const Points* geom = context->scene->get<Points>(geomID);
|
||||
const Vec3ff xyzr = geom->vertex_safe(primID, ray.time());
|
||||
const Vec4f vr(xyzr.x,xyzr.y,xyzr.z,xyzr.w);
|
||||
return isa::DiscIntersector1<1>::intersect(true, ray, nullptr, nullptr, pre, vr, Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
}
|
||||
else if (gtype == Geometry::GTY_ORIENTED_DISC_POINT && (feature_mask & RTC_FEATURE_FLAG_ORIENTED_DISC_POINT))
|
||||
{
|
||||
const Points* geom = context->scene->get<Points>(geomID);
|
||||
const Vec3ff xyzr = geom->vertex_safe(primID, ray.time());
|
||||
const Vec4f vr(xyzr.x,xyzr.y,xyzr.z,xyzr.w);
|
||||
const Vec3f n = geom->normal_safe(primID, ray.time());
|
||||
return isa::DiscIntersector1<1>::intersect(true, ray, nullptr, nullptr, pre, vr, n, Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
} else
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_CURVE)
|
||||
|
||||
if (geom->getTypeMask() & Geometry::MTY_CURVES)
|
||||
{
|
||||
if (gtype == Geometry::GTY_FLAT_LINEAR_CURVE && (feature_mask & RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE))
|
||||
{
|
||||
LineSegments* geom = context->scene->get<LineSegments>(geomID);
|
||||
Vec3ff v0, v1; geom->gather_safe(v0,v1,geom->segment(primID),ray.time());
|
||||
return isa::FlatLinearCurveIntersector1<1>::intersect(true,ray,context,geom,pre,v0,v1,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
}
|
||||
else if (gtype == Geometry::GTY_ROUND_LINEAR_CURVE && (feature_mask & RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE))
|
||||
{
|
||||
LineSegments* geom = context->scene->get<LineSegments>(geomID);
|
||||
Vec3ff v0,v1,v2,v3; geom->gather_safe(v0,v1,v2,v3,primID,geom->segment(primID),ray.time());
|
||||
return isa::RoundLinearCurveIntersector1<1>().intersect(true,ray,context,geom,pre,v0,v1,v2,v3,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
}
|
||||
else if (gtype == Geometry::GTY_CONE_LINEAR_CURVE && (feature_mask & RTC_FEATURE_FLAG_CONE_LINEAR_CURVE))
|
||||
{
|
||||
LineSegments* geom = context->scene->get<LineSegments>(geomID);
|
||||
Vec3ff v0 = zero, v1 = zero; bool cL = false, cR = false;
|
||||
geom->gather_safe(v0,v1,cL,cR,primID,geom->segment(primID),ray.time());
|
||||
return isa::ConeCurveIntersector1<1>().intersect(true,ray,context,geom,pre,v0,v1,cL,cR,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
}
|
||||
else
|
||||
{
|
||||
CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
|
||||
if (stype == Geometry::GTY_SUBTYPE_ORIENTED_CURVE && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_CURVES))
|
||||
{
|
||||
using Intersector = isa::OrientedCurve1Intersector1<CubicBezierCurve,8,1>;
|
||||
using Curve = isa::TensorLinearCubicBezierSurface3fa;
|
||||
if (geom->numTimeSegments() > 0 && (feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR))
|
||||
{
|
||||
Curve curve;
|
||||
if (basis == Geometry::GTY_BASIS_HERMITE && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE)) {
|
||||
curve = geom->getNormalOrientedHermiteCurveSafe<HermiteCurveT<Vec3ff>, HermiteCurveT<Vec3fa>, Curve>(context,ray.org,primID,ray.time());
|
||||
}
|
||||
else if (basis == Geometry::GTY_BASIS_BSPLINE && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE)) {
|
||||
curve = geom->getNormalOrientedCurveSafe<BSplineCurveT<Vec3ff>, BSplineCurveT<Vec3fa>, Curve>(context,ray.org,primID,ray.time());
|
||||
}
|
||||
else if (basis == Geometry::GTY_BASIS_CATMULL_ROM && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_CATMULL_ROM_CURVE)) {
|
||||
curve = geom->getNormalOrientedCurveSafe<CatmullRomCurveT<Vec3ff>, CatmullRomCurveT<Vec3fa>, Curve>(context,ray.org,primID,ray.time());
|
||||
}
|
||||
else {
|
||||
curve = geom->getNormalOrientedCurveSafe<Intersector::SourceCurve3ff, Intersector::SourceCurve3fa, Curve>(context,ray.org,primID,ray.time());
|
||||
}
|
||||
return Intersector().intersect(pre,ray,context,geom,primID,curve,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec3ff v0,v1,v2,v3;
|
||||
Vec3fa n0,n1,n2,n3;
|
||||
if (basis == Geometry::GTY_BASIS_HERMITE && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE))
|
||||
geom->gather_hermite_safe(v0,v1,n0,n1,v2,v3,n2,n3,geom->curve(primID),ray.time());
|
||||
else
|
||||
geom->gather_safe(v0,v1,v2,v3,n0,n1,n2,n3,geom->curve(primID),ray.time());
|
||||
isa::convert_to_bezier(gtype, v0,v1,v2,v3, n0,n1,n2,n3);
|
||||
return Intersector().intersect(pre,ray,context,geom,primID,v0,v1,v2,v3,n0,n1,n2,n3,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
}
|
||||
}
|
||||
else if (feature_mask & (RTC_FEATURE_FLAG_FLAT_CURVES | RTC_FEATURE_FLAG_ROUND_CURVES)) {
|
||||
Vec3ff v0,v1,v2,v3;
|
||||
if (basis == Geometry::GTY_BASIS_HERMITE && (feature_mask & (RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE | RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE)))
|
||||
geom->gather_hermite_safe(v0,v1,v2,v3,geom->curve(primID),ray.time());
|
||||
else
|
||||
geom->gather_safe(v0,v1,v2,v3,geom->curve(primID),ray.time());
|
||||
|
||||
isa::convert_to_bezier(gtype, v0,v1,v2,v3);
|
||||
|
||||
if (stype == Geometry::GTY_SUBTYPE_FLAT_CURVE && (feature_mask & RTC_FEATURE_FLAG_FLAT_CURVES))
|
||||
{
|
||||
isa::RibbonCurve1Intersector1<CubicBezierCurve,1> intersector;
|
||||
return intersector.intersect(pre,ray,context,geom,primID,v0,v1,v2,v3,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
}
|
||||
else if (stype == Geometry::GTY_SUBTYPE_ROUND_CURVE && (feature_mask & RTC_FEATURE_FLAG_ROUND_CURVES))
|
||||
{
|
||||
isa::SweepCurve1Intersector1<CubicBezierCurve> intersector;
|
||||
return intersector.intersect(pre,ray,context,geom,primID,v0,v1,v2,v3,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} else
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
__forceinline bool invokeTriangleIntersectionFilter(intel_ray_query_t& query, Geometry* geom, uint32_t bvh_level, RayHit& ray, Hit& hit, sycl::private_ptr<RayQueryContext> context, const RTCFeatureFlags feature_mask)
|
||||
{
|
||||
#if defined(EMBREE_FILTER_FUNCTION)
|
||||
if (!(feature_mask & RTC_FEATURE_FLAG_FILTER_FUNCTION) || runIntersectionFilter1SYCL(geom, ray, context, hit))
|
||||
#endif
|
||||
{
|
||||
intel_ray_query_commit_potential_hit_override (query, ray.tfar, float2(hit.u, hit.v));
|
||||
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
ray.instID[l] = hit.instID[l];
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
ray.instPrimID[l] = hit.instPrimID[l];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
__forceinline bool invokeTriangleIntersectionFilter(intel_ray_query_t& query, Geometry* geom, uint32_t bvh_level, Ray& ray, Hit& hit, sycl::private_ptr<RayQueryContext> context, const RTCFeatureFlags feature_mask)
|
||||
{
|
||||
bool ishit = true;
|
||||
#if defined(EMBREE_FILTER_FUNCTION)
|
||||
ishit = !(feature_mask & RTC_FEATURE_FLAG_FILTER_FUNCTION) || runIntersectionFilter1SYCL(geom, ray, context, hit);
|
||||
if (ishit)
|
||||
#endif
|
||||
{
|
||||
intel_ray_query_commit_potential_hit_override (query, ray.tfar, float2(hit.u, hit.v));
|
||||
}
|
||||
return ishit;
|
||||
}
|
||||
|
||||
__forceinline bool commit_potential_hit(intel_ray_query_t& query, RayHit& ray) {
|
||||
intel_ray_query_commit_potential_hit_override (query, ray.tfar, float2(ray.u, ray.v));
|
||||
return false;
|
||||
}
|
||||
|
||||
__forceinline bool commit_potential_hit(intel_ray_query_t& query, Ray& ray) {
|
||||
intel_ray_query_commit_potential_hit_override (query, ray.tfar, float2(0.0f, 0.0f));
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename Ray>
|
||||
__forceinline void trav_loop(intel_ray_query_t& query, Ray& ray, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, const RTCFeatureFlags feature_mask)
|
||||
{
|
||||
while (!intel_is_traversal_done(query))
|
||||
{
|
||||
intel_candidate_type_t candidate = intel_get_hit_candidate(query, intel_hit_type_potential_hit);
|
||||
|
||||
const unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit );
|
||||
const float3 org = intel_get_ray_origin ( query, bvh_level );
|
||||
const float3 dir = intel_get_ray_direction( query, bvh_level );
|
||||
const float t = intel_get_hit_distance(query, intel_hit_type_potential_hit);
|
||||
const float2 uv = intel_get_hit_barycentrics (query, intel_hit_type_potential_hit);
|
||||
const unsigned int geomID = intel_get_hit_geometry_id(query, intel_hit_type_potential_hit);
|
||||
const unsigned int primID = intel_get_hit_primitive_id(query, intel_hit_type_potential_hit);
|
||||
|
||||
ray.org = Vec3ff(org.x(), org.y(), org.z(), ray.tnear());
|
||||
ray.dir = Vec3ff(dir.x(), dir.y(), dir.z(), ray.time ());
|
||||
ray.tfar = intel_get_hit_distance(query, intel_hit_type_committed_hit);
|
||||
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
context->user->instStackSize = bvh_level;
|
||||
Scene* scene = scenes[bvh_level];
|
||||
#else
|
||||
const unsigned int instID = intel_get_hit_instance_id(query, intel_hit_type_potential_hit);
|
||||
|
||||
/* assume software instancing mode by default (required for rtcForwardRay) */
|
||||
Scene* scene = scenes[bvh_level];
|
||||
|
||||
/* if we are in hardware instancing mode and we need to read the scene from the instance */
|
||||
if (bvh_level > 0 && instID != RTC_INVALID_GEOMETRY_ID) {
|
||||
Instance* inst = scenes[0]->get<Instance>(instID);
|
||||
scene = (Scene*) inst->object;
|
||||
context->user->instID[0] = instID;
|
||||
}
|
||||
else if (bvh_level == 0)
|
||||
context->user->instID[0] = RTC_INVALID_GEOMETRY_ID;
|
||||
|
||||
#endif
|
||||
context->scene = scene;
|
||||
Geometry* geom = scene->get(geomID);
|
||||
|
||||
/* perform ray masking */
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
if (ray.mask & geom->mask)
|
||||
#endif
|
||||
{
|
||||
if (candidate == intel_candidate_type_procedural)
|
||||
{
|
||||
if (intersect_primitive(query,ray,scenes,geom,context,geomID,primID,feature_mask))
|
||||
if (commit_potential_hit (query, ray))
|
||||
break; // shadow rays break at first hit
|
||||
|
||||
}
|
||||
else // if (candidate == TRIANGLE)
|
||||
{
|
||||
ray.tfar = t;
|
||||
Vec3f Ng = intel_get_hit_triangle_normal(query, intel_hit_type_potential_hit);
|
||||
Hit hit(context->user,geomID,primID,Vec2f(uv.x(),uv.y()),Ng);
|
||||
if (invokeTriangleIntersectionFilter(query, geom, bvh_level, ray, hit, context, feature_mask))
|
||||
break; // shadow rays break at first hit
|
||||
}
|
||||
}
|
||||
|
||||
intel_ray_query_start_traversal(query);
|
||||
intel_ray_query_sync(query);
|
||||
}
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersectRTHW(sycl::global_ptr<RTCSceneTy> hscene, sycl::private_ptr<RTCRayQueryContext> ucontext, sycl::private_ptr<RTCRayHit> rayhit_i, sycl::private_ptr<RTCIntersectArguments> args)
|
||||
{
|
||||
Scene* scene = (Scene*) hscene.get();
|
||||
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) scene->hwaccel.data();
|
||||
|
||||
Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1];
|
||||
scenes[0] = scene;
|
||||
|
||||
RayQueryContext context(scene, ucontext, args);
|
||||
|
||||
RayHit ray;
|
||||
ray.org = Vec3ff(rayhit_i->ray.org_x, rayhit_i->ray.org_y, rayhit_i->ray.org_z, rayhit_i->ray.tnear);
|
||||
ray.dir = Vec3ff(rayhit_i->ray.dir_x, rayhit_i->ray.dir_y, rayhit_i->ray.dir_z, rayhit_i->ray.time);
|
||||
ray.tfar = rayhit_i->ray.tfar;
|
||||
ray.mask = rayhit_i->ray.mask;
|
||||
ray.id = rayhit_i->ray.id;
|
||||
ray.flags = rayhit_i->ray.flags;
|
||||
ray.Ng = Vec3f(0,0,0);
|
||||
ray.u = 0;
|
||||
ray.v = 0;
|
||||
ray.primID = RTC_INVALID_GEOMETRY_ID;
|
||||
ray.geomID = RTC_INVALID_GEOMETRY_ID;
|
||||
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
for (uint32_t l=0; l<RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
|
||||
ray.instID[l] = RTC_INVALID_GEOMETRY_ID;
|
||||
}
|
||||
#else
|
||||
ray.instID[0] = RTC_INVALID_GEOMETRY_ID;
|
||||
#endif
|
||||
|
||||
intel_ray_desc_t raydesc;
|
||||
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
|
||||
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
|
||||
raydesc.tmin = ray.tnear();
|
||||
raydesc.tmax = ray.tfar;
|
||||
raydesc.mask = mask32_to_mask8(ray.mask);
|
||||
raydesc.flags = intel_ray_flags_none;
|
||||
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
raydesc.flags |= intel_ray_flags_force_non_opaque;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING)
|
||||
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
|
||||
#endif
|
||||
|
||||
if (context.enforceArgumentFilterFunction())
|
||||
raydesc.flags |= intel_ray_flags_force_non_opaque;
|
||||
|
||||
uint32_t bvh_id = 0;
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
|
||||
if (args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
|
||||
float time = clamp(ray.time(),0.0f,1.0f);
|
||||
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
|
||||
}
|
||||
|
||||
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
|
||||
|
||||
intel_ray_query_t query = intel_ray_query_init(raydesc, hwaccel_ptr);
|
||||
intel_ray_query_start_traversal(query);
|
||||
intel_ray_query_sync(query);
|
||||
|
||||
if (args->feature_mask & TRAV_LOOP_FEATURES) {
|
||||
trav_loop(query,ray,scenes,&context,args->feature_mask);
|
||||
}
|
||||
|
||||
bool valid = intel_has_committed_hit(query);
|
||||
|
||||
if (valid)
|
||||
{
|
||||
float t = intel_get_hit_distance(query, intel_hit_type_committed_hit);
|
||||
float2 uv = intel_get_hit_barycentrics (query, intel_hit_type_committed_hit);
|
||||
unsigned int geomID = intel_get_hit_geometry_id(query, intel_hit_type_committed_hit);
|
||||
|
||||
unsigned int primID = ray.primID;
|
||||
if (intel_get_hit_candidate(query, intel_hit_type_committed_hit) == intel_candidate_type_triangle)
|
||||
primID = intel_get_hit_triangle_primitive_id(query, intel_hit_type_committed_hit);
|
||||
|
||||
rayhit_i->ray.tfar = t;
|
||||
rayhit_i->hit.geomID = geomID;
|
||||
rayhit_i->hit.primID = primID;
|
||||
rayhit_i->hit.u = uv.x();
|
||||
rayhit_i->hit.v = uv.y();
|
||||
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
for (uint32_t l=0; l<RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
|
||||
rayhit_i->hit.instID[l] = ray.instID[l];
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
rayhit_i->hit.instPrimID[l] = ray.instPrimID[l];
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
unsigned int bvh_level = intel_get_hit_bvh_level(query, intel_hit_type_committed_hit);
|
||||
unsigned int instID = intel_get_hit_instance_id(query, intel_hit_type_committed_hit);
|
||||
/* when rtcForwardRay was used then we are in software instancing mode */
|
||||
if (bvh_level > 0 && instID == RTC_INVALID_GEOMETRY_ID)
|
||||
instID = ray.instID[0];
|
||||
|
||||
rayhit_i->hit.instID[0] = instID;
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
rayhit_i->hit.instPrimID[0] = ray.instPrimID[0];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* calculate geometry normal for hardware accelerated triangles */
|
||||
if (intel_get_hit_candidate(query, intel_hit_type_committed_hit) == intel_candidate_type_triangle)
|
||||
ray.Ng = intel_get_hit_triangle_normal(query, intel_hit_type_committed_hit);
|
||||
|
||||
rayhit_i->hit.Ng_x = ray.Ng.x;
|
||||
rayhit_i->hit.Ng_y = ray.Ng.y;
|
||||
rayhit_i->hit.Ng_z = ray.Ng.z;
|
||||
}
|
||||
else
|
||||
{
|
||||
rayhit_i->hit.geomID = -1;
|
||||
}
|
||||
|
||||
intel_ray_query_abandon(query);
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL __attribute__((always_inline)) void rtcOccludedRTHW(sycl::global_ptr<RTCSceneTy> hscene, sycl::private_ptr<RTCRayQueryContext> ucontext, sycl::private_ptr<RTCRay> ray_i, sycl::private_ptr<RTCOccludedArguments> args)
|
||||
{
|
||||
Scene* scene = (Scene*) hscene.get();
|
||||
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) scene->hwaccel.data();
|
||||
|
||||
Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1];
|
||||
scenes[0] = scene;
|
||||
|
||||
RayQueryContext context(scene, ucontext, args);
|
||||
|
||||
Ray ray;
|
||||
ray.org = Vec3ff(ray_i->org_x, ray_i->org_y, ray_i->org_z, ray_i->tnear);
|
||||
ray.dir = Vec3ff(ray_i->dir_x, ray_i->dir_y, ray_i->dir_z, ray_i->time);
|
||||
ray.tfar = ray_i->tfar;
|
||||
ray.mask = ray_i->mask;
|
||||
ray.id = ray_i->id;
|
||||
ray.flags = ray_i->flags;
|
||||
|
||||
intel_ray_desc_t raydesc;
|
||||
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
|
||||
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
|
||||
raydesc.tmin = ray.tnear();
|
||||
raydesc.tmax = ray.tfar;
|
||||
raydesc.mask = mask32_to_mask8(ray.mask);
|
||||
raydesc.flags = intel_ray_flags_accept_first_hit_and_end_search;
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING)
|
||||
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
|
||||
#endif
|
||||
|
||||
if (context.enforceArgumentFilterFunction())
|
||||
raydesc.flags |= intel_ray_flags_force_non_opaque;
|
||||
|
||||
uint32_t bvh_id = 0;
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
|
||||
if (args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
|
||||
float time = clamp(ray.time(),0.0f,1.0f);
|
||||
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
|
||||
}
|
||||
|
||||
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
|
||||
|
||||
intel_ray_query_t query = intel_ray_query_init(raydesc, hwaccel_ptr);
|
||||
intel_ray_query_start_traversal(query);
|
||||
intel_ray_query_sync(query);
|
||||
|
||||
if (args->feature_mask & TRAV_LOOP_FEATURES) {
|
||||
trav_loop(query,ray,scenes,&context,args->feature_mask);
|
||||
}
|
||||
|
||||
if (intel_has_committed_hit(query))
|
||||
ray_i->tfar = -INFINITY;
|
||||
|
||||
intel_ray_query_abandon(query);
|
||||
}
|
||||
|
||||
RTC_NAMESPACE_END;
|
||||
37
Framework/external/embree/kernels/sycl/rthwif_embree.h
vendored
Normal file
37
Framework/external/embree/kernels/sycl/rthwif_embree.h
vendored
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#pragma clang diagnostic ignored "-W#pragma-messages"
|
||||
|
||||
#include <sycl/sycl.hpp>
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
#include "../config.h"
|
||||
|
||||
#include "../../include/embree4/rtcore.h"
|
||||
#include "../../include/embree4/rtcore_ray.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
inline uint8_t mask32_to_mask8( uint32_t mask ) {
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
return (mask & 0xFFFFFF80) ? (0x80 | mask) : mask; // bit 7 indicates that some bit >= 7 is set
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
struct EmbreeHWAccel
|
||||
{
|
||||
uint32_t numTimeSegments;
|
||||
void* AccelTable[1];
|
||||
};
|
||||
}
|
||||
|
||||
SYCL_EXTERNAL void rtcIntersectRTHW(sycl::global_ptr<RTCSceneTy> hscene, sycl::private_ptr<RTCRayQueryContext> context, sycl::private_ptr<RTCRayHit> rayhit, sycl::private_ptr<RTCIntersectArguments> args);
|
||||
SYCL_EXTERNAL void rtcOccludedRTHW(sycl::global_ptr<RTCSceneTy> hscene, sycl::private_ptr<RTCRayQueryContext> context, sycl::private_ptr<RTCRay> ray, sycl::private_ptr<RTCOccludedArguments> args);
|
||||
652
Framework/external/embree/kernels/sycl/rthwif_embree_builder.cpp
vendored
Normal file
652
Framework/external/embree/kernels/sycl/rthwif_embree_builder.cpp
vendored
Normal file
|
|
@ -0,0 +1,652 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#if !defined(_CRT_SECURE_NO_WARNINGS)
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#include "rthwif_embree.h"
|
||||
#include "rthwif_embree_builder.h"
|
||||
#include "../common/scene.h"
|
||||
#include "../builders/primrefgen.h"
|
||||
#include "../level_zero/ze_wrapper.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
using namespace embree::isa;
|
||||
|
||||
enum Flags : uint32_t {
|
||||
NONE,
|
||||
DEPTH_TEST_LESS_EQUAL = 1 << 0 // when set we use <= for depth test, otherwise <
|
||||
};
|
||||
|
||||
static void align(size_t& ofs, size_t alignment) {
|
||||
ofs = (ofs+(alignment-1))&(-alignment);
|
||||
}
|
||||
|
||||
struct DispatchGlobals
|
||||
{
|
||||
uint64_t rtMemBasePtr; // base address of the allocated stack memory
|
||||
uint64_t callStackHandlerKSP; // this is the KSP of the continuation handler that is invoked by BTD when the read KSP is 0
|
||||
uint32_t asyncStackSize; // async-RT stack size in 64 byte blocks
|
||||
uint32_t numDSSRTStacks : 16; // number of stacks per DSS
|
||||
uint32_t syncRayQueryCount : 4; // number of ray queries in the sync-RT stack: 0-15 mapped to: 1-16
|
||||
unsigned _reserved_mbz : 12;
|
||||
uint32_t maxBVHLevels; // the maximal number of supported instancing levels (0->8, 1->1, 2->2, ...)
|
||||
Flags flags; // per context control flags
|
||||
|
||||
static inline size_t GetDispatchGlobalsSize()
|
||||
{
|
||||
size_t maxBVHLevels = RTC_MAX_INSTANCE_LEVEL_COUNT+1;
|
||||
size_t rtstack_bytes = (64+maxBVHLevels*(64+32)+63)&-64;
|
||||
size_t num_rtstacks = 1<<17; // this is sufficiently large also for PVC
|
||||
size_t dispatchGlobalSize = 128+num_rtstacks*rtstack_bytes;
|
||||
return dispatchGlobalSize;
|
||||
}
|
||||
};
|
||||
|
||||
void* zeRTASInitExp(sycl::device device, sycl::context context)
|
||||
{
|
||||
if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
|
||||
return nullptr;
|
||||
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
|
||||
size_t dispatchGlobalSize = DispatchGlobals::GetDispatchGlobalsSize();
|
||||
void* dispatchGlobalsPtr = rthwifAllocAccelBuffer(nullptr,dispatchGlobalSize,device,context);
|
||||
memset(dispatchGlobalsPtr, 0, dispatchGlobalSize);
|
||||
|
||||
DispatchGlobals* dg = (DispatchGlobals*) dispatchGlobalsPtr;
|
||||
dg->rtMemBasePtr = (uint64_t) dispatchGlobalsPtr + dispatchGlobalSize;
|
||||
dg->callStackHandlerKSP = 0;
|
||||
dg->asyncStackSize = 0;
|
||||
dg->numDSSRTStacks = 0;
|
||||
dg->syncRayQueryCount = 0;
|
||||
dg->_reserved_mbz = 0;
|
||||
dg->maxBVHLevels = RTC_MAX_INSTANCE_LEVEL_COUNT+1;
|
||||
dg->flags = DEPTH_TEST_LESS_EQUAL;
|
||||
|
||||
return dispatchGlobalsPtr;
|
||||
|
||||
#else
|
||||
|
||||
return nullptr;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void rthwifCleanup(Device* embree_device, void* dispatchGlobalsPtr, sycl::context context)
|
||||
{
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
size_t dispatchGlobalSize = DispatchGlobals::GetDispatchGlobalsSize();
|
||||
rthwifFreeAccelBuffer(embree_device, dispatchGlobalsPtr, dispatchGlobalSize, context);
|
||||
#endif
|
||||
}
|
||||
|
||||
int rthwifIsSYCLDeviceSupported(const sycl::device& sycl_device)
|
||||
{
|
||||
if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
|
||||
return -1;
|
||||
|
||||
/* check if ray tracing extension is available */
|
||||
sycl::platform platform = sycl_device.get_platform();
|
||||
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
|
||||
|
||||
uint32_t count = 0;
|
||||
std::vector<ze_driver_extension_properties_t> extensions;
|
||||
ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
|
||||
if (result != ZE_RESULT_SUCCESS) return -1;
|
||||
|
||||
extensions.resize(count);
|
||||
result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
|
||||
if (result != ZE_RESULT_SUCCESS) return -1;
|
||||
|
||||
bool ze_extension_ray_tracing = false;
|
||||
bool ze_rtas_builder = false;
|
||||
for (uint32_t i=0; i<extensions.size(); i++)
|
||||
{
|
||||
//std::cout << extensions[i].name << " version " << extensions[i].version << std::endl;
|
||||
|
||||
if (strncmp("ZE_extension_raytracing",extensions[i].name,sizeof(extensions[i].name)) == 0)
|
||||
ze_extension_ray_tracing = true;
|
||||
|
||||
if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
|
||||
ze_rtas_builder = true;
|
||||
}
|
||||
if (!ze_extension_ray_tracing)
|
||||
return -1;
|
||||
|
||||
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
|
||||
if (!ze_rtas_builder)
|
||||
return -1;
|
||||
#endif
|
||||
|
||||
/* check if ray queries are supported */
|
||||
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(sycl_device);
|
||||
|
||||
/* check if ray tracing hardware is supported */
|
||||
ze_device_raytracing_ext_properties_t raytracing_properties;
|
||||
memset(&raytracing_properties,0,sizeof(raytracing_properties));
|
||||
raytracing_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_RAYTRACING_EXT_PROPERTIES;
|
||||
raytracing_properties.pNext = nullptr;
|
||||
|
||||
ze_device_module_properties_t module_properties;
|
||||
memset(&module_properties,0,sizeof(module_properties));
|
||||
module_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES;
|
||||
module_properties.pNext = &raytracing_properties;
|
||||
|
||||
result = ZeWrapper::zeDeviceGetModuleProperties(hDevice, &module_properties);
|
||||
if (result != ZE_RESULT_SUCCESS) return -1;
|
||||
|
||||
const bool rayQuerySupported = raytracing_properties.flags & ZE_DEVICE_RAYTRACING_EXT_FLAG_RAYQUERY;
|
||||
if (!rayQuerySupported)
|
||||
return -1;
|
||||
|
||||
return sycl_device.get_info<sycl::info::device::max_compute_units>();
|
||||
}
|
||||
|
||||
void* rthwifAllocAccelBuffer(Device* embree_device, size_t bytes, sycl::device device, sycl::context context)
|
||||
{
|
||||
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
|
||||
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);
|
||||
|
||||
ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
|
||||
ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
|
||||
ze_result_t err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDeviceGetProperties properties failed");
|
||||
|
||||
ze_raytracing_mem_alloc_ext_desc_t rt_desc;
|
||||
rt_desc.stype = ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC;
|
||||
rt_desc.pNext = nullptr;
|
||||
rt_desc.flags = 0;
|
||||
|
||||
ze_relaxed_allocation_limits_exp_desc_t relaxed;
|
||||
relaxed.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC;
|
||||
relaxed.pNext = &rt_desc;
|
||||
relaxed.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE;
|
||||
|
||||
ze_device_mem_alloc_desc_t device_desc;
|
||||
device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
|
||||
device_desc.pNext = &relaxed;
|
||||
device_desc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED;
|
||||
device_desc.ordinal = 0;
|
||||
|
||||
ze_host_mem_alloc_desc_t host_desc;
|
||||
host_desc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
|
||||
host_desc.pNext = nullptr;
|
||||
host_desc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED;
|
||||
|
||||
void* ptr = nullptr;
|
||||
|
||||
if (embree_device) embree_device->memoryMonitor(bytes,false);
|
||||
ze_result_t result = ZeWrapper::zeMemAllocShared(hContext,&device_desc,&host_desc,bytes,rtasProp.rtasBufferAlignment,hDevice,&ptr);
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"rtas memory allocation failed");
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void rthwifFreeAccelBuffer(Device* embree_device, void* ptr, size_t bytes, sycl::context context)
|
||||
{
|
||||
if (ptr == nullptr) return;
|
||||
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
|
||||
if (embree_device) embree_device->memoryMonitor(-bytes,false);
|
||||
ze_result_t result = ZeWrapper::zeMemFree(hContext,ptr);
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"rtas memory free failed");
|
||||
}
|
||||
|
||||
struct GEOMETRY_INSTANCE_DESC : ze_rtas_builder_instance_geometry_info_exp_t
|
||||
{
|
||||
ze_rtas_transform_float3x4_aligned_column_major_exp_t xfmdata;
|
||||
};
|
||||
|
||||
struct GEOMETRY_TYPE
|
||||
{
|
||||
GEOMETRY_TYPE(ze_rtas_builder_geometry_type_exp_t type, size_t extraBytes = 0)
|
||||
: type(type), extraBytes(extraBytes) {}
|
||||
|
||||
ze_rtas_builder_geometry_type_exp_t type;
|
||||
size_t extraBytes;
|
||||
};
|
||||
|
||||
size_t sizeof_RTHWIF_GEOMETRY(GEOMETRY_TYPE type)
|
||||
{
|
||||
switch (type.type) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return sizeof(ze_rtas_builder_triangles_geometry_info_exp_t)+type.extraBytes;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return sizeof(ze_rtas_builder_quads_geometry_info_exp_t)+type.extraBytes;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return sizeof(ze_rtas_builder_procedural_geometry_info_exp_t)+type.extraBytes;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : return sizeof(ze_rtas_builder_instance_geometry_info_exp_t)+type.extraBytes;
|
||||
default: assert(false); return 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t alignof_RTHWIF_GEOMETRY(GEOMETRY_TYPE type)
|
||||
{
|
||||
switch (type.type) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return alignof(ze_rtas_builder_triangles_geometry_info_exp_t);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return alignof(ze_rtas_builder_quads_geometry_info_exp_t);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return alignof(ze_rtas_builder_procedural_geometry_info_exp_t);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : return alignof(ze_rtas_builder_instance_geometry_info_exp_t);
|
||||
default: assert(false); return 0;
|
||||
}
|
||||
}
|
||||
|
||||
ze_rtas_builder_geometry_exp_flags_t getGeometryFlags(Scene* scene, Geometry* geom)
|
||||
{
|
||||
/* invoke any hit callback when Embree filter functions are present */
|
||||
ze_rtas_builder_geometry_exp_flags_t gflags = 0;
|
||||
if (geom->hasArgumentFilterFunctions() || geom->hasGeometryFilterFunctions())
|
||||
gflags = ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE;
|
||||
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
/* invoke any hit callback when high mask bits are enabled */
|
||||
if (geom->mask & 0xFFFFFF80)
|
||||
gflags = ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE;
|
||||
#endif
|
||||
|
||||
return gflags;
|
||||
}
|
||||
|
||||
void createGeometryDesc(ze_rtas_builder_triangles_geometry_info_exp_t* out, Scene* scene, TriangleMesh* geom)
|
||||
{
|
||||
memset(out,0,sizeof(ze_rtas_builder_triangles_geometry_info_exp_t));
|
||||
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES;
|
||||
out->geometryFlags = getGeometryFlags(scene,geom);
|
||||
out->geometryMask = mask32_to_mask8(geom->mask);
|
||||
out->triangleFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32;
|
||||
out->vertexFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
|
||||
out->pTriangleBuffer = (ze_rtas_triangle_indices_uint32_exp_t*) geom->triangles.getPtr();
|
||||
out->triangleCount = geom->triangles.size();
|
||||
out->triangleStride = geom->triangles.getStride();
|
||||
out->pVertexBuffer = (ze_rtas_float3_exp_t*) geom->vertices0.getPtr();
|
||||
out->vertexCount = geom->vertices0.size();
|
||||
out->vertexStride = geom->vertices0.getStride();
|
||||
}
|
||||
|
||||
void createGeometryDesc(ze_rtas_builder_quads_geometry_info_exp_t* out, Scene* scene, QuadMesh* geom)
|
||||
{
|
||||
memset(out,0,sizeof(ze_rtas_builder_quads_geometry_info_exp_t));
|
||||
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS;
|
||||
out->geometryFlags = getGeometryFlags(scene,geom);
|
||||
out->geometryMask = mask32_to_mask8(geom->mask);
|
||||
out->quadFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32;
|
||||
out->vertexFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
|
||||
out->pQuadBuffer = (ze_rtas_quad_indices_uint32_exp_t*) geom->quads.getPtr();
|
||||
out->quadCount = geom->quads.size();
|
||||
out->quadStride = geom->quads.getStride();
|
||||
out->pVertexBuffer = (ze_rtas_float3_exp_t*) geom->vertices0.getPtr();
|
||||
out->vertexCount = geom->vertices0.size();
|
||||
out->vertexStride = geom->vertices0.getStride();
|
||||
}
|
||||
|
||||
void getProceduralAABB(ze_rtas_geometry_aabbs_exp_cb_params_t* params)
|
||||
{
|
||||
assert(params->stype == ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS);
|
||||
|
||||
BBox1f time_range = * (BBox1f*) params->pBuildUserPtr;
|
||||
Geometry* geom = (Geometry*) params->pGeomUserPtr;
|
||||
ze_rtas_aabb_exp_t* boundsOut = params->pBoundsOut;
|
||||
|
||||
for (uint32_t i=0; i<params->primIDCount; i++)
|
||||
{
|
||||
const uint32_t primID = params->primID+i;
|
||||
PrimRef prim;
|
||||
range<size_t> r(primID);
|
||||
size_t k = 0;
|
||||
uint32_t geomID = 0;
|
||||
|
||||
PrimInfo pinfo = empty;
|
||||
if (geom->numTimeSegments() > 0)
|
||||
pinfo = geom->createPrimRefArrayMB(&prim,time_range,r,k,geomID);
|
||||
else
|
||||
pinfo = geom->createPrimRefArray(&prim,r,k,geomID);
|
||||
|
||||
/* invalid primitive */
|
||||
if (pinfo.size() == 0) {
|
||||
boundsOut[i].lower.x = pos_inf;
|
||||
boundsOut[i].lower.y = pos_inf;
|
||||
boundsOut[i].lower.z = pos_inf;
|
||||
boundsOut[i].upper.x = neg_inf;
|
||||
boundsOut[i].upper.y = neg_inf;
|
||||
boundsOut[i].upper.z = neg_inf;
|
||||
}
|
||||
else
|
||||
{
|
||||
BBox3fa bounds = prim.bounds();
|
||||
boundsOut[i].lower.x = bounds.lower.x;
|
||||
boundsOut[i].lower.y = bounds.lower.y;
|
||||
boundsOut[i].lower.z = bounds.lower.z;
|
||||
boundsOut[i].upper.x = bounds.upper.x;
|
||||
boundsOut[i].upper.y = bounds.upper.y;
|
||||
boundsOut[i].upper.z = bounds.upper.z;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void createGeometryDescProcedural(ze_rtas_builder_procedural_geometry_info_exp_t* out, Scene* scene, Geometry* geom)
|
||||
{
|
||||
uint32_t numPrimitives = geom->size();
|
||||
if (GridMesh* mesh = dynamic_cast<GridMesh*>(geom))
|
||||
numPrimitives = mesh->getNumTotalQuads(); // FIXME: slow
|
||||
|
||||
memset(out,0,sizeof(ze_rtas_builder_procedural_geometry_info_exp_t));
|
||||
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL;
|
||||
out->geometryFlags = 0;
|
||||
out->geometryMask = mask32_to_mask8(geom->mask);
|
||||
out->primCount = numPrimitives;
|
||||
out->pfnGetBoundsCb = getProceduralAABB;
|
||||
out->pGeomUserPtr = geom;
|
||||
}
|
||||
|
||||
void createGeometryDesc(GEOMETRY_INSTANCE_DESC* out, Scene* scene, Instance* geom)
|
||||
{
|
||||
assert(geom->gsubtype == AccelSet::GTY_SUBTYPE_INSTANCE_QUATERNION);
|
||||
memset(out,0,sizeof(GEOMETRY_INSTANCE_DESC));
|
||||
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE;
|
||||
out->instanceFlags = 0;
|
||||
out->geometryMask = mask32_to_mask8(geom->mask);
|
||||
out->instanceUserID = 0;
|
||||
const AffineSpace3fa local2world = geom->getLocal2World();
|
||||
out->transformFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR;
|
||||
out->pTransform = (float*) &out->xfmdata;
|
||||
out->pBounds = (ze_rtas_aabb_exp_t*) &dynamic_cast<Scene*>(geom->object)->hwaccel_bounds;
|
||||
out->xfmdata = *(ze_rtas_transform_float3x4_aligned_column_major_exp_t*) &local2world;
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) dynamic_cast<Scene*>(geom->object)->hwaccel.data();
|
||||
out->pAccelerationStructure = hwaccel->AccelTable[0];
|
||||
}
|
||||
|
||||
void createGeometryDesc(ze_rtas_builder_instance_geometry_info_exp_t* out, Scene* scene, Instance* geom)
|
||||
{
|
||||
assert(geom->gsubtype == AccelSet::GTY_SUBTYPE_DEFAULT);
|
||||
memset(out,0,sizeof(ze_rtas_builder_instance_geometry_info_exp_t));
|
||||
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE;
|
||||
out->instanceFlags = 0;
|
||||
out->geometryMask = mask32_to_mask8(geom->mask);
|
||||
out->instanceUserID = 0;
|
||||
out->transformFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR;
|
||||
out->pTransform = (float*) &geom->local2world[0];
|
||||
out->pBounds = (ze_rtas_aabb_exp_t*) &dynamic_cast<Scene*>(geom->object)->hwaccel_bounds;
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) dynamic_cast<Scene*>(geom->object)->hwaccel.data();
|
||||
out->pAccelerationStructure = hwaccel->AccelTable[0];
|
||||
}
|
||||
|
||||
void createGeometryDesc(char* out, Scene* scene, Geometry* geom, GEOMETRY_TYPE type)
|
||||
{
|
||||
switch (type.type) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return createGeometryDesc((ze_rtas_builder_triangles_geometry_info_exp_t*)out,scene,dynamic_cast<TriangleMesh*>(geom));
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return createGeometryDesc((ze_rtas_builder_quads_geometry_info_exp_t*)out,scene,dynamic_cast<QuadMesh*>(geom));
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return createGeometryDescProcedural((ze_rtas_builder_procedural_geometry_info_exp_t*)out,scene,geom);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE:
|
||||
if (type.extraBytes) return createGeometryDesc((GEOMETRY_INSTANCE_DESC*)out,scene,dynamic_cast<Instance*>(geom));
|
||||
else return createGeometryDesc((ze_rtas_builder_instance_geometry_info_exp_t*)out,scene,dynamic_cast<Instance*>(geom));
|
||||
default: assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
ze_rtas_builder_build_quality_hint_exp_t convertBuildQuality(RTCBuildQuality quality_flags)
|
||||
{
|
||||
switch (quality_flags) {
|
||||
case RTC_BUILD_QUALITY_LOW : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_LOW;
|
||||
case RTC_BUILD_QUALITY_MEDIUM : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_MEDIUM;
|
||||
case RTC_BUILD_QUALITY_HIGH : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH;
|
||||
case RTC_BUILD_QUALITY_REFIT : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_LOW;
|
||||
default : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_MEDIUM;
|
||||
}
|
||||
}
|
||||
|
||||
ze_rtas_builder_build_op_exp_flags_t convertBuildFlags(RTCSceneFlags scene_flags, RTCBuildQuality quality_flags)
|
||||
{
|
||||
ze_rtas_builder_build_op_exp_flags_t result = 0;
|
||||
if (scene_flags & RTC_SCENE_FLAG_COMPACT) result |= ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_COMPACT;
|
||||
|
||||
/* only in high quality build mode spatial splits are allowed in Embree */
|
||||
if (quality_flags != RTC_BUILD_QUALITY_HIGH)
|
||||
result |= ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BBox3f rthwifBuild(Scene* scene, AccelBuffer& accel)
|
||||
{
|
||||
DeviceGPU* gpuDevice = dynamic_cast<DeviceGPU*>(scene->device);
|
||||
if (gpuDevice == nullptr) throw std::runtime_error("internal error");
|
||||
|
||||
if (scene->size() > 0x00FFFFFF)
|
||||
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "too many geometries inside scene");
|
||||
|
||||
sycl::device device = gpuDevice->getGPUDevice();
|
||||
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);
|
||||
sycl::platform platform = device.get_platform();
|
||||
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
|
||||
|
||||
/* create L0 builder object */
|
||||
ze_rtas_builder_exp_desc_t builderDesc = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC };
|
||||
ze_rtas_builder_exp_handle_t hBuilder = nullptr;
|
||||
ze_result_t err = ZeWrapper::zeRTASBuilderCreateExp(hDriver, &builderDesc, &hBuilder);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "ze_rtas_builder creation failed");
|
||||
|
||||
auto getType = [&](unsigned int geomID) -> GEOMETRY_TYPE
|
||||
{
|
||||
/* no HW support for MB yet */
|
||||
if (scene->get(geomID)->numTimeSegments() > 0)
|
||||
return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL;
|
||||
|
||||
switch (scene->get(geomID)->getType()) {
|
||||
case Geometry::GTY_FLAT_LINEAR_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ROUND_LINEAR_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ORIENTED_LINEAR_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_CONE_LINEAR_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
|
||||
case Geometry::GTY_FLAT_BEZIER_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ROUND_BEZIER_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ORIENTED_BEZIER_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
|
||||
case Geometry::GTY_FLAT_BSPLINE_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ROUND_BSPLINE_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ORIENTED_BSPLINE_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
|
||||
case Geometry::GTY_FLAT_HERMITE_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ROUND_HERMITE_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ORIENTED_HERMITE_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
|
||||
case Geometry::GTY_FLAT_CATMULL_ROM_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ROUND_CATMULL_ROM_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ORIENTED_CATMULL_ROM_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
|
||||
case Geometry::GTY_TRIANGLE_MESH: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES; break;
|
||||
case Geometry::GTY_QUAD_MESH : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS; break;
|
||||
case Geometry::GTY_GRID_MESH : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_SUBDIV_MESH : assert(false); return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
|
||||
case Geometry::GTY_SPHERE_POINT : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_DISC_POINT : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_ORIENTED_DISC_POINT: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
|
||||
case Geometry::GTY_USER_GEOMETRY : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_INSTANCE_ARRAY : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT < 2
|
||||
case Geometry::GTY_INSTANCE_CHEAP :
|
||||
case Geometry::GTY_INSTANCE_EXPENSIVE: {
|
||||
Instance* instance = scene->get<Instance>(geomID);
|
||||
EmbreeHWAccel* object = (EmbreeHWAccel*)((Scene*)instance->object)->hwaccel.data();
|
||||
if (object->numTimeSegments > 1) return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; // we need to handle instances in procedural mode if instanced scene has motion blur
|
||||
if (instance->mask & 0xFFFFFF80) return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; // we need to handle instances in procedural mode if high mask bits are set
|
||||
else if (instance->gsubtype == AccelSet::GTY_SUBTYPE_INSTANCE_QUATERNION)
|
||||
return GEOMETRY_TYPE(ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE,sizeof(GEOMETRY_INSTANCE_DESC)-sizeof(ze_rtas_builder_instance_geometry_info_exp_t));
|
||||
else return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE;
|
||||
}
|
||||
#else
|
||||
case Geometry::GTY_INSTANCE_CHEAP : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
case Geometry::GTY_INSTANCE_EXPENSIVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
|
||||
#endif
|
||||
|
||||
default: assert(false); return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL;
|
||||
}
|
||||
};
|
||||
|
||||
/* calculate maximal number of motion blur time segments in scene */
|
||||
uint32_t maxTimeSegments = 1;
|
||||
for (size_t geomID=0; geomID<scene->size(); geomID++)
|
||||
{
|
||||
Geometry* geom = scene->get(geomID);
|
||||
if (geom == nullptr) continue;
|
||||
maxTimeSegments = std::max(maxTimeSegments, geom->numTimeSegments());
|
||||
}
|
||||
|
||||
/* calculate size of geometry descriptor buffer */
|
||||
size_t totalBytes = 0;
|
||||
for (size_t geomID=0; geomID<scene->size(); geomID++)
|
||||
{
|
||||
Geometry* geom = scene->get(geomID);
|
||||
if (geom == nullptr) continue;
|
||||
|
||||
const GEOMETRY_TYPE type = getType(geomID);
|
||||
align(totalBytes,alignof_RTHWIF_GEOMETRY(type));
|
||||
totalBytes += sizeof_RTHWIF_GEOMETRY(type);
|
||||
}
|
||||
|
||||
/* fill geomdesc buffers */
|
||||
mvector<ze_rtas_builder_geometry_info_exp_t*> geomDescr(scene->device, scene->size());
|
||||
mvector<char> geomDescrData(scene->device,totalBytes);
|
||||
|
||||
size_t offset = 0;
|
||||
for (size_t geomID=0; geomID<scene->size(); geomID++)
|
||||
{
|
||||
geomDescr[geomID] = nullptr;
|
||||
Geometry* geom = scene->get(geomID);
|
||||
if (geom == nullptr) continue;
|
||||
|
||||
const GEOMETRY_TYPE type = getType(geomID);
|
||||
align(offset,alignof_RTHWIF_GEOMETRY(type));
|
||||
createGeometryDesc(&geomDescrData[offset],scene,scene->get(geomID),type);
|
||||
geomDescr[geomID] = (ze_rtas_builder_geometry_info_exp_t*) &geomDescrData[offset];
|
||||
offset += sizeof_RTHWIF_GEOMETRY(type);
|
||||
assert(offset <= geomDescrData.size());
|
||||
}
|
||||
|
||||
ze_rtas_parallel_operation_exp_handle_t parallelOperation = nullptr;
|
||||
err = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, ¶llelOperation);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "parallel operation creation failed");
|
||||
|
||||
ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
|
||||
ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
|
||||
err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDeviceGetProperties properties failed");
|
||||
|
||||
/* estimate static accel size */
|
||||
BBox1f time_range(0,1);
|
||||
ze_rtas_aabb_exp_t bounds;
|
||||
ze_rtas_builder_build_op_exp_desc_t args;
|
||||
memset(&args,0,sizeof(args));
|
||||
args.stype = ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC;
|
||||
args.pNext = nullptr;
|
||||
args.rtasFormat = rtasProp.rtasFormat;
|
||||
args.buildQuality = convertBuildQuality(scene->quality_flags);
|
||||
args.buildFlags = convertBuildFlags(scene->scene_flags,scene->quality_flags);
|
||||
args.ppGeometries = (const ze_rtas_builder_geometry_info_exp_t**) geomDescr.data();
|
||||
args.numGeometries = geomDescr.size();
|
||||
|
||||
/* just for debugging purposes */
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
ze_rtas_builder_build_op_debug_exp_desc_t buildOpDebug = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC };
|
||||
buildOpDebug.dispatchGlobalsPtr = dynamic_cast<DeviceGPU*>(scene->device)->dispatchGlobalsPtr;
|
||||
args.pNext = &buildOpDebug;
|
||||
#endif
|
||||
|
||||
ze_rtas_builder_exp_properties_t sizeTotal = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES };
|
||||
err = ZeWrapper::zeRTASBuilderGetBuildPropertiesExp(hBuilder,&args,&sizeTotal);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"BVH size estimate failed");
|
||||
|
||||
/* allocate scratch buffer */
|
||||
mvector<char> scratchBuffer(scene->device,sizeTotal.scratchBufferSizeBytes);
|
||||
|
||||
size_t headerBytes = sizeof(EmbreeHWAccel) + std::max(1u,maxTimeSegments)*8;
|
||||
align(headerBytes,128);
|
||||
|
||||
/* build BVH */
|
||||
BBox3f fullBounds = empty;
|
||||
while (true)
|
||||
{
|
||||
/* estimate size of all mblur BVHs */
|
||||
ze_rtas_builder_exp_properties_t size = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES };
|
||||
size.rtasBufferSizeBytesExpected = maxTimeSegments*sizeTotal.rtasBufferSizeBytesExpected;
|
||||
size.rtasBufferSizeBytesMaxRequired = maxTimeSegments*sizeTotal.rtasBufferSizeBytesMaxRequired;
|
||||
size_t bytes = headerBytes+size.rtasBufferSizeBytesExpected;
|
||||
|
||||
/* allocate BVH data */
|
||||
if (accel.size() < bytes) accel.resize(bytes);
|
||||
memset(accel.data(),0,accel.size()); // FIXME: not required
|
||||
|
||||
/* build BVH for each time segment */
|
||||
for (uint32_t i=0; i<maxTimeSegments; i++)
|
||||
{
|
||||
const float t0 = float(i+0)/float(maxTimeSegments);
|
||||
const float t1 = float(i+1)/float(maxTimeSegments);
|
||||
time_range = BBox1f(t0,t1);
|
||||
|
||||
void* accelBuffer = accel.data() + headerBytes + i*sizeTotal.rtasBufferSizeBytesExpected;
|
||||
size_t accelBufferBytes = sizeTotal.rtasBufferSizeBytesExpected;
|
||||
bounds = { { INFINITY, INFINITY, INFINITY }, { -INFINITY, -INFINITY, -INFINITY } };
|
||||
|
||||
err = ZeWrapper::zeRTASBuilderBuildExp(hBuilder,&args,
|
||||
scratchBuffer.data(),scratchBuffer.size(),
|
||||
accelBuffer, accelBufferBytes,
|
||||
parallelOperation,
|
||||
&time_range, &bounds, nullptr);
|
||||
if (parallelOperation)
|
||||
{
|
||||
assert(err == ZE_RESULT_EXP_RTAS_BUILD_DEFERRED);
|
||||
|
||||
ze_rtas_parallel_operation_exp_properties_t prop = { ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES };
|
||||
err = ZeWrapper::zeRTASParallelOperationGetPropertiesExp(parallelOperation,&prop);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "get max concurrency failed");
|
||||
|
||||
parallel_for(prop.maxConcurrency, [&](uint32_t) { err = ZeWrapper::zeRTASParallelOperationJoinExp(parallelOperation); });
|
||||
}
|
||||
|
||||
fullBounds.extend(*(BBox3f*) &bounds);
|
||||
|
||||
if (err == ZE_RESULT_EXP_RTAS_BUILD_RETRY)
|
||||
{
|
||||
if (sizeTotal.rtasBufferSizeBytesExpected == sizeTotal.rtasBufferSizeBytesMaxRequired)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"build error");
|
||||
|
||||
sizeTotal.rtasBufferSizeBytesExpected = std::min(sizeTotal.rtasBufferSizeBytesMaxRequired,(size_t(1.2*sizeTotal.rtasBufferSizeBytesExpected)+127)&-128);
|
||||
break;
|
||||
}
|
||||
|
||||
if (err != ZE_RESULT_SUCCESS) break;
|
||||
}
|
||||
if (err != ZE_RESULT_EXP_RTAS_BUILD_RETRY) break;
|
||||
}
|
||||
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"build error");
|
||||
|
||||
/* destroy parallel operation */
|
||||
err = ZeWrapper::zeRTASParallelOperationDestroyExp(parallelOperation);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "parallel operation destruction failed");
|
||||
|
||||
/* destroy rtas builder again */
|
||||
err = ZeWrapper::zeRTASBuilderDestroyExp(hBuilder);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "ze_rtas_builder destruction failed");
|
||||
|
||||
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) accel.data();
|
||||
hwaccel->numTimeSegments = maxTimeSegments;
|
||||
|
||||
for (size_t i=0; i<maxTimeSegments; i++)
|
||||
hwaccel->AccelTable[i] = (char*)hwaccel + headerBytes + i*sizeTotal.rtasBufferSizeBytesExpected;
|
||||
|
||||
return fullBounds;
|
||||
}
|
||||
}
|
||||
74
Framework/external/embree/kernels/sycl/rthwif_embree_builder.h
vendored
Normal file
74
Framework/external/embree/kernels/sycl/rthwif_embree_builder.h
vendored
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../common/sys/platform.h"
|
||||
#include "../../common/sys/sycl.h"
|
||||
#include "../../common/sys/vector.h"
|
||||
#include "../../common/math/bbox.h"
|
||||
#include "../../include/embree4/rtcore.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class Scene;
|
||||
|
||||
void* rthwifAllocAccelBuffer(Device* embree_device, size_t bytes, sycl::device device, sycl::context context);
|
||||
|
||||
void rthwifFreeAccelBuffer(Device* embree_device, void* ptr, size_t bytes, sycl::context context);
|
||||
|
||||
/*! allocator that performs BVH memory allocations */
|
||||
template<typename T>
|
||||
struct AccelAllocator
|
||||
{
|
||||
typedef T value_type;
|
||||
typedef T* pointer;
|
||||
typedef const T* const_pointer;
|
||||
typedef T& reference;
|
||||
typedef const T& const_reference;
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
|
||||
AccelAllocator()
|
||||
: device(nullptr), context(nullptr) {}
|
||||
|
||||
AccelAllocator(Device* embree_device, const sycl::device& device, const sycl::context& context)
|
||||
: embree_device(embree_device), device(&device), context(&context) {}
|
||||
|
||||
__forceinline pointer allocate( size_type n ) {
|
||||
if (context && device)
|
||||
return (pointer) rthwifAllocAccelBuffer(embree_device,n*sizeof(T),*device,*context);
|
||||
else
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
__forceinline void deallocate( pointer p, size_type n ) {
|
||||
if (context)
|
||||
rthwifFreeAccelBuffer(embree_device,p,n*sizeof(T),*context);
|
||||
}
|
||||
|
||||
__forceinline void construct( pointer p, const_reference val ) {
|
||||
new (p) T(val);
|
||||
}
|
||||
|
||||
__forceinline void destroy( pointer p ) {
|
||||
p->~T();
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
Device* embree_device;
|
||||
const sycl::device* device;
|
||||
const sycl::context* context;
|
||||
};
|
||||
|
||||
typedef vector_t<char,AccelAllocator<char>> AccelBuffer;
|
||||
|
||||
void* zeRTASInitExp(sycl::device device, sycl::context context);
|
||||
|
||||
void rthwifCleanup(Device* embree_device, void* dispatchGlobalsPtr, sycl::context context);
|
||||
|
||||
int rthwifIsSYCLDeviceSupported(const sycl::device& sycl_device);
|
||||
|
||||
BBox3f rthwifBuild(Scene* scene, AccelBuffer& buffer_o);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue