Initial commit.

This commit is contained in:
hal8174 2024-04-23 10:14:24 +02:00
commit d3bb49b3f5
1073 changed files with 484757 additions and 0 deletions

View file

@ -0,0 +1,168 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define RTC_EXPORT_API
#include "../common/default.h"
#include "../common/device.h"
#include "../common/scene.h"
#include "../common/context.h"
#include "../geometry/filter.h"
#include "rthwif_embree.h"
using namespace embree;
#define DBG(x)
RTC_NAMESPACE_BEGIN;
RTC_API_EXTERN_C RTCDevice rtcNewSYCLDeviceInternal(sycl::context context, const char* config);
void use_rthwif_embree();
void use_rthwif_production();
/* we define rtcNewSYCLDevice in libembree_sycl.a to avoid drop of rtcore_sycl.o during linking of libembree_sycl.a file */
RTC_API_EXTERN_C RTCDevice rtcNewSYCLDevice(sycl::context context, const char* config)
{
use_rthwif_embree(); // to avoid drop of rthwif_embree.o during linking of libembree_sycl.a file
#if defined(EMBREE_SYCL_RT_VALIDATION_API)
use_rthwif_production(); // to avoid drop of rthwif_production.o during linking of libembree_sycl.a file
#endif
return rtcNewSYCLDeviceInternal(context, config);
}
#if defined(EMBREE_SYCL_SUPPORT) && (defined(__SYCL_DEVICE_ONLY__) || defined(EMBREE_SYCL_RT_SIMULATION))
SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersect1(RTCScene hscene, struct RTCRayHit* rayhit, struct RTCIntersectArguments* args)
{
RTCIntersectArguments default_args;
if (args == nullptr) {
rtcInitIntersectArguments(&default_args);
args = &default_args;
}
RTCRayQueryContext* context = args->context;
RTCRayQueryContext defaultContext;
if (unlikely(context == nullptr)) {
rtcInitRayQueryContext(&defaultContext);
context = &defaultContext;
}
rtcIntersectRTHW(hscene, context, rayhit, args);
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcForwardIntersect1(const RTCIntersectFunctionNArguments* args_, RTCScene scene, struct RTCRay* iray, unsigned int instID)
{
return rtcForwardIntersect1Ex(args_, scene, iray, instID, 0);
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcForwardIntersect1Ex(const RTCIntersectFunctionNArguments* args_, RTCScene scene, struct RTCRay* iray, unsigned int instID, unsigned int instPrimID)
{
IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_;
assert(args->N == 1);
assert(args->forward_scene == nullptr);
Ray* oray = (Ray*)args->rayhit;
oray->org.x = iray->org_x;
oray->org.y = iray->org_y;
oray->org.z = iray->org_z;
oray->dir.x = iray->dir_x;
oray->dir.y = iray->dir_y;
oray->dir.z = iray->dir_z;
args->forward_scene = scene;
instance_id_stack::push(args->context, instID, instPrimID);
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcOccluded1(RTCScene hscene, struct RTCRay* ray, struct RTCOccludedArguments* args)
{
RTCOccludedArguments default_args;
if (args == nullptr) {
rtcInitOccludedArguments(&default_args);
args = &default_args;
}
RTCRayQueryContext* context = args->context;
RTCRayQueryContext defaultContext;
if (unlikely(context == nullptr)) {
rtcInitRayQueryContext(&defaultContext);
context = &defaultContext;
}
rtcOccludedRTHW(hscene, context, ray, args);
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcForwardOccluded1(const RTCOccludedFunctionNArguments *args_, RTCScene scene, struct RTCRay *iray, unsigned int instID){
return rtcForwardOccluded1Ex(args_, scene, iray, instID, 0);
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcForwardOccluded1Ex(const RTCOccludedFunctionNArguments *args_, RTCScene scene, struct RTCRay *iray, unsigned int instID, unsigned int instPrimID)
{
OccludedFunctionNArguments* args = (OccludedFunctionNArguments*) args_;
assert(args->N == 1);
assert(args->forward_scene == nullptr);
Ray* oray = (Ray*)args->ray;
oray->org.x = iray->org_x;
oray->org.y = iray->org_y;
oray->org.z = iray->org_z;
oray->dir.x = iray->dir_x;
oray->dir.y = iray->dir_y;
oray->dir.z = iray->dir_z;
args->forward_scene = scene;
instance_id_stack::push(args->context, instID, instPrimID);
}
SYCL_EXTERNAL __attribute__((always_inline)) void* rtcGetGeometryUserDataFromScene (RTCScene hscene, unsigned int geomID)
{
Scene* scene = (Scene*) hscene;
//RTC_CATCH_BEGIN;
//RTC_TRACE(rtcGetGeometryUserDataFromScene);
#if defined(DEBUG)
//RTC_VERIFY_HANDLE(hscene);
//RTC_VERIFY_GEOMID(geomID);
#endif
//RTC_ENTER_DEVICE(hscene); // do not enable for performance reasons
return scene->get(geomID)->getUserData();
//RTC_CATCH_END2(scene);
//return nullptr;
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcGetGeometryTransformFromScene(RTCScene hscene, unsigned int geomID, float time, enum RTCFormat format, void* xfm)
{
Scene* scene = (Scene*) hscene;
//RTC_CATCH_BEGIN;
//RTC_TRACE(rtcGetGeometryTransformFromScene);
//RTC_ENTER_DEVICE(hscene);
AffineSpace3fa transform = one;
Geometry* geom = scene->get(geomID);
if (geom->getTypeMask() & Geometry::MTY_INSTANCE) {
Instance* instance = (Instance*) geom;
if (likely(instance->numTimeSteps <= 1))
transform = instance->getLocal2World();
else
transform = instance->getLocal2World(time);
}
storeTransform(transform, format, (float*)xfm);
//RTC_CATCH_END2(geometry);
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcInvokeIntersectFilterFromGeometry(const RTCIntersectFunctionNArguments* args_i, const RTCFilterFunctionNArguments* filter_args)
{
#if EMBREE_SYCL_GEOMETRY_CALLBACK
IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_i;
if (args->geometry->intersectionFilterN)
args->geometry->intersectionFilterN(filter_args);
#endif
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcInvokeOccludedFilterFromGeometry(const RTCOccludedFunctionNArguments* args_i, const RTCFilterFunctionNArguments* filter_args)
{
#if EMBREE_SYCL_GEOMETRY_CALLBACK
OccludedFunctionNArguments* args = (OccludedFunctionNArguments*) args_i;
if (args->geometry->occlusionFilterN)
args->geometry->occlusionFilterN(filter_args);
#endif
}
#endif
RTC_NAMESPACE_END;

View file

@ -0,0 +1,866 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "../common/default.h"
#include "../common/device.h"
#include "../common/scene.h"
#include "../common/context.h"
#include "../../include/embree4/rtcore.h"
#include "../geometry/disc_intersector.h"
#include "../geometry/sphere_intersector.h"
#include "../geometry/roundline_intersector.h"
#include "../geometry/curve_intersector_ribbon.h"
#include "../geometry/curve_intersector_oriented.h"
#include "../geometry/curve_intersector_sweep.h"
#include "../geometry/curve_intersector_distance.h"
#include "../geometry/curve_intersector_ribbon.h"
#include "../geometry/curveNi_intersector.h"
#include "../geometry/instance_intersector.h"
#include "../geometry/instance_array_intersector.h"
#include "../geometry/intersector_epilog_sycl.h"
#include "../geometry/triangle_intersector_moeller.h"
#include "../geometry/triangle_intersector_pluecker.h"
#include "rthwif_embree.h"
#include "../rthwif/rttrace/rttrace.h"
using namespace embree;
intel_ray_flags_t operator |(intel_ray_flags_t a, intel_ray_flags_t b) {
return (intel_ray_flags_t) ((uint32_t)a | (uint32_t)b);
}
intel_ray_flags_t operator |= (intel_ray_flags_t& a, intel_ray_flags_t b) {
return a = a | b;
}
RTC_NAMESPACE_BEGIN;
//#if defined(EMBREE_SYCL_ROBUST)
#define TriangleIntersector isa::PlueckerIntersector1<1,true>
#define ROBUST_MODE true
//#else
//#define TriangleIntersector isa::MoellerTrumboreIntersector1<1,true>
//#define ROBUST_MODE false
//#endif
const constexpr uint32_t TRAV_LOOP_FEATURES =
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
RTC_FEATURE_FLAG_TRIANGLE | // filter function enforced for triangles and quads in this case
RTC_FEATURE_FLAG_QUAD |
#endif
#if defined(EMBREE_RAY_MASK)
RTC_FEATURE_FLAG_32_BIT_RAY_MASK |
#endif
RTC_FEATURE_FLAG_MOTION_BLUR |
RTC_FEATURE_FLAG_CURVES |
RTC_FEATURE_FLAG_GRID |
RTC_FEATURE_FLAG_POINT |
RTC_FEATURE_FLAG_USER_GEOMETRY |
RTC_FEATURE_FLAG_INSTANCE |
RTC_FEATURE_FLAG_INSTANCE_ARRAY |
RTC_FEATURE_FLAG_FILTER_FUNCTION;
void use_rthwif_embree() {
}
__forceinline Vec3f intel_get_hit_triangle_normal(intel_ray_query_t& query, intel_hit_type_t hit_type)
{
intel_float3 v[3]; intel_get_hit_triangle_vertices(query, v, hit_type);
const Vec3f v0(v[0].x, v[0].y, v[0].z);
const Vec3f v1(v[1].x, v[1].y, v[1].z);
const Vec3f v2(v[2].x, v[2].y, v[2].z);
return cross(v1-v0, v2-v0);
}
__forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray, UserGeometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
{
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
if ((ray.mask & geom->mask) == 0)
return false;
#endif
RTCScene forward_scene = nullptr;
const bool ishit = geom->intersect(ray,geomID,primID,context,forward_scene);
if (!forward_scene) return ishit;
/* forward ray to instanced scene */
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
Scene* scene = (Scene*) forward_scene;
scenes[bvh_level] = scene;
intel_ray_desc_t raydesc;
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
raydesc.tmin = ray.tnear();
raydesc.tmax = inf; // unused
raydesc.mask = mask32_to_mask8(ray.mask);
raydesc.flags = intel_ray_flags_force_non_opaque;
//if (context.enforceArgumentFilterFunction())
// raydesc.flags |= intel_ray_flags_force_non_opaque;
#if defined(EMBREE_BACKFACE_CULLING)
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
#endif
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) scene->hwaccel.data();
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[0];
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
return false;
}
__forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, UserGeometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
{
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
if ((ray.mask & geom->mask) == 0)
return false;
#endif
RTCScene forward_scene = nullptr;
const bool ishit = geom->occluded(ray,geomID,primID,context,forward_scene);
if (!forward_scene) return ishit;
/* forward ray to instanced scene */
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
Scene* scene = (Scene*) forward_scene;
scenes[bvh_level] = scene;
intel_ray_desc_t raydesc;
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
raydesc.tmin = ray.tnear();
raydesc.tmax = inf; // unused
raydesc.mask = mask32_to_mask8(ray.mask);
raydesc.flags = intel_ray_flags_force_non_opaque | intel_ray_flags_accept_first_hit_and_end_search;
//if (context.enforceArgumentFilterFunction())
// raydesc.flags |= intel_ray_flags_force_non_opaque;
#if defined(EMBREE_BACKFACE_CULLING)
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
#endif
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) scene->hwaccel.data();
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[0];
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
return false;
}
template<typename Ray>
__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Instance* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID);
template<>
__forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Instance* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
{
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
if ((ray.mask & instance->mask) == 0)
return false;
#endif
if (!instance_id_stack::push(context->user, geomID, 0))
return false;
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
Scene* object = (Scene*) instance->object;
const AffineSpace3fa world2local = instance->getWorld2Local(ray.time());
const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
scenes[bvh_level] = object;
intel_ray_desc_t raydesc;
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
raydesc.direction = float3(ray_dir.x, ray_dir.y, ray_dir.z);
raydesc.tmin = ray.tnear();
raydesc.tmax = inf; // unused
raydesc.mask = mask32_to_mask8(ray.mask);
raydesc.flags = intel_ray_flags_force_non_opaque;
//if (context.enforceArgumentFilterFunction())
// raydesc.flags |= intel_ray_flags_force_non_opaque;
#if defined(EMBREE_BACKFACE_CULLING)
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
#endif
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) object->hwaccel.data();
uint32_t bvh_id = 0;
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
if (context->args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
float time = clamp(ray.time(),0.0f,1.0f);
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
}
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
return false;
}
template<>
__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Instance* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
{
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
if ((ray.mask & instance->mask) == 0)
return false;
#endif
if (!instance_id_stack::push(context->user, geomID, 0))
return false;
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
Scene* object = (Scene*) instance->object;
const AffineSpace3fa world2local = instance->getWorld2Local(ray.time());
const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
scenes[bvh_level] = object;
intel_ray_desc_t raydesc;
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
raydesc.direction = float3(ray_dir.x, ray_dir.y, ray_dir.z);
raydesc.tmin = ray.tnear();
raydesc.tmax = inf; // unused
raydesc.mask = mask32_to_mask8(ray.mask);
raydesc.flags = intel_ray_flags_accept_first_hit_and_end_search;
if (context->enforceArgumentFilterFunction())
raydesc.flags |= intel_ray_flags_force_non_opaque;
#if defined(EMBREE_BACKFACE_CULLING)
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
#endif
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) object->hwaccel.data();
uint32_t bvh_id = 0;
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
if (context->args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
float time = clamp(ray.time(),0.0f,1.0f);
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
}
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
return false;
}
template<typename Ray>
__forceinline bool intersect_instance_array(intel_ray_query_t& query, Ray& ray, InstanceArray* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID);
template<>
__forceinline bool intersect_instance_array(intel_ray_query_t& query, RayHit& ray, InstanceArray* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
{
Scene* object = (Scene*) instance->getObject(primID);
if (!object) return false;
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
if ((ray.mask & instance->mask) == 0)
return false;
#endif
if (!instance_id_stack::push(context->user, geomID, primID))
return false;
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
const AffineSpace3fa world2local = instance->getWorld2Local(primID, ray.time());
const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
scenes[bvh_level] = object;
intel_ray_desc_t raydesc;
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
raydesc.direction = float3(ray_dir.x, ray_dir.y, ray_dir.z);
raydesc.tmin = ray.tnear();
raydesc.tmax = inf; // unused
raydesc.mask = mask32_to_mask8(ray.mask);
raydesc.flags = intel_ray_flags_force_non_opaque;
//if (context.enforceArgumentFilterFunction())
// raydesc.flags |= intel_ray_flags_force_non_opaque;
#if defined(EMBREE_BACKFACE_CULLING)
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
#endif
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) object->hwaccel.data();
uint32_t bvh_id = 0;
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
if (context->args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
float time = clamp(ray.time(),0.0f,1.0f);
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
}
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
return false;
}
template<>
__forceinline bool intersect_instance_array(intel_ray_query_t& query, Ray& ray, InstanceArray* instance, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
{
Scene* object = (Scene*) instance->getObject(primID);
if (!object) return false;
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
if ((ray.mask & instance->mask) == 0)
return false;
#endif
if (!instance_id_stack::push(context->user, geomID, primID))
return false;
unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit ) + 1;
const AffineSpace3fa world2local = instance->getWorld2Local(primID, ray.time());
const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
scenes[bvh_level] = object;
intel_ray_desc_t raydesc;
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
raydesc.direction = float3(ray_dir.x, ray_dir.y, ray_dir.z);
raydesc.tmin = ray.tnear();
raydesc.tmax = inf; // unused
raydesc.mask = mask32_to_mask8(ray.mask);
raydesc.flags = intel_ray_flags_accept_first_hit_and_end_search;
if (context->enforceArgumentFilterFunction())
raydesc.flags |= intel_ray_flags_force_non_opaque;
#if defined(EMBREE_BACKFACE_CULLING)
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
#endif
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) object->hwaccel.data();
uint32_t bvh_id = 0;
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
if (context->args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
float time = clamp(ray.time(),0.0f,1.0f);
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
}
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
intel_ray_query_forward_ray(query, raydesc, hwaccel_ptr);
return false;
}
template<typename Ray>
__forceinline bool intersect_primitive(intel_ray_query_t& query, Ray& ray, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], Geometry* geom, sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, const RTCFeatureFlags feature_mask)
{
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
bool filter = feature_mask & (RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY);
if (feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
if (ray.time() < geom->time_range.lower || geom->time_range.upper < ray.time())
return false;
}
#if defined(EMBREE_GEOMETRY_USER)
if ((feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY) && (geom->getType() == Geometry::GTY_USER_GEOMETRY)) {
return intersect_user_geometry(query,ray,(UserGeometry*)geom, scenes, context, geomID, primID);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
if ((feature_mask & RTC_FEATURE_FLAG_INSTANCE) && (geom->getTypeMask() & Geometry::MTY_INSTANCE)) {
return intersect_instance(query,ray,(Instance*)geom, scenes, context, geomID, primID);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
if ((feature_mask & RTC_FEATURE_FLAG_INSTANCE_ARRAY) && (geom->getTypeMask() & Geometry::MTY_INSTANCE_ARRAY)) {
return intersect_instance_array(query,ray,(InstanceArray*)geom, scenes, context, geomID, primID);
}
#endif
isa::CurvePrecalculations1 pre(ray,context->scene);
const Geometry::GType gtype MAYBE_UNUSED = geom->getType();
const Geometry::GType stype MAYBE_UNUSED = (Geometry::GType)(gtype & Geometry::GTY_SUBTYPE_MASK);
const Geometry::GType basis MAYBE_UNUSED = (Geometry::GType)(gtype & Geometry::GTY_BASIS_MASK);
#if defined(EMBREE_GEOMETRY_TRIANGLE)
if (gtype == Geometry::GTY_TRIANGLE_MESH && (feature_mask & RTC_FEATURE_FLAG_TRIANGLE) && (feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR))
{
const TriangleMesh* geom = context->scene->get<TriangleMesh>(geomID);
const TriangleMesh::Triangle triangle = geom->triangle(primID);
Vec3fa v0 = geom->vertex(triangle.v[0], ray.time());
Vec3fa v1 = geom->vertex(triangle.v[1], ray.time());
Vec3fa v2 = geom->vertex(triangle.v[2], ray.time());
return TriangleIntersector().intersect(ray,v0,v1,v2,Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, primID, filter));
} else
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
if (gtype == Geometry::GTY_QUAD_MESH && (feature_mask & RTC_FEATURE_FLAG_QUAD) && (feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR))
{
const QuadMesh* geom = context->scene->get<QuadMesh>(geomID);
const QuadMesh::Quad quad = geom->quad(primID);
Vec3fa v0 = geom->vertex(quad.v[0], ray.time());
Vec3fa v1 = geom->vertex(quad.v[1], ray.time());
Vec3fa v2 = geom->vertex(quad.v[2], ray.time());
Vec3fa v3 = geom->vertex(quad.v[3], ray.time());
bool ishit0 = TriangleIntersector().intersect(ray,v0,v1,v3,Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, primID, filter));
bool ishit1 = TriangleIntersector().intersect(ray,v2,v3,v1,[&](float &u, float &v, Vec3f& Ng) { u = 1.f - u; v = 1.f - v; }, Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, primID, filter));
return ishit0 || ishit1;
} else
#endif
#if defined(EMBREE_GEOMETRY_GRID)
if (gtype == Geometry::GTY_GRID_MESH && (feature_mask & RTC_FEATURE_FLAG_GRID))
{
const GridMesh* mesh = context->scene->get<GridMesh>(geomID);
const GridMesh::PrimID_XY c = mesh->quadID_to_primID_xy[primID];
const GridMesh::Grid& g = mesh->grid(c.primID);
auto map_uv0 = [&](float &u, float &v, Vec3f& Ng) {
const Vec2f uv(u,v);
u = (c.x + uv.x)/(g.resX-1);
v = (c.y + uv.y)/(g.resY-1);
};
auto map_uv1 = [&](float &u, float &v, Vec3f& Ng) {
const Vec2f uv(1.0f-u, 1.0f-v);
u = (c.x + uv.x)/(g.resX-1);
v = (c.y + uv.y)/(g.resY-1);
};
Vec3fa v0,v1,v2,v3;
mesh->gather_quad_vertices_safe(v0,v1,v2,v3,g,c.x,c.y,ray.time());
bool ishit0 = TriangleIntersector().intersect(ray,v0,v1,v3,map_uv0,Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, c.primID, filter));
bool ishit1 = TriangleIntersector().intersect(ray,v2,v3,v1,map_uv1,Intersect1Epilog1_HWIF<Ray>(ray, context, geomID, c.primID, filter));
return ishit0 || ishit1;
} else
#endif
#if defined(EMBREE_GEOMETRY_POINT)
if (gtype == Geometry::GTY_SPHERE_POINT && (feature_mask & RTC_FEATURE_FLAG_SPHERE_POINT))
{
const Points* geom = context->scene->get<Points>(geomID);
const Vec3ff xyzr = geom->vertex_safe(primID, ray.time());
const Vec4f vr(xyzr.x,xyzr.y,xyzr.z,xyzr.w);
return isa::SphereIntersector1<1>::intersect(true, ray, pre, vr, Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
}
else if (gtype == Geometry::GTY_DISC_POINT && (feature_mask & RTC_FEATURE_FLAG_DISC_POINT))
{
const Points* geom = context->scene->get<Points>(geomID);
const Vec3ff xyzr = geom->vertex_safe(primID, ray.time());
const Vec4f vr(xyzr.x,xyzr.y,xyzr.z,xyzr.w);
return isa::DiscIntersector1<1>::intersect(true, ray, nullptr, nullptr, pre, vr, Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
}
else if (gtype == Geometry::GTY_ORIENTED_DISC_POINT && (feature_mask & RTC_FEATURE_FLAG_ORIENTED_DISC_POINT))
{
const Points* geom = context->scene->get<Points>(geomID);
const Vec3ff xyzr = geom->vertex_safe(primID, ray.time());
const Vec4f vr(xyzr.x,xyzr.y,xyzr.z,xyzr.w);
const Vec3f n = geom->normal_safe(primID, ray.time());
return isa::DiscIntersector1<1>::intersect(true, ray, nullptr, nullptr, pre, vr, n, Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
} else
#endif
#if defined(EMBREE_GEOMETRY_CURVE)
if (geom->getTypeMask() & Geometry::MTY_CURVES)
{
if (gtype == Geometry::GTY_FLAT_LINEAR_CURVE && (feature_mask & RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE))
{
LineSegments* geom = context->scene->get<LineSegments>(geomID);
Vec3ff v0, v1; geom->gather_safe(v0,v1,geom->segment(primID),ray.time());
return isa::FlatLinearCurveIntersector1<1>::intersect(true,ray,context,geom,pre,v0,v1,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
}
else if (gtype == Geometry::GTY_ROUND_LINEAR_CURVE && (feature_mask & RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE))
{
LineSegments* geom = context->scene->get<LineSegments>(geomID);
Vec3ff v0,v1,v2,v3; geom->gather_safe(v0,v1,v2,v3,primID,geom->segment(primID),ray.time());
return isa::RoundLinearCurveIntersector1<1>().intersect(true,ray,context,geom,pre,v0,v1,v2,v3,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
}
else if (gtype == Geometry::GTY_CONE_LINEAR_CURVE && (feature_mask & RTC_FEATURE_FLAG_CONE_LINEAR_CURVE))
{
LineSegments* geom = context->scene->get<LineSegments>(geomID);
Vec3ff v0 = zero, v1 = zero; bool cL = false, cR = false;
geom->gather_safe(v0,v1,cL,cR,primID,geom->segment(primID),ray.time());
return isa::ConeCurveIntersector1<1>().intersect(true,ray,context,geom,pre,v0,v1,cL,cR,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
}
else
{
CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
if (stype == Geometry::GTY_SUBTYPE_ORIENTED_CURVE && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_CURVES))
{
using Intersector = isa::OrientedCurve1Intersector1<CubicBezierCurve,8,1>;
using Curve = isa::TensorLinearCubicBezierSurface3fa;
if (geom->numTimeSegments() > 0 && (feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR))
{
Curve curve;
if (basis == Geometry::GTY_BASIS_HERMITE && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE)) {
curve = geom->getNormalOrientedHermiteCurveSafe<HermiteCurveT<Vec3ff>, HermiteCurveT<Vec3fa>, Curve>(context,ray.org,primID,ray.time());
}
else if (basis == Geometry::GTY_BASIS_BSPLINE && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE)) {
curve = geom->getNormalOrientedCurveSafe<BSplineCurveT<Vec3ff>, BSplineCurveT<Vec3fa>, Curve>(context,ray.org,primID,ray.time());
}
else if (basis == Geometry::GTY_BASIS_CATMULL_ROM && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_CATMULL_ROM_CURVE)) {
curve = geom->getNormalOrientedCurveSafe<CatmullRomCurveT<Vec3ff>, CatmullRomCurveT<Vec3fa>, Curve>(context,ray.org,primID,ray.time());
}
else {
curve = geom->getNormalOrientedCurveSafe<Intersector::SourceCurve3ff, Intersector::SourceCurve3fa, Curve>(context,ray.org,primID,ray.time());
}
return Intersector().intersect(pre,ray,context,geom,primID,curve,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
}
else
{
Vec3ff v0,v1,v2,v3;
Vec3fa n0,n1,n2,n3;
if (basis == Geometry::GTY_BASIS_HERMITE && (feature_mask & RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE))
geom->gather_hermite_safe(v0,v1,n0,n1,v2,v3,n2,n3,geom->curve(primID),ray.time());
else
geom->gather_safe(v0,v1,v2,v3,n0,n1,n2,n3,geom->curve(primID),ray.time());
isa::convert_to_bezier(gtype, v0,v1,v2,v3, n0,n1,n2,n3);
return Intersector().intersect(pre,ray,context,geom,primID,v0,v1,v2,v3,n0,n1,n2,n3,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
}
}
else if (feature_mask & (RTC_FEATURE_FLAG_FLAT_CURVES | RTC_FEATURE_FLAG_ROUND_CURVES)) {
Vec3ff v0,v1,v2,v3;
if (basis == Geometry::GTY_BASIS_HERMITE && (feature_mask & (RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE | RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE)))
geom->gather_hermite_safe(v0,v1,v2,v3,geom->curve(primID),ray.time());
else
geom->gather_safe(v0,v1,v2,v3,geom->curve(primID),ray.time());
isa::convert_to_bezier(gtype, v0,v1,v2,v3);
if (stype == Geometry::GTY_SUBTYPE_FLAT_CURVE && (feature_mask & RTC_FEATURE_FLAG_FLAT_CURVES))
{
isa::RibbonCurve1Intersector1<CubicBezierCurve,1> intersector;
return intersector.intersect(pre,ray,context,geom,primID,v0,v1,v2,v3,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
}
else if (stype == Geometry::GTY_SUBTYPE_ROUND_CURVE && (feature_mask & RTC_FEATURE_FLAG_ROUND_CURVES))
{
isa::SweepCurve1Intersector1<CubicBezierCurve> intersector;
return intersector.intersect(pre,ray,context,geom,primID,v0,v1,v2,v3,Intersect1Epilog1_HWIF<Ray>(ray,context,geomID,primID,filter));
}
return false;
}
return false;
}
} else
#endif
#endif
return false;
}
__forceinline bool invokeTriangleIntersectionFilter(intel_ray_query_t& query, Geometry* geom, uint32_t bvh_level, RayHit& ray, Hit& hit, sycl::private_ptr<RayQueryContext> context, const RTCFeatureFlags feature_mask)
{
#if defined(EMBREE_FILTER_FUNCTION)
if (!(feature_mask & RTC_FEATURE_FLAG_FILTER_FUNCTION) || runIntersectionFilter1SYCL(geom, ray, context, hit))
#endif
{
intel_ray_query_commit_potential_hit_override (query, ray.tfar, float2(hit.u, hit.v));
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
ray.instID[l] = hit.instID[l];
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
ray.instPrimID[l] = hit.instPrimID[l];
#endif
}
}
return false;
}
__forceinline bool invokeTriangleIntersectionFilter(intel_ray_query_t& query, Geometry* geom, uint32_t bvh_level, Ray& ray, Hit& hit, sycl::private_ptr<RayQueryContext> context, const RTCFeatureFlags feature_mask)
{
bool ishit = true;
#if defined(EMBREE_FILTER_FUNCTION)
ishit = !(feature_mask & RTC_FEATURE_FLAG_FILTER_FUNCTION) || runIntersectionFilter1SYCL(geom, ray, context, hit);
if (ishit)
#endif
{
intel_ray_query_commit_potential_hit_override (query, ray.tfar, float2(hit.u, hit.v));
}
return ishit;
}
__forceinline bool commit_potential_hit(intel_ray_query_t& query, RayHit& ray) {
intel_ray_query_commit_potential_hit_override (query, ray.tfar, float2(ray.u, ray.v));
return false;
}
__forceinline bool commit_potential_hit(intel_ray_query_t& query, Ray& ray) {
intel_ray_query_commit_potential_hit_override (query, ray.tfar, float2(0.0f, 0.0f));
return true;
}
template<typename Ray>
__forceinline void trav_loop(intel_ray_query_t& query, Ray& ray, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1], sycl::private_ptr<RayQueryContext> context, const RTCFeatureFlags feature_mask)
{
while (!intel_is_traversal_done(query))
{
intel_candidate_type_t candidate = intel_get_hit_candidate(query, intel_hit_type_potential_hit);
const unsigned int bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit );
const float3 org = intel_get_ray_origin ( query, bvh_level );
const float3 dir = intel_get_ray_direction( query, bvh_level );
const float t = intel_get_hit_distance(query, intel_hit_type_potential_hit);
const float2 uv = intel_get_hit_barycentrics (query, intel_hit_type_potential_hit);
const unsigned int geomID = intel_get_hit_geometry_id(query, intel_hit_type_potential_hit);
const unsigned int primID = intel_get_hit_primitive_id(query, intel_hit_type_potential_hit);
ray.org = Vec3ff(org.x(), org.y(), org.z(), ray.tnear());
ray.dir = Vec3ff(dir.x(), dir.y(), dir.z(), ray.time ());
ray.tfar = intel_get_hit_distance(query, intel_hit_type_committed_hit);
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
context->user->instStackSize = bvh_level;
Scene* scene = scenes[bvh_level];
#else
const unsigned int instID = intel_get_hit_instance_id(query, intel_hit_type_potential_hit);
/* assume software instancing mode by default (required for rtcForwardRay) */
Scene* scene = scenes[bvh_level];
/* if we are in hardware instancing mode and we need to read the scene from the instance */
if (bvh_level > 0 && instID != RTC_INVALID_GEOMETRY_ID) {
Instance* inst = scenes[0]->get<Instance>(instID);
scene = (Scene*) inst->object;
context->user->instID[0] = instID;
}
else if (bvh_level == 0)
context->user->instID[0] = RTC_INVALID_GEOMETRY_ID;
#endif
context->scene = scene;
Geometry* geom = scene->get(geomID);
/* perform ray masking */
#if defined(EMBREE_RAY_MASK)
if (ray.mask & geom->mask)
#endif
{
if (candidate == intel_candidate_type_procedural)
{
if (intersect_primitive(query,ray,scenes,geom,context,geomID,primID,feature_mask))
if (commit_potential_hit (query, ray))
break; // shadow rays break at first hit
}
else // if (candidate == TRIANGLE)
{
ray.tfar = t;
Vec3f Ng = intel_get_hit_triangle_normal(query, intel_hit_type_potential_hit);
Hit hit(context->user,geomID,primID,Vec2f(uv.x(),uv.y()),Ng);
if (invokeTriangleIntersectionFilter(query, geom, bvh_level, ray, hit, context, feature_mask))
break; // shadow rays break at first hit
}
}
intel_ray_query_start_traversal(query);
intel_ray_query_sync(query);
}
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersectRTHW(sycl::global_ptr<RTCSceneTy> hscene, sycl::private_ptr<RTCRayQueryContext> ucontext, sycl::private_ptr<RTCRayHit> rayhit_i, sycl::private_ptr<RTCIntersectArguments> args)
{
Scene* scene = (Scene*) hscene.get();
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) scene->hwaccel.data();
Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1];
scenes[0] = scene;
RayQueryContext context(scene, ucontext, args);
RayHit ray;
ray.org = Vec3ff(rayhit_i->ray.org_x, rayhit_i->ray.org_y, rayhit_i->ray.org_z, rayhit_i->ray.tnear);
ray.dir = Vec3ff(rayhit_i->ray.dir_x, rayhit_i->ray.dir_y, rayhit_i->ray.dir_z, rayhit_i->ray.time);
ray.tfar = rayhit_i->ray.tfar;
ray.mask = rayhit_i->ray.mask;
ray.id = rayhit_i->ray.id;
ray.flags = rayhit_i->ray.flags;
ray.Ng = Vec3f(0,0,0);
ray.u = 0;
ray.v = 0;
ray.primID = RTC_INVALID_GEOMETRY_ID;
ray.geomID = RTC_INVALID_GEOMETRY_ID;
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
for (uint32_t l=0; l<RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
ray.instID[l] = RTC_INVALID_GEOMETRY_ID;
}
#else
ray.instID[0] = RTC_INVALID_GEOMETRY_ID;
#endif
intel_ray_desc_t raydesc;
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
raydesc.tmin = ray.tnear();
raydesc.tmax = ray.tfar;
raydesc.mask = mask32_to_mask8(ray.mask);
raydesc.flags = intel_ray_flags_none;
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
raydesc.flags |= intel_ray_flags_force_non_opaque;
#endif
#if defined(EMBREE_BACKFACE_CULLING)
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
#endif
if (context.enforceArgumentFilterFunction())
raydesc.flags |= intel_ray_flags_force_non_opaque;
uint32_t bvh_id = 0;
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
if (args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
float time = clamp(ray.time(),0.0f,1.0f);
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
}
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
intel_ray_query_t query = intel_ray_query_init(raydesc, hwaccel_ptr);
intel_ray_query_start_traversal(query);
intel_ray_query_sync(query);
if (args->feature_mask & TRAV_LOOP_FEATURES) {
trav_loop(query,ray,scenes,&context,args->feature_mask);
}
bool valid = intel_has_committed_hit(query);
if (valid)
{
float t = intel_get_hit_distance(query, intel_hit_type_committed_hit);
float2 uv = intel_get_hit_barycentrics (query, intel_hit_type_committed_hit);
unsigned int geomID = intel_get_hit_geometry_id(query, intel_hit_type_committed_hit);
unsigned int primID = ray.primID;
if (intel_get_hit_candidate(query, intel_hit_type_committed_hit) == intel_candidate_type_triangle)
primID = intel_get_hit_triangle_primitive_id(query, intel_hit_type_committed_hit);
rayhit_i->ray.tfar = t;
rayhit_i->hit.geomID = geomID;
rayhit_i->hit.primID = primID;
rayhit_i->hit.u = uv.x();
rayhit_i->hit.v = uv.y();
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
for (uint32_t l=0; l<RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
rayhit_i->hit.instID[l] = ray.instID[l];
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
rayhit_i->hit.instPrimID[l] = ray.instPrimID[l];
#endif
}
#else
unsigned int bvh_level = intel_get_hit_bvh_level(query, intel_hit_type_committed_hit);
unsigned int instID = intel_get_hit_instance_id(query, intel_hit_type_committed_hit);
/* when rtcForwardRay was used then we are in software instancing mode */
if (bvh_level > 0 && instID == RTC_INVALID_GEOMETRY_ID)
instID = ray.instID[0];
rayhit_i->hit.instID[0] = instID;
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
rayhit_i->hit.instPrimID[0] = ray.instPrimID[0];
#endif
#endif
/* calculate geometry normal for hardware accelerated triangles */
if (intel_get_hit_candidate(query, intel_hit_type_committed_hit) == intel_candidate_type_triangle)
ray.Ng = intel_get_hit_triangle_normal(query, intel_hit_type_committed_hit);
rayhit_i->hit.Ng_x = ray.Ng.x;
rayhit_i->hit.Ng_y = ray.Ng.y;
rayhit_i->hit.Ng_z = ray.Ng.z;
}
else
{
rayhit_i->hit.geomID = -1;
}
intel_ray_query_abandon(query);
}
SYCL_EXTERNAL __attribute__((always_inline)) void rtcOccludedRTHW(sycl::global_ptr<RTCSceneTy> hscene, sycl::private_ptr<RTCRayQueryContext> ucontext, sycl::private_ptr<RTCRay> ray_i, sycl::private_ptr<RTCOccludedArguments> args)
{
Scene* scene = (Scene*) hscene.get();
intel_raytracing_acceleration_structure_t hwaccel_ptr = (intel_raytracing_acceleration_structure_t) scene->hwaccel.data();
Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT+1];
scenes[0] = scene;
RayQueryContext context(scene, ucontext, args);
Ray ray;
ray.org = Vec3ff(ray_i->org_x, ray_i->org_y, ray_i->org_z, ray_i->tnear);
ray.dir = Vec3ff(ray_i->dir_x, ray_i->dir_y, ray_i->dir_z, ray_i->time);
ray.tfar = ray_i->tfar;
ray.mask = ray_i->mask;
ray.id = ray_i->id;
ray.flags = ray_i->flags;
intel_ray_desc_t raydesc;
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
raydesc.tmin = ray.tnear();
raydesc.tmax = ray.tfar;
raydesc.mask = mask32_to_mask8(ray.mask);
raydesc.flags = intel_ray_flags_accept_first_hit_and_end_search;
#if defined(EMBREE_BACKFACE_CULLING)
raydesc.flags |= intel_ray_flags_cull_back_facing_triangles;
#endif
if (context.enforceArgumentFilterFunction())
raydesc.flags |= intel_ray_flags_force_non_opaque;
uint32_t bvh_id = 0;
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) hwaccel_ptr;
if (args->feature_mask & RTC_FEATURE_FLAG_MOTION_BLUR) {
float time = clamp(ray.time(),0.0f,1.0f);
bvh_id = (uint32_t) clamp(uint32_t(hwaccel->numTimeSegments*time), 0u, hwaccel->numTimeSegments-1);
}
hwaccel_ptr = (intel_raytracing_acceleration_structure_t) hwaccel->AccelTable[bvh_id];
intel_ray_query_t query = intel_ray_query_init(raydesc, hwaccel_ptr);
intel_ray_query_start_traversal(query);
intel_ray_query_sync(query);
if (args->feature_mask & TRAV_LOOP_FEATURES) {
trav_loop(query,ray,scenes,&context,args->feature_mask);
}
if (intel_has_committed_hit(query))
ray_i->tfar = -INFINITY;
intel_ray_query_abandon(query);
}
RTC_NAMESPACE_END;

View file

@ -0,0 +1,37 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#pragma clang diagnostic ignored "-W#pragma-messages"
#include <sycl/sycl.hpp>
#pragma clang diagnostic pop
#include "../config.h"
#include "../../include/embree4/rtcore.h"
#include "../../include/embree4/rtcore_ray.h"
namespace embree
{
inline uint8_t mask32_to_mask8( uint32_t mask ) {
#if defined(EMBREE_RAY_MASK)
return (mask & 0xFFFFFF80) ? (0x80 | mask) : mask; // bit 7 indicates that some bit >= 7 is set
#else
return 1;
#endif
}
struct EmbreeHWAccel
{
uint32_t numTimeSegments;
void* AccelTable[1];
};
}
SYCL_EXTERNAL void rtcIntersectRTHW(sycl::global_ptr<RTCSceneTy> hscene, sycl::private_ptr<RTCRayQueryContext> context, sycl::private_ptr<RTCRayHit> rayhit, sycl::private_ptr<RTCIntersectArguments> args);
SYCL_EXTERNAL void rtcOccludedRTHW(sycl::global_ptr<RTCSceneTy> hscene, sycl::private_ptr<RTCRayQueryContext> context, sycl::private_ptr<RTCRay> ray, sycl::private_ptr<RTCOccludedArguments> args);

View file

@ -0,0 +1,652 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "rthwif_embree.h"
#include "rthwif_embree_builder.h"
#include "../common/scene.h"
#include "../builders/primrefgen.h"
#include "../level_zero/ze_wrapper.h"
namespace embree
{
using namespace embree::isa;
enum Flags : uint32_t {
NONE,
DEPTH_TEST_LESS_EQUAL = 1 << 0 // when set we use <= for depth test, otherwise <
};
static void align(size_t& ofs, size_t alignment) {
ofs = (ofs+(alignment-1))&(-alignment);
}
struct DispatchGlobals
{
uint64_t rtMemBasePtr; // base address of the allocated stack memory
uint64_t callStackHandlerKSP; // this is the KSP of the continuation handler that is invoked by BTD when the read KSP is 0
uint32_t asyncStackSize; // async-RT stack size in 64 byte blocks
uint32_t numDSSRTStacks : 16; // number of stacks per DSS
uint32_t syncRayQueryCount : 4; // number of ray queries in the sync-RT stack: 0-15 mapped to: 1-16
unsigned _reserved_mbz : 12;
uint32_t maxBVHLevels; // the maximal number of supported instancing levels (0->8, 1->1, 2->2, ...)
Flags flags; // per context control flags
static inline size_t GetDispatchGlobalsSize()
{
size_t maxBVHLevels = RTC_MAX_INSTANCE_LEVEL_COUNT+1;
size_t rtstack_bytes = (64+maxBVHLevels*(64+32)+63)&-64;
size_t num_rtstacks = 1<<17; // this is sufficiently large also for PVC
size_t dispatchGlobalSize = 128+num_rtstacks*rtstack_bytes;
return dispatchGlobalSize;
}
};
void* zeRTASInitExp(sycl::device device, sycl::context context)
{
if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
return nullptr;
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
size_t dispatchGlobalSize = DispatchGlobals::GetDispatchGlobalsSize();
void* dispatchGlobalsPtr = rthwifAllocAccelBuffer(nullptr,dispatchGlobalSize,device,context);
memset(dispatchGlobalsPtr, 0, dispatchGlobalSize);
DispatchGlobals* dg = (DispatchGlobals*) dispatchGlobalsPtr;
dg->rtMemBasePtr = (uint64_t) dispatchGlobalsPtr + dispatchGlobalSize;
dg->callStackHandlerKSP = 0;
dg->asyncStackSize = 0;
dg->numDSSRTStacks = 0;
dg->syncRayQueryCount = 0;
dg->_reserved_mbz = 0;
dg->maxBVHLevels = RTC_MAX_INSTANCE_LEVEL_COUNT+1;
dg->flags = DEPTH_TEST_LESS_EQUAL;
return dispatchGlobalsPtr;
#else
return nullptr;
#endif
}
void rthwifCleanup(Device* embree_device, void* dispatchGlobalsPtr, sycl::context context)
{
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
size_t dispatchGlobalSize = DispatchGlobals::GetDispatchGlobalsSize();
rthwifFreeAccelBuffer(embree_device, dispatchGlobalsPtr, dispatchGlobalSize, context);
#endif
}
int rthwifIsSYCLDeviceSupported(const sycl::device& sycl_device)
{
if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
return -1;
/* check if ray tracing extension is available */
sycl::platform platform = sycl_device.get_platform();
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
uint32_t count = 0;
std::vector<ze_driver_extension_properties_t> extensions;
ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
if (result != ZE_RESULT_SUCCESS) return -1;
extensions.resize(count);
result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
if (result != ZE_RESULT_SUCCESS) return -1;
bool ze_extension_ray_tracing = false;
bool ze_rtas_builder = false;
for (uint32_t i=0; i<extensions.size(); i++)
{
//std::cout << extensions[i].name << " version " << extensions[i].version << std::endl;
if (strncmp("ZE_extension_raytracing",extensions[i].name,sizeof(extensions[i].name)) == 0)
ze_extension_ray_tracing = true;
if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
ze_rtas_builder = true;
}
if (!ze_extension_ray_tracing)
return -1;
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
if (!ze_rtas_builder)
return -1;
#endif
/* check if ray queries are supported */
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(sycl_device);
/* check if ray tracing hardware is supported */
ze_device_raytracing_ext_properties_t raytracing_properties;
memset(&raytracing_properties,0,sizeof(raytracing_properties));
raytracing_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_RAYTRACING_EXT_PROPERTIES;
raytracing_properties.pNext = nullptr;
ze_device_module_properties_t module_properties;
memset(&module_properties,0,sizeof(module_properties));
module_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES;
module_properties.pNext = &raytracing_properties;
result = ZeWrapper::zeDeviceGetModuleProperties(hDevice, &module_properties);
if (result != ZE_RESULT_SUCCESS) return -1;
const bool rayQuerySupported = raytracing_properties.flags & ZE_DEVICE_RAYTRACING_EXT_FLAG_RAYQUERY;
if (!rayQuerySupported)
return -1;
return sycl_device.get_info<sycl::info::device::max_compute_units>();
}
void* rthwifAllocAccelBuffer(Device* embree_device, size_t bytes, sycl::device device, sycl::context context)
{
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);
ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
ze_result_t err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
if (err != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDeviceGetProperties properties failed");
ze_raytracing_mem_alloc_ext_desc_t rt_desc;
rt_desc.stype = ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC;
rt_desc.pNext = nullptr;
rt_desc.flags = 0;
ze_relaxed_allocation_limits_exp_desc_t relaxed;
relaxed.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC;
relaxed.pNext = &rt_desc;
relaxed.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE;
ze_device_mem_alloc_desc_t device_desc;
device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
device_desc.pNext = &relaxed;
device_desc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED;
device_desc.ordinal = 0;
ze_host_mem_alloc_desc_t host_desc;
host_desc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
host_desc.pNext = nullptr;
host_desc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED;
void* ptr = nullptr;
if (embree_device) embree_device->memoryMonitor(bytes,false);
ze_result_t result = ZeWrapper::zeMemAllocShared(hContext,&device_desc,&host_desc,bytes,rtasProp.rtasBufferAlignment,hDevice,&ptr);
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"rtas memory allocation failed");
return ptr;
}
void rthwifFreeAccelBuffer(Device* embree_device, void* ptr, size_t bytes, sycl::context context)
{
if (ptr == nullptr) return;
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
if (embree_device) embree_device->memoryMonitor(-bytes,false);
ze_result_t result = ZeWrapper::zeMemFree(hContext,ptr);
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"rtas memory free failed");
}
struct GEOMETRY_INSTANCE_DESC : ze_rtas_builder_instance_geometry_info_exp_t
{
ze_rtas_transform_float3x4_aligned_column_major_exp_t xfmdata;
};
struct GEOMETRY_TYPE
{
GEOMETRY_TYPE(ze_rtas_builder_geometry_type_exp_t type, size_t extraBytes = 0)
: type(type), extraBytes(extraBytes) {}
ze_rtas_builder_geometry_type_exp_t type;
size_t extraBytes;
};
size_t sizeof_RTHWIF_GEOMETRY(GEOMETRY_TYPE type)
{
switch (type.type) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return sizeof(ze_rtas_builder_triangles_geometry_info_exp_t)+type.extraBytes;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return sizeof(ze_rtas_builder_quads_geometry_info_exp_t)+type.extraBytes;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return sizeof(ze_rtas_builder_procedural_geometry_info_exp_t)+type.extraBytes;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : return sizeof(ze_rtas_builder_instance_geometry_info_exp_t)+type.extraBytes;
default: assert(false); return 0;
}
}
size_t alignof_RTHWIF_GEOMETRY(GEOMETRY_TYPE type)
{
switch (type.type) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return alignof(ze_rtas_builder_triangles_geometry_info_exp_t);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return alignof(ze_rtas_builder_quads_geometry_info_exp_t);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return alignof(ze_rtas_builder_procedural_geometry_info_exp_t);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : return alignof(ze_rtas_builder_instance_geometry_info_exp_t);
default: assert(false); return 0;
}
}
ze_rtas_builder_geometry_exp_flags_t getGeometryFlags(Scene* scene, Geometry* geom)
{
/* invoke any hit callback when Embree filter functions are present */
ze_rtas_builder_geometry_exp_flags_t gflags = 0;
if (geom->hasArgumentFilterFunctions() || geom->hasGeometryFilterFunctions())
gflags = ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE;
#if defined(EMBREE_RAY_MASK)
/* invoke any hit callback when high mask bits are enabled */
if (geom->mask & 0xFFFFFF80)
gflags = ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE;
#endif
return gflags;
}
void createGeometryDesc(ze_rtas_builder_triangles_geometry_info_exp_t* out, Scene* scene, TriangleMesh* geom)
{
memset(out,0,sizeof(ze_rtas_builder_triangles_geometry_info_exp_t));
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES;
out->geometryFlags = getGeometryFlags(scene,geom);
out->geometryMask = mask32_to_mask8(geom->mask);
out->triangleFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32;
out->vertexFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
out->pTriangleBuffer = (ze_rtas_triangle_indices_uint32_exp_t*) geom->triangles.getPtr();
out->triangleCount = geom->triangles.size();
out->triangleStride = geom->triangles.getStride();
out->pVertexBuffer = (ze_rtas_float3_exp_t*) geom->vertices0.getPtr();
out->vertexCount = geom->vertices0.size();
out->vertexStride = geom->vertices0.getStride();
}
void createGeometryDesc(ze_rtas_builder_quads_geometry_info_exp_t* out, Scene* scene, QuadMesh* geom)
{
memset(out,0,sizeof(ze_rtas_builder_quads_geometry_info_exp_t));
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS;
out->geometryFlags = getGeometryFlags(scene,geom);
out->geometryMask = mask32_to_mask8(geom->mask);
out->quadFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32;
out->vertexFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
out->pQuadBuffer = (ze_rtas_quad_indices_uint32_exp_t*) geom->quads.getPtr();
out->quadCount = geom->quads.size();
out->quadStride = geom->quads.getStride();
out->pVertexBuffer = (ze_rtas_float3_exp_t*) geom->vertices0.getPtr();
out->vertexCount = geom->vertices0.size();
out->vertexStride = geom->vertices0.getStride();
}
void getProceduralAABB(ze_rtas_geometry_aabbs_exp_cb_params_t* params)
{
assert(params->stype == ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS);
BBox1f time_range = * (BBox1f*) params->pBuildUserPtr;
Geometry* geom = (Geometry*) params->pGeomUserPtr;
ze_rtas_aabb_exp_t* boundsOut = params->pBoundsOut;
for (uint32_t i=0; i<params->primIDCount; i++)
{
const uint32_t primID = params->primID+i;
PrimRef prim;
range<size_t> r(primID);
size_t k = 0;
uint32_t geomID = 0;
PrimInfo pinfo = empty;
if (geom->numTimeSegments() > 0)
pinfo = geom->createPrimRefArrayMB(&prim,time_range,r,k,geomID);
else
pinfo = geom->createPrimRefArray(&prim,r,k,geomID);
/* invalid primitive */
if (pinfo.size() == 0) {
boundsOut[i].lower.x = pos_inf;
boundsOut[i].lower.y = pos_inf;
boundsOut[i].lower.z = pos_inf;
boundsOut[i].upper.x = neg_inf;
boundsOut[i].upper.y = neg_inf;
boundsOut[i].upper.z = neg_inf;
}
else
{
BBox3fa bounds = prim.bounds();
boundsOut[i].lower.x = bounds.lower.x;
boundsOut[i].lower.y = bounds.lower.y;
boundsOut[i].lower.z = bounds.lower.z;
boundsOut[i].upper.x = bounds.upper.x;
boundsOut[i].upper.y = bounds.upper.y;
boundsOut[i].upper.z = bounds.upper.z;
}
}
};
void createGeometryDescProcedural(ze_rtas_builder_procedural_geometry_info_exp_t* out, Scene* scene, Geometry* geom)
{
uint32_t numPrimitives = geom->size();
if (GridMesh* mesh = dynamic_cast<GridMesh*>(geom))
numPrimitives = mesh->getNumTotalQuads(); // FIXME: slow
memset(out,0,sizeof(ze_rtas_builder_procedural_geometry_info_exp_t));
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL;
out->geometryFlags = 0;
out->geometryMask = mask32_to_mask8(geom->mask);
out->primCount = numPrimitives;
out->pfnGetBoundsCb = getProceduralAABB;
out->pGeomUserPtr = geom;
}
void createGeometryDesc(GEOMETRY_INSTANCE_DESC* out, Scene* scene, Instance* geom)
{
assert(geom->gsubtype == AccelSet::GTY_SUBTYPE_INSTANCE_QUATERNION);
memset(out,0,sizeof(GEOMETRY_INSTANCE_DESC));
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE;
out->instanceFlags = 0;
out->geometryMask = mask32_to_mask8(geom->mask);
out->instanceUserID = 0;
const AffineSpace3fa local2world = geom->getLocal2World();
out->transformFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR;
out->pTransform = (float*) &out->xfmdata;
out->pBounds = (ze_rtas_aabb_exp_t*) &dynamic_cast<Scene*>(geom->object)->hwaccel_bounds;
out->xfmdata = *(ze_rtas_transform_float3x4_aligned_column_major_exp_t*) &local2world;
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) dynamic_cast<Scene*>(geom->object)->hwaccel.data();
out->pAccelerationStructure = hwaccel->AccelTable[0];
}
void createGeometryDesc(ze_rtas_builder_instance_geometry_info_exp_t* out, Scene* scene, Instance* geom)
{
assert(geom->gsubtype == AccelSet::GTY_SUBTYPE_DEFAULT);
memset(out,0,sizeof(ze_rtas_builder_instance_geometry_info_exp_t));
out->geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE;
out->instanceFlags = 0;
out->geometryMask = mask32_to_mask8(geom->mask);
out->instanceUserID = 0;
out->transformFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR;
out->pTransform = (float*) &geom->local2world[0];
out->pBounds = (ze_rtas_aabb_exp_t*) &dynamic_cast<Scene*>(geom->object)->hwaccel_bounds;
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) dynamic_cast<Scene*>(geom->object)->hwaccel.data();
out->pAccelerationStructure = hwaccel->AccelTable[0];
}
void createGeometryDesc(char* out, Scene* scene, Geometry* geom, GEOMETRY_TYPE type)
{
switch (type.type) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return createGeometryDesc((ze_rtas_builder_triangles_geometry_info_exp_t*)out,scene,dynamic_cast<TriangleMesh*>(geom));
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return createGeometryDesc((ze_rtas_builder_quads_geometry_info_exp_t*)out,scene,dynamic_cast<QuadMesh*>(geom));
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return createGeometryDescProcedural((ze_rtas_builder_procedural_geometry_info_exp_t*)out,scene,geom);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE:
if (type.extraBytes) return createGeometryDesc((GEOMETRY_INSTANCE_DESC*)out,scene,dynamic_cast<Instance*>(geom));
else return createGeometryDesc((ze_rtas_builder_instance_geometry_info_exp_t*)out,scene,dynamic_cast<Instance*>(geom));
default: assert(false);
}
}
ze_rtas_builder_build_quality_hint_exp_t convertBuildQuality(RTCBuildQuality quality_flags)
{
switch (quality_flags) {
case RTC_BUILD_QUALITY_LOW : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_LOW;
case RTC_BUILD_QUALITY_MEDIUM : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_MEDIUM;
case RTC_BUILD_QUALITY_HIGH : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH;
case RTC_BUILD_QUALITY_REFIT : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_LOW;
default : return ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_MEDIUM;
}
}
ze_rtas_builder_build_op_exp_flags_t convertBuildFlags(RTCSceneFlags scene_flags, RTCBuildQuality quality_flags)
{
ze_rtas_builder_build_op_exp_flags_t result = 0;
if (scene_flags & RTC_SCENE_FLAG_COMPACT) result |= ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_COMPACT;
/* only in high quality build mode spatial splits are allowed in Embree */
if (quality_flags != RTC_BUILD_QUALITY_HIGH)
result |= ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION;
return result;
}
BBox3f rthwifBuild(Scene* scene, AccelBuffer& accel)
{
DeviceGPU* gpuDevice = dynamic_cast<DeviceGPU*>(scene->device);
if (gpuDevice == nullptr) throw std::runtime_error("internal error");
if (scene->size() > 0x00FFFFFF)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "too many geometries inside scene");
sycl::device device = gpuDevice->getGPUDevice();
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);
sycl::platform platform = device.get_platform();
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
/* create L0 builder object */
ze_rtas_builder_exp_desc_t builderDesc = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC };
ze_rtas_builder_exp_handle_t hBuilder = nullptr;
ze_result_t err = ZeWrapper::zeRTASBuilderCreateExp(hDriver, &builderDesc, &hBuilder);
if (err != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "ze_rtas_builder creation failed");
auto getType = [&](unsigned int geomID) -> GEOMETRY_TYPE
{
/* no HW support for MB yet */
if (scene->get(geomID)->numTimeSegments() > 0)
return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL;
switch (scene->get(geomID)->getType()) {
case Geometry::GTY_FLAT_LINEAR_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ROUND_LINEAR_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ORIENTED_LINEAR_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_CONE_LINEAR_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_FLAT_BEZIER_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ROUND_BEZIER_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ORIENTED_BEZIER_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_FLAT_BSPLINE_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ROUND_BSPLINE_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ORIENTED_BSPLINE_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_FLAT_HERMITE_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ROUND_HERMITE_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ORIENTED_HERMITE_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_FLAT_CATMULL_ROM_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ROUND_CATMULL_ROM_CURVE : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ORIENTED_CATMULL_ROM_CURVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_TRIANGLE_MESH: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES; break;
case Geometry::GTY_QUAD_MESH : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS; break;
case Geometry::GTY_GRID_MESH : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_SUBDIV_MESH : assert(false); return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_SPHERE_POINT : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_DISC_POINT : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_ORIENTED_DISC_POINT: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_USER_GEOMETRY : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_INSTANCE_ARRAY : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
#if RTC_MAX_INSTANCE_LEVEL_COUNT < 2
case Geometry::GTY_INSTANCE_CHEAP :
case Geometry::GTY_INSTANCE_EXPENSIVE: {
Instance* instance = scene->get<Instance>(geomID);
EmbreeHWAccel* object = (EmbreeHWAccel*)((Scene*)instance->object)->hwaccel.data();
if (object->numTimeSegments > 1) return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; // we need to handle instances in procedural mode if instanced scene has motion blur
if (instance->mask & 0xFFFFFF80) return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; // we need to handle instances in procedural mode if high mask bits are set
else if (instance->gsubtype == AccelSet::GTY_SUBTYPE_INSTANCE_QUATERNION)
return GEOMETRY_TYPE(ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE,sizeof(GEOMETRY_INSTANCE_DESC)-sizeof(ze_rtas_builder_instance_geometry_info_exp_t));
else return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE;
}
#else
case Geometry::GTY_INSTANCE_CHEAP : return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
case Geometry::GTY_INSTANCE_EXPENSIVE: return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL; break;
#endif
default: assert(false); return ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL;
}
};
/* calculate maximal number of motion blur time segments in scene */
uint32_t maxTimeSegments = 1;
for (size_t geomID=0; geomID<scene->size(); geomID++)
{
Geometry* geom = scene->get(geomID);
if (geom == nullptr) continue;
maxTimeSegments = std::max(maxTimeSegments, geom->numTimeSegments());
}
/* calculate size of geometry descriptor buffer */
size_t totalBytes = 0;
for (size_t geomID=0; geomID<scene->size(); geomID++)
{
Geometry* geom = scene->get(geomID);
if (geom == nullptr) continue;
const GEOMETRY_TYPE type = getType(geomID);
align(totalBytes,alignof_RTHWIF_GEOMETRY(type));
totalBytes += sizeof_RTHWIF_GEOMETRY(type);
}
/* fill geomdesc buffers */
mvector<ze_rtas_builder_geometry_info_exp_t*> geomDescr(scene->device, scene->size());
mvector<char> geomDescrData(scene->device,totalBytes);
size_t offset = 0;
for (size_t geomID=0; geomID<scene->size(); geomID++)
{
geomDescr[geomID] = nullptr;
Geometry* geom = scene->get(geomID);
if (geom == nullptr) continue;
const GEOMETRY_TYPE type = getType(geomID);
align(offset,alignof_RTHWIF_GEOMETRY(type));
createGeometryDesc(&geomDescrData[offset],scene,scene->get(geomID),type);
geomDescr[geomID] = (ze_rtas_builder_geometry_info_exp_t*) &geomDescrData[offset];
offset += sizeof_RTHWIF_GEOMETRY(type);
assert(offset <= geomDescrData.size());
}
ze_rtas_parallel_operation_exp_handle_t parallelOperation = nullptr;
err = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &parallelOperation);
if (err != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "parallel operation creation failed");
ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
if (err != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDeviceGetProperties properties failed");
/* estimate static accel size */
BBox1f time_range(0,1);
ze_rtas_aabb_exp_t bounds;
ze_rtas_builder_build_op_exp_desc_t args;
memset(&args,0,sizeof(args));
args.stype = ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC;
args.pNext = nullptr;
args.rtasFormat = rtasProp.rtasFormat;
args.buildQuality = convertBuildQuality(scene->quality_flags);
args.buildFlags = convertBuildFlags(scene->scene_flags,scene->quality_flags);
args.ppGeometries = (const ze_rtas_builder_geometry_info_exp_t**) geomDescr.data();
args.numGeometries = geomDescr.size();
/* just for debugging purposes */
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
ze_rtas_builder_build_op_debug_exp_desc_t buildOpDebug = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC };
buildOpDebug.dispatchGlobalsPtr = dynamic_cast<DeviceGPU*>(scene->device)->dispatchGlobalsPtr;
args.pNext = &buildOpDebug;
#endif
ze_rtas_builder_exp_properties_t sizeTotal = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES };
err = ZeWrapper::zeRTASBuilderGetBuildPropertiesExp(hBuilder,&args,&sizeTotal);
if (err != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN,"BVH size estimate failed");
/* allocate scratch buffer */
mvector<char> scratchBuffer(scene->device,sizeTotal.scratchBufferSizeBytes);
size_t headerBytes = sizeof(EmbreeHWAccel) + std::max(1u,maxTimeSegments)*8;
align(headerBytes,128);
/* build BVH */
BBox3f fullBounds = empty;
while (true)
{
/* estimate size of all mblur BVHs */
ze_rtas_builder_exp_properties_t size = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES };
size.rtasBufferSizeBytesExpected = maxTimeSegments*sizeTotal.rtasBufferSizeBytesExpected;
size.rtasBufferSizeBytesMaxRequired = maxTimeSegments*sizeTotal.rtasBufferSizeBytesMaxRequired;
size_t bytes = headerBytes+size.rtasBufferSizeBytesExpected;
/* allocate BVH data */
if (accel.size() < bytes) accel.resize(bytes);
memset(accel.data(),0,accel.size()); // FIXME: not required
/* build BVH for each time segment */
for (uint32_t i=0; i<maxTimeSegments; i++)
{
const float t0 = float(i+0)/float(maxTimeSegments);
const float t1 = float(i+1)/float(maxTimeSegments);
time_range = BBox1f(t0,t1);
void* accelBuffer = accel.data() + headerBytes + i*sizeTotal.rtasBufferSizeBytesExpected;
size_t accelBufferBytes = sizeTotal.rtasBufferSizeBytesExpected;
bounds = { { INFINITY, INFINITY, INFINITY }, { -INFINITY, -INFINITY, -INFINITY } };
err = ZeWrapper::zeRTASBuilderBuildExp(hBuilder,&args,
scratchBuffer.data(),scratchBuffer.size(),
accelBuffer, accelBufferBytes,
parallelOperation,
&time_range, &bounds, nullptr);
if (parallelOperation)
{
assert(err == ZE_RESULT_EXP_RTAS_BUILD_DEFERRED);
ze_rtas_parallel_operation_exp_properties_t prop = { ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES };
err = ZeWrapper::zeRTASParallelOperationGetPropertiesExp(parallelOperation,&prop);
if (err != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "get max concurrency failed");
parallel_for(prop.maxConcurrency, [&](uint32_t) { err = ZeWrapper::zeRTASParallelOperationJoinExp(parallelOperation); });
}
fullBounds.extend(*(BBox3f*) &bounds);
if (err == ZE_RESULT_EXP_RTAS_BUILD_RETRY)
{
if (sizeTotal.rtasBufferSizeBytesExpected == sizeTotal.rtasBufferSizeBytesMaxRequired)
throw_RTCError(RTC_ERROR_UNKNOWN,"build error");
sizeTotal.rtasBufferSizeBytesExpected = std::min(sizeTotal.rtasBufferSizeBytesMaxRequired,(size_t(1.2*sizeTotal.rtasBufferSizeBytesExpected)+127)&-128);
break;
}
if (err != ZE_RESULT_SUCCESS) break;
}
if (err != ZE_RESULT_EXP_RTAS_BUILD_RETRY) break;
}
if (err != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN,"build error");
/* destroy parallel operation */
err = ZeWrapper::zeRTASParallelOperationDestroyExp(parallelOperation);
if (err != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "parallel operation destruction failed");
/* destroy rtas builder again */
err = ZeWrapper::zeRTASBuilderDestroyExp(hBuilder);
if (err != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "ze_rtas_builder destruction failed");
EmbreeHWAccel* hwaccel = (EmbreeHWAccel*) accel.data();
hwaccel->numTimeSegments = maxTimeSegments;
for (size_t i=0; i<maxTimeSegments; i++)
hwaccel->AccelTable[i] = (char*)hwaccel + headerBytes + i*sizeTotal.rtasBufferSizeBytesExpected;
return fullBounds;
}
}

View file

@ -0,0 +1,74 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../common/sys/platform.h"
#include "../../common/sys/sycl.h"
#include "../../common/sys/vector.h"
#include "../../common/math/bbox.h"
#include "../../include/embree4/rtcore.h"
namespace embree
{
class Scene;
void* rthwifAllocAccelBuffer(Device* embree_device, size_t bytes, sycl::device device, sycl::context context);
void rthwifFreeAccelBuffer(Device* embree_device, void* ptr, size_t bytes, sycl::context context);
/*! allocator that performs BVH memory allocations */
template<typename T>
struct AccelAllocator
{
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
AccelAllocator()
: device(nullptr), context(nullptr) {}
AccelAllocator(Device* embree_device, const sycl::device& device, const sycl::context& context)
: embree_device(embree_device), device(&device), context(&context) {}
__forceinline pointer allocate( size_type n ) {
if (context && device)
return (pointer) rthwifAllocAccelBuffer(embree_device,n*sizeof(T),*device,*context);
else
return nullptr;
}
__forceinline void deallocate( pointer p, size_type n ) {
if (context)
rthwifFreeAccelBuffer(embree_device,p,n*sizeof(T),*context);
}
__forceinline void construct( pointer p, const_reference val ) {
new (p) T(val);
}
__forceinline void destroy( pointer p ) {
p->~T();
}
private:
Device* embree_device;
const sycl::device* device;
const sycl::context* context;
};
typedef vector_t<char,AccelAllocator<char>> AccelBuffer;
void* zeRTASInitExp(sycl::device device, sycl::context context);
void rthwifCleanup(Device* embree_device, void* dispatchGlobalsPtr, sycl::context context);
int rthwifIsSYCLDeviceSupported(const sycl::device& sycl_device);
BBox3f rthwifBuild(Scene* scene, AccelBuffer& buffer_o);
}