rendering-in-cgi/Framework/external/embree/kernels/rthwif/testing/rthwif_test.cpp
2024-04-23 10:14:24 +02:00

2267 lines
80 KiB
C++

// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define NOMINMAX
// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wignored-attributes"
#include <CL/sycl.hpp>
#include "tbb/tbb.h"
#if defined(ZE_RAYTRACING)
#include "../rtbuild/sys/sysinfo.h"
#include "../rtbuild/sys/vector.h"
#include "../rtbuild/math/vec2.h"
#include "../rtbuild/math/vec3.h"
#include "../rtbuild/math/bbox.h"
#include "../rtbuild/math/affinespace.h"
#else
#include "../../../common/sys/sysinfo.h"
#include "../../../common/sys/vector.h"
#include "../../../common/math/vec2.h"
#include "../../../common/math/vec3.h"
#include "../../../common/math/bbox.h"
#include "../../../common/math/lbbox.h"
#include "../../../common/math/affinespace.h"
#endif
#define _USE_MATH_DEFINES
#include <math.h>
#include "../rttrace/rttrace.h"
#include <level_zero/ze_wrapper.h>
#include <vector>
#include <map>
#include <iostream>
#include <fstream>
/* Forward declaration of embree::getSeconds(); the definition is not part of
 * this section of the file (presumably a timer returning seconds -- confirm). */
namespace embree {
double getSeconds();
}
/* Global SYCL device and context. The custom operator new/delete overloads and
 * the USM allocators of the classes below allocate through these objects. */
sycl::device device;
sycl::context context;
/* Not referenced in this section of the file; presumably handed to the
 * ray tracing kernels elsewhere -- confirm. */
void* dispatchGlobalsPtr = nullptr;
/*! State of the pseudo random number generator used by the RandomSampler_*
 *  functions: a single 32-bit word that LCG_next advances on every draw. */
struct RandomSampler {
unsigned int s;
};
/*! Mixes one 32-bit key word k into the running hash value and returns
 *  the new hash: the key is scrambled (multiply, rotate, multiply), xor-ed
 *  into the state, and the state is then rotated, multiplied and offset. */
unsigned int MurmurHash3_mix(unsigned int hash, unsigned int k)
{
  /* scramble the incoming key: multiply, rotate left by 15 bits, multiply */
  unsigned int key = k * 0xcc9e2d51u;
  key = (key << 15) | (key >> 17);
  key *= 0x1b873593u;

  /* fold the key into the state, rotate left by 13 bits, then *5 + constant */
  const unsigned int folded = hash ^ key;
  const unsigned int rotated = (folded << 13) | (folded >> 19);
  return rotated * 5u + 0xe6546b64u;
}
/*! Final avalanche step of the hash: three xor-shift stages interleaved
 *  with two multiplications. Maps 0 to 0. */
unsigned int MurmurHash3_finalize(unsigned int hash)
{
  unsigned int h = hash;
  h = (h ^ (h >> 16)) * 0x85ebca6bu;
  h = (h ^ (h >> 13)) * 0xc2b2ae35u;
  return h ^ (h >> 16);
}
/*! Advances a linear congruential generator by one step:
 *  returns value * 1664525 + 1013904223 in unsigned (wrapping) arithmetic. */
unsigned int LCG_next(unsigned int value)
{
  constexpr unsigned int multiplier = 1664525u;
  constexpr unsigned int increment = 1013904223u;
  return multiplier * value + increment;
}
/*! Seeds the sampler from an integer id: the id is mixed into a zero hash
 *  and the finalized hash becomes the generator state. */
void RandomSampler_init(RandomSampler& self, int id)
{
  const unsigned int mixed = MurmurHash3_mix(0u, id);
  self.s = MurmurHash3_finalize(mixed);
}
/*! Advances the generator and returns the new state shifted right by one
 *  bit, i.e. a non-negative int. */
int RandomSampler_getInt(RandomSampler& self) {
  const unsigned int next = LCG_next(self.s);
  self.s = next;
  return next >> 1;
}
/*! Advances the generator and returns the full 32-bit state. */
unsigned int RandomSampler_getUInt(RandomSampler& self) {
  const unsigned int next = LCG_next(self.s);
  self.s = next;
  return next;
}
/*! Draws the next non-negative int and scales it by 2^-31 to obtain a
 *  float in the unit range. */
float RandomSampler_getFloat(RandomSampler& self) {
  const float scale = 4.656612873077392578125e-10f; /* 2^-31 */
  const int sample = RandomSampler_getInt(self);
  return (float)sample * scale;
}
/*! Draws three consecutive floats from the sampler and packs them, in draw
 *  order, into the x, y and z components of a float3. */
sycl::float3 RandomSampler_getFloat3(RandomSampler& self)
{
  /* the array is filled front to back so the draw order stays x, y, z */
  float c[3];
  for (float& value : c)
    value = RandomSampler_getFloat(self);
  return sycl::float3(c[0],c[1],c[2]);
}
/* Global random number generator shared by the scene construction code
 * (triangle selection, random transforms, instance user IDs, build quality). */
RandomSampler rng;
/* Level Zero RTAS builder and parallel operation handles; they are only
 * declared here, creation and use are outside this section of the file. */
ze_rtas_builder_exp_handle_t hBuilder = nullptr;
ze_rtas_parallel_operation_exp_handle_t parallelOperation = nullptr;
/*! Instancing variants a test can select. The enumerators are not used in
 *  this section of the file; presumably NONE = no instances, SW_INSTANCING =
 *  instances handled in software, HW_INSTANCING = hardware instances -- confirm. */
enum class InstancingType
{
NONE,
SW_INSTANCING,
HW_INSTANCING
};
/*! Kinds of tests this program can run; the trailing comment on each
 *  enumerator describes the scene content and what is checked. */
enum class TestType
{
TRIANGLES_COMMITTED_HIT, // triangles
TRIANGLES_POTENTIAL_HIT, // triangles + filter + check potential hit
TRIANGLES_ANYHIT_SHADER_COMMIT, // triangles + filter + commit
TRIANGLES_ANYHIT_SHADER_REJECT, // triangles + filter + reject
PROCEDURALS_COMMITTED_HIT, // procedural triangles
BUILD_TEST_TRIANGLES, // test BVH builder with triangles
BUILD_TEST_PROCEDURALS, // test BVH builder with procedurals
BUILD_TEST_INSTANCES, // test BVH builder with instances
BUILD_TEST_MIXED, // test BVH builder with mixed scene (triangles, procedurals, and instances)
BENCHMARK_TRIANGLES, // benchmark BVH builder with triangles
BENCHMARK_PROCEDURALS, // benchmark BVH builder with procedurals
};
/*! Build mode passed through the buildAccel functions. Its interpretation is
 *  outside this section; presumably it selects whether the acceleration
 *  structure buffer is sized by the expected or by the worst-case size -- confirm. */
enum class BuildMode
{
BUILD_EXPECTED_SIZE,
BUILD_WORST_CASE_SIZE
};
/*! Input record of one test: ray origin and direction, the tnear/tfar
 *  values, and the ray mask and flags (how they are consumed is outside
 *  this section of the file). */
struct TestInput
{
sycl::float3 org;
sycl::float3 dir;
float tnear;
float tfar;
uint32_t mask;
uint32_t flags;
};
/*! Classification of a test result, stored in TestOutput::hit_type:
 *  a committed hit, a potential hit, or a miss. */
enum TestHitType {
TEST_COMMITTED_HIT,
TEST_POTENTIAL_HIT,
TEST_MISS
};
/*! Output record of one test. compareTestOutput() compares two of these
 *  records field by field (measured output versus expected output). */
struct TestOutput
{
// Ray data at level 0
sycl::float3 ray0_org;
sycl::float3 ray0_dir;
float ray0_tnear;
uint32_t ray0_mask;
uint32_t ray0_flags;
// Ray data at hit bvh_level
sycl::float3 rayN_org;
sycl::float3 rayN_dir;
float rayN_tnear;
uint32_t rayN_mask;
uint32_t rayN_flags;
// Hit data
TestHitType hit_type;
uint32_t bvh_level;
uint32_t hit_candidate;
float t;
float u;
float v;
bool front_face;
uint32_t geomID;
uint32_t primID;
uint32_t instID;
uint32_t instUserID;
// Vertices of the hit triangle
sycl::float3 v0;
sycl::float3 v1;
sycl::float3 v2;
// Transformations reported for the hit
intel_float4x3 world_to_object;
intel_float4x3 object_to_world;
};
/*! Prints an intel_float3 as "(x,y,z)". */
std::ostream& operator<<(std::ostream& out, const intel_float3& v) {
  out << "(" << v.x;
  out << "," << v.y;
  out << "," << v.z << ")";
  return out;
}
/*! Compares the measured output of test `tid` against the expected output,
 *  field by field. Every mismatching field increments `errors`; a message is
 *  printed only while `errors` is still below 16.
 *
 *  NOTE: the COMPARE* helper macros below are defined inside the function
 *  body and are not #undef'd in this function, so they remain defined for
 *  the code that follows. */
void compareTestOutput(uint32_t tid, uint32_t& errors, const TestOutput& test, const TestOutput& expected)
{
/* exact comparison of a member using != */
#define COMPARE(member) \
if (test.member != expected.member) { \
if (errors < 16) \
std::cout << "test" << tid << " " #member " mismatch: output " << test.member << " != expected " << expected.member << std::endl; \
errors++; \
}
/* scalar comparison: mismatch if the absolute difference exceeds eps */
#define COMPARE1(member,eps) \
if (fabs(test.member-expected.member) > eps) { \
if (errors < 16) \
std::cout << "test" << tid << " " #member " mismatch: output " << test.member << " != expected " << expected.member << std::endl; \
errors++; \
}
/* 3-component comparison via the x()/y()/z() accessors: mismatch if any
   component differs by more than eps */
#define COMPARE3(member,eps) { \
const bool x = fabs(test.member.x()-expected.member.x()) > eps; \
const bool y = fabs(test.member.y()-expected.member.y()) > eps; \
const bool z = fabs(test.member.z()-expected.member.z()) > eps; \
if (x || y || z) { \
if (errors < 16) \
std::cout << "test" << tid << " " #member " mismatch: output " << test.member << " != expected " << expected.member << std::endl; \
errors++; \
} \
}
/* 3-component comparison via the x/y/z fields; the components are compared
   exactly, the eps argument is accepted but not used */
#define COMPARE3I(member,eps) { \
const bool x = test.member.x != expected.member.x; \
const bool y = test.member.y != expected.member.y; \
const bool z = test.member.z != expected.member.z; \
if (x || y || z) { \
if (errors < 16) \
std::cout << "test" << tid << " " #member " mismatch: output " << test.member << " != expected " << expected.member << std::endl; \
errors++; \
} \
}
/* tolerance used for the rayN data, the hit distance/barycentrics and the vertices */
float eps = 2E-4;
/* ray data at level 0 is compared with a tolerance of 0 */
COMPARE3(ray0_org,0);
COMPARE3(ray0_dir,0);
COMPARE1(ray0_tnear,0);
COMPARE(ray0_mask);
COMPARE(ray0_flags);
/* ray data at the hit level is compared with tolerance eps */
COMPARE3(rayN_org,eps);
COMPARE3(rayN_dir,eps);
COMPARE1(rayN_tnear,eps);
COMPARE(rayN_mask);
COMPARE(rayN_flags);
/* hit data */
COMPARE(hit_type);
COMPARE(bvh_level);
COMPARE(hit_candidate);
COMPARE1(t,eps);
COMPARE1(u,eps);
COMPARE1(v,eps);
COMPARE(front_face);
COMPARE(geomID);
COMPARE(primID);
COMPARE(instID);
COMPARE(instUserID);
COMPARE3(v0,eps);
COMPARE3(v1,eps);
COMPARE3(v2,eps);
/* transformation matrices are compared exactly (COMPARE3I ignores eps) */
COMPARE3I(world_to_object.vx,eps);
COMPARE3I(world_to_object.vy,eps);
COMPARE3I(world_to_object.vz,eps);
COMPARE3I(world_to_object.p ,eps);
COMPARE3I(object_to_world.vx,eps);
COMPARE3I(object_to_world.vy,eps);
COMPARE3I(object_to_world.vz,eps);
COMPARE3I(object_to_world.p ,eps);
}
struct LinearSpace3f
{
/*! matrix construction from column vectors */
LinearSpace3f(const sycl::float3& vx, const sycl::float3& vy, const sycl::float3& vz)
: vx(vx), vy(vy), vz(vz) {}
/*! matrix construction from row mayor data */
LinearSpace3f(const float m00, const float m01, const float m02,
const float m10, const float m11, const float m12,
const float m20, const float m21, const float m22)
: vx(m00,m10,m20), vy(m01,m11,m21), vz(m02,m12,m22) {}
/*! compute the determinant of the matrix */
const float det() const { return sycl::dot(vx,sycl::cross(vy,vz)); }
/*! compute adjoint matrix */
const LinearSpace3f adjoint() const { return LinearSpace3f(sycl::cross(vy,vz),sycl::cross(vz,vx),sycl::cross(vx,vy)).transposed(); }
/*! compute inverse matrix */
const LinearSpace3f inverse() const
{
const float d = det();
const LinearSpace3f a = adjoint();
return { a.vx/d, a.vy/d, a.vz/d };
}
/*! compute transposed matrix */
const LinearSpace3f transposed() const { return LinearSpace3f(vx.x(),vx.y(),vx.z(),vy.x(),vy.y(),vy.z(),vz.x(),vz.y(),vz.z()); }
/*! return matrix for rotation around arbitrary axis */
static LinearSpace3f rotate(const sycl::float3 _u, const float r) {
sycl::float3 u = normalize(_u);
float s = sinf(r), c = cosf(r);
return LinearSpace3f(u.x()*u.x()+(1-u.x()*u.x())*c, u.x()*u.y()*(1-c)-u.z()*s, u.x()*u.z()*(1-c)+u.y()*s,
u.x()*u.y()*(1-c)+u.z()*s, u.y()*u.y()+(1-u.y()*u.y())*c, u.y()*u.z()*(1-c)-u.x()*s,
u.x()*u.z()*(1-c)-u.y()*s, u.y()*u.z()*(1-c)+u.x()*s, u.z()*u.z()+(1-u.z()*u.z())*c);
}
public:
sycl::float3 vx,vy,vz;
};
/*! Multiplies the matrix m with p (linear part only, no translation). */
sycl::float3 xfmPoint (const LinearSpace3f& m, const sycl::float3& p) {
  const sycl::float3 yz = p.y()*m.vy + p.z()*m.vz;
  return p.x()*m.vx + yz;
}
/*! Affine transformation stored as three column vectors vx, vy, vz of the
 *  linear part plus the translation p (see xfmPoint/xfmVector below). */
struct Transform
{
/*! identity transformation */
Transform ()
: vx(1,0,0), vy(0,1,0), vz(0,0,1), p(0,0,0) {}
/*! construction from the three columns and the translation */
Transform ( sycl::float3 vx, sycl::float3 vy, sycl::float3 vz, sycl::float3 p )
: vx(vx), vy(vy), vz(vz), p(p) {}
/*! implicit conversion from an intel_float4x3 */
Transform ( intel_float4x3 xfm )
: vx(xfm.vx), vy(xfm.vy), vz(xfm.vz), p(xfm.p) {}
/*! implicit conversion to an intel_float4x3 */
operator intel_float4x3 () const {
return { vx, vy, vz, p };
}
sycl::float3 vx,vy,vz,p;
};
/*! Prints a transformation as " Transform {vx, vy, vz, p}". */
std::ostream& operator<<(std::ostream& out, const Transform& t) {
  out << " Transform {";
  out << t.vx << ", " << t.vy << ", " << t.vz << ", " << t.p;
  out << "}";
  return out;
}
/*! Transforms the point p: linear part applied to p plus the translation m.p.
 *  The sum is accumulated innermost term first, as in p.x*vx + (p.y*vy + (p.z*vz + p0)). */
sycl::float3 xfmPoint (const Transform& m, const sycl::float3& p) {
  sycl::float3 acc = p.z()*m.vz + m.p;
  acc = p.y()*m.vy + acc;
  return p.x()*m.vx + acc;
}
/*! Transforms the vector v with the linear part of m only (the translation
 *  m.p is not applied). */
sycl::float3 xfmVector (const Transform& m, const sycl::float3& v) {
  const sycl::float3 yz = v.y()*m.vy + v.z()*m.vz;
  return v.x()*m.vx + yz;
}
/*! Concatenates two transformations: the result applies b first and a second.
 *  The columns of b are transformed as vectors, its translation as a point. */
Transform operator* (const Transform& a, const Transform& b) {
  const sycl::float3 cx = xfmVector(a,b.vx);
  const sycl::float3 cy = xfmVector(a,b.vy);
  const sycl::float3 cz = xfmVector(a,b.vz);
  const sycl::float3 origin = xfmPoint(a,b.p);
  return Transform(cx,cy,cz,origin);
}
/*! Returns the inverse of the transformation a.
 *
 *  The enabled branch copies a into embree's AffineSpace3f, inverts it with
 *  embree's rcp and copies the result back, so the math matches the builder
 *  (see the comment on the #if). The disabled branch inverts the linear part
 *  with the local LinearSpace3f and transforms the negated translation. */
Transform rcp( const Transform& a )
{
#if 1 // match builder math for rcp to have bit accurate data to compare against
/* copy the columns and the translation into embree vector types */
embree::Vec3f vx(a.vx.x(), a.vx.y(), a.vx.z());
embree::Vec3f vy(a.vy.x(), a.vy.y(), a.vy.z());
embree::Vec3f vz(a.vz.x(), a.vz.y(), a.vz.z());
embree::Vec3f p(a. p.x(), a. p.y(), a. p.z());
embree::AffineSpace3f l(embree::LinearSpace3f(vx,vy,vz),p);
/* invert using the rcp overload for embree::AffineSpace3f */
embree::AffineSpace3f il = rcp(l);
/* copy the inverse back into SYCL vector types */
sycl::float3 ivx(il.l.vx.x, il.l.vx.y, il.l.vx.z);
sycl::float3 ivy(il.l.vy.x, il.l.vy.y, il.l.vy.z);
sycl::float3 ivz(il.l.vz.x, il.l.vz.y, il.l.vz.z);
sycl::float3 ip(il.p.x, il.p.y, il.p.z);
return Transform(ivx,ivy,ivz,ip);
#else
const LinearSpace3f l = { a.vx, a.vy, a.vz };
const LinearSpace3f il = l.inverse();
return Transform(il.vx, il.vy, il.vz, -xfmPoint(il,a.p));
#endif
}
/*! Draws a random transformation: a rotation around a random axis by a
 *  random angle, followed by a random translation. The draw order (axis,
 *  angle, translation) determines which random numbers are consumed. */
Transform RandomSampler_getTransform(RandomSampler& self)
{
  /* the 0.01 offset keeps every axis component strictly positive */
  const sycl::float3 axis = RandomSampler_getFloat3(self) + sycl::float3(0.01f);
  const float angle = 2.0f*M_PI*RandomSampler_getFloat(self);
  const sycl::float3 translation = 10.0f*RandomSampler_getFloat3(self);

  const LinearSpace3f rotation = LinearSpace3f::rotate(axis,angle);
  return Transform(rotation.vx,rotation.vy,rotation.vz,translation);
}
struct Bounds3f
{
void extend( sycl::float3 p ) {
lower = sycl::min(lower,p);
upper = sycl::max(upper,p);
}
static Bounds3f empty() {
return { sycl::float3(INFINITY), sycl::float3(-INFINITY) };
}
operator ze_rtas_aabb_exp_t () const {
return { { lower.x(), lower.y(), lower.z() }, { upper.x(), upper.y(), upper.z() } };
}
sycl::float3 lower;
sycl::float3 upper;
};
/*! Prints a bounding box as "Bounds3f {lower,upper}". */
std::ostream& operator<<(std::ostream& out, const Bounds3f& b) {
  out << "Bounds3f {" << b.lower;
  out << "," << b.upper << "}";
  return out;
}
/*! Transforms a bounding box: all eight corners of b are transformed with m
 *  and the axis aligned box around the transformed corners is returned. */
const Bounds3f xfmBounds(const Transform& m, const Bounds3f& b)
{
  const float xs[2] = { b.lower.x(), b.upper.x() };
  const float ys[2] = { b.lower.y(), b.upper.y() };
  const float zs[2] = { b.lower.z(), b.upper.z() };

  /* visit the corners with z varying fastest and x slowest */
  Bounds3f dst = Bounds3f::empty();
  for (const float x : xs) {
    for (const float y : ys) {
      for (const float z : zs) {
        const sycl::float3 corner(x,y,z);
        dst.extend(xfmPoint(m,corner));
      }
    }
  }
  return dst;
}
struct Triangle
{
Triangle()
: v0(0.f,0.f,0.f), v1(0.f,0.f,0.f), v2(0.f,0.f,0.f), index(0) {}
Triangle (sycl::float3 v0, sycl::float3 v1, sycl::float3 v2, uint32_t index)
: v0(v0), v1(v1), v2(v2), index(index) {}
sycl::float3 sample(float u, float v) const {
return (1.0f-u-v)*v0 + u*v1 + v*v2;
}
sycl::float3 center() const {
return (v0+v1+v2)/3.0f;
}
Bounds3f bounds() const
{
const sycl::float3 lower = sycl::min(v0,sycl::min(v1,v2));
const sycl::float3 upper = sycl::max(v0,sycl::max(v1,v2));
return { lower, upper };
}
const Triangle transform( Transform xfm ) const {
return Triangle(xfmPoint(xfm,v0), xfmPoint(xfm,v1), xfmPoint(xfm,v2), index);
}
sycl::float3 v0;
sycl::float3 v1;
sycl::float3 v2;
uint32_t index;
};
/*! Lexicographic "less than" on float3 (x first, then y, then z); used as
 *  the ordering of the vertex map in TriangleMesh. */
struct less_float3 {
  bool operator() ( const sycl::float3& a, const sycl::float3& b ) const {
    const float lhs[3] = { a.x(), a.y(), a.z() };
    const float rhs[3] = { b.x(), b.y(), b.z() };
    /* the first component that differs decides the order */
    for (int i=0; i<3; i++) {
      if (lhs[i] != rhs[i])
        return lhs[i] < rhs[i];
    }
    return false;
  }
};
/*! Prints a triangle as "Triangle {v0,v1,v2}" (the index is not printed). */
std::ostream& operator<<(std::ostream& out, const Triangle& tri) {
  out << "Triangle {";
  out << tri.v0 << "," << tri.v1 << "," << tri.v2;
  out << "}";
  return out;
}
/*! Entry of the triangle map filled by buildTriMap(): for one triangle
 *  (addressed by its index tag) it records the triangle itself, the
 *  local-to-world transformation and the IDs under which it sits in the scene. */
struct Hit
{
Transform local_to_world;
Triangle triangle;
bool procedural_triangle = false;
bool procedural_instance = false;
/* the IDs default to -1 (all bits set in uint32_t), i.e. "not assigned" */
uint32_t instUserID = -1;
uint32_t instID = -1;
uint32_t geomID = -1;
uint32_t primID = -1;
};
/*! Instance geometry descriptor extended with inline storage for the
 *  transformation; InstanceGeometryT::getDesc points pTransform at xfmdata. */
struct GEOMETRY_INSTANCE_DESC : ze_rtas_builder_instance_geometry_info_exp_t
{
ze_rtas_transform_float3x4_aligned_column_major_exp_t xfmdata;
};
/*! Union that can hold any of the geometry descriptors passed to the RTAS
 *  builder; the getDesc() implementations fill the member matching their
 *  geometry. (geometryType presumably aliases the first field of every
 *  descriptor -- confirm against the Level Zero headers.) */
typedef union GEOMETRY_DESC
{
ze_rtas_builder_geometry_type_exp_t geometryType;
ze_rtas_builder_triangles_geometry_info_exp_t Triangles;
ze_rtas_builder_quads_geometry_info_exp_t Quads;
ze_rtas_builder_procedural_geometry_info_exp_t AABBs;
GEOMETRY_INSTANCE_DESC Instance;
} GEOMETRY_DESC;
/*! Abstract base class of the scene geometries (triangle meshes and
 *  instances). Geometries are held through std::shared_ptr<Geometry>. */
struct Geometry
{
  /*! concrete kind of geometry */
  enum Type {
    TRIANGLE_MESH,
    INSTANCE
  };

  Geometry (Type type)
    : type(type) {}

  /*! virtual destructor: the class is polymorphic and used through base
   *  class pointers, so destruction through a Geometry pointer must reach
   *  the derived class */
  virtual ~Geometry() = default;

  /*! fills the RTAS builder descriptor for this geometry */
  virtual void getDesc(GEOMETRY_DESC* desc) = 0;

  /*! transforms the geometry in place; the default implementation throws
   *  std::runtime_error for geometries that do not support it */
  virtual void transform( const Transform xfm) {
    throw std::runtime_error("Geometry::transform not implemented");
  }

  /*! builds acceleration structures this geometry depends on; the default
   *  implementation does nothing */
  virtual void buildAccel(sycl::device& device, sycl::context& context, BuildMode buildMode, ze_rtas_builder_build_quality_hint_exp_t quality) {
  }

  /*! records every triangle reachable through this geometry in tri_map;
   *  id_stack carries the geometry/instance IDs on the path to this geometry */
  virtual void buildTriMap(Transform local_to_world, std::vector<uint32_t> id_stack, uint32_t instUserID, bool procedural_instance, std::vector<Hit>& tri_map) = 0;

  /*! number of primitives this geometry contributes */
  virtual size_t getNumPrimitives() const = 0;

  Type type;
};
/*! Indexed triangle mesh geometry. Triangles are stored as sycl::int4 where
 *  x/y/z are vertex indices and w is the triangle's index tag; the mesh can
 *  be handed to the builder either as triangles or as procedural primitives. */
struct TriangleMesh : public Geometry
{
public:
/*! creates an empty mesh; the triangle and vertex arrays use USM shared
 *  allocators created from the global context and device with the
 *  device_read_only property */
TriangleMesh (ze_rtas_builder_geometry_exp_flags_t gflags = 0, bool procedural = false)
: Geometry(Type::TRIANGLE_MESH),
gflags(gflags), procedural(procedural),
triangles_alloc(context,device,sycl::ext::oneapi::property::usm::device_read_only()), triangles(0,triangles_alloc),
vertices_alloc (context,device,sycl::ext::oneapi::property::usm::device_read_only()), vertices(0,vertices_alloc) {}
virtual ~TriangleMesh() {}
/*! the mesh object itself is allocated in 64 byte aligned USM shared memory */
void* operator new(size_t size) {
return sycl::aligned_alloc_shared(64,size,device,context,sycl::ext::oneapi::property::usm::device_read_only());
}
void operator delete(void* ptr) {
sycl::free(ptr,context);
}
/*! number of triangles */
size_t size() const {
return triangles.size();
}
/*! transforms all vertices in place (vertex_map is not updated) */
virtual void transform( const Transform xfm) override
{
for (size_t i=0; i<vertices.size(); i++)
vertices[i] = xfmPoint(xfm,vertices[i]);
}
/*! bounds callback installed by getDesc() for procedural meshes: writes the
 *  bounds of primitives primID .. primID+primIDCount-1 to pBoundsOut[0..] */
static void getBoundsCallback (ze_rtas_geometry_aabbs_exp_cb_params_t* params)
{
assert(params->stype == ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS);
/* pGeomUserPtr is the mesh pointer stored by getDesc() */
const TriangleMesh* mesh = (TriangleMesh*) params->pGeomUserPtr;
for (uint32_t i=0; i<params->primIDCount; i++)
{
const uint32_t primID = params->primID+i;
const Bounds3f bounds = mesh->getBounds(primID);
ze_rtas_aabb_exp_t* boundsOut = params->pBoundsOut;
boundsOut[i].lower.x = bounds.lower.x();
boundsOut[i].lower.y = bounds.lower.y();
boundsOut[i].lower.z = bounds.lower.z();
boundsOut[i].upper.x = bounds.upper.x();
boundsOut[i].upper.y = bounds.upper.y();
boundsOut[i].upper.z = bounds.upper.z();
}
}
/*! fills the builder descriptor: a procedural descriptor (one primitive per
 *  triangle, bounds via getBoundsCallback) or a triangles descriptor that
 *  points at the index and vertex arrays of this mesh */
virtual void getDesc(GEOMETRY_DESC* desc) override
{
if (procedural)
{
ze_rtas_builder_procedural_geometry_info_exp_t& out = desc->AABBs;
memset(&out,0,sizeof(out));
out.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL;
out.geometryFlags = gflags;
out.geometryMask = 0xFF;
out.primCount = triangles.size();
out.pfnGetBoundsCb = TriangleMesh::getBoundsCallback;
out.pGeomUserPtr = this;
}
else
{
ze_rtas_builder_triangles_geometry_info_exp_t& out = desc->Triangles;
memset(&out,0,sizeof(out));
out.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES;
out.geometryFlags = gflags;
out.geometryMask = 0xFF;
out.triangleFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32;
out.vertexFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
/* the stride is a full int4: the 4th component holds the index tag */
out.pTriangleBuffer = (ze_rtas_triangle_indices_uint32_exp_t*) triangles.data();
out.triangleCount = triangles.size();
out.triangleStride = sizeof(sycl::int4);
out.pVertexBuffer = (ze_rtas_float3_exp_t*) vertices.data();
out.vertexCount = vertices.size();
out.vertexStride = sizeof(sycl::float3);
}
}
/*! returns triangle primID with its vertices looked up and its index tag
 *  taken from the w component */
Triangle getTriangle( const uint32_t primID ) const
{
const sycl::float3 v0 = vertices[triangles[primID].x()];
const sycl::float3 v1 = vertices[triangles[primID].y()];
const sycl::float3 v2 = vertices[triangles[primID].z()];
const uint32_t index = triangles[primID].w();
return Triangle(v0,v1,v2,index);
}
/*! bounding box of triangle primID */
Bounds3f getBounds( const uint32_t primID ) const {
return getTriangle(primID).bounds();
}
/*! returns the index of vertex v, appending it only if vertex_map does not
 *  already know a vertex at exactly this position */
uint32_t addVertex( const sycl::float3& v )
{
auto e = vertex_map.find(v);
if (e != vertex_map.end()) return e->second;
vertices.push_back(v);
vertex_map[v] = vertices.size()-1;
return vertices.size()-1;
}
/*! appends a triangle, sharing vertices through addVertex(); the index tag
 *  of the triangle is kept */
void addTriangle( const Triangle& tri )
{
const uint32_t v0 = addVertex(tri.v0);
const uint32_t v1 = addVertex(tri.v1);
const uint32_t v2 = addVertex(tri.v2);
triangles.push_back(sycl::int4(v0,v1,v2,tri.index));
}
/*! splits the mesh by the plane through P with normal N: triangles whose
 *  center lies on the negative side go to mesh0, all others to mesh1 */
void split(const sycl::float3 P, const sycl::float3 N, std::shared_ptr<TriangleMesh>& mesh0, std::shared_ptr<TriangleMesh>& mesh1)
{
mesh0 = std::shared_ptr<TriangleMesh>(new TriangleMesh(gflags,procedural));
mesh1 = std::shared_ptr<TriangleMesh>(new TriangleMesh(gflags,procedural));
for (uint32_t primID=0; primID<(uint32_t) size(); primID++)
{
const Triangle tri = getTriangle(primID);
if (sycl::dot(tri.center()-P,N) < 0.0f) mesh0->addTriangle(tri);
else mesh1->addTriangle(tri);
}
}
/*! splits the mesh in the middle: the first N/2 triangles go to mesh0, the
 *  remaining ones to mesh1 */
void split(std::shared_ptr<TriangleMesh>& mesh0, std::shared_ptr<TriangleMesh>& mesh1)
{
uint32_t N = (uint32_t) size();
mesh0 = std::shared_ptr<TriangleMesh>(new TriangleMesh(gflags,procedural));
mesh1 = std::shared_ptr<TriangleMesh>(new TriangleMesh(gflags,procedural));
mesh0->triangles.reserve(triangles.size()/2+1);
mesh1->triangles.reserve(triangles.size()/2+1);
mesh0->vertices.reserve(vertices.size()/2+8);
mesh1->vertices.reserve(vertices.size()/2+8);
for (uint32_t primID=0; primID<N; primID++)
{
const Triangle tri = getTriangle(primID);
if (primID<N/2) mesh0->addTriangle(tri);
else mesh1->addTriangle(tri);
}
}
/* selects random sub-set of triangles */
void selectRandom(const uint32_t numTriangles)
{
assert(numTriangles <= size());
/* first randomize triangles */
for (size_t i=0; i<size(); i++) {
uint32_t j = RandomSampler_getUInt(rng) % size();
std::swap(triangles[i],triangles[j]);
}
/* now we can easily select a random set of triangles */
triangles.resize(numTriangles);
/* now we sort the triangles again */
std::sort(triangles.begin(), triangles.end(), []( sycl::int4 a, sycl::int4 b ) { return a.w() < b.w(); });
/* and assign consecutive IDs */
for (uint32_t i=0; i<numTriangles; i++)
triangles[i].w() = i;
}
/* selects sequential sub-set of triangles */
void selectSequential(const uint32_t numTriangles)
{
assert(numTriangles <= size());
/* keep only the first numTriangles triangles */
triangles.resize(numTriangles);
}
/* creates separate vertices for triangles */
void unshareVertices()
{
/* reserve up front so the push_backs below do not reallocate while
   elements of the same array are being read */
vertices.reserve(vertices.size()+3*triangles.size());
for (size_t i=0; i<triangles.size(); i++) {
const sycl::int4 tri = triangles[i];
const uint32_t v0 = (uint32_t) vertices.size();
vertices.push_back(vertices[tri.x()]);
const uint32_t v1 = (uint32_t) vertices.size();
vertices.push_back(vertices[tri.y()]);
const uint32_t v2 = (uint32_t) vertices.size();
vertices.push_back(vertices[tri.z()]);
triangles[i] = sycl::int4(v0,v1,v2,tri.w());
}
}
/*! records all triangles of this mesh in tri_map, addressed by their index
 *  tag. The geometry ID is popped from the back of id_stack first, then
 *  (if present) the instance ID; both stay -1 when the stack is empty. */
virtual void buildTriMap(Transform local_to_world, std::vector<uint32_t> id_stack, uint32_t instUserID, bool procedural_instance, std::vector<Hit>& tri_map) override
{
uint32_t instID = -1;
uint32_t geomID = -1;
if (id_stack.size()) {
geomID = id_stack.back();
id_stack.pop_back();
}
if (id_stack.size()) {
instID = id_stack.back();
id_stack.pop_back();
}
assert(id_stack.size() == 0);
for (uint32_t primID=0; primID<triangles.size(); primID++)
{
const Triangle tri = getTriangle(primID);
/* every index tag is expected to be recorded only once */
assert(tri_map[tri.index].geomID == -1);
tri_map[tri.index].instUserID = instUserID;
tri_map[tri.index].primID = primID;
tri_map[tri.index].geomID = geomID;
tri_map[tri.index].instID = instID;
tri_map[tri.index].procedural_triangle = procedural;
tri_map[tri.index].procedural_instance = procedural_instance;
tri_map[tri.index].triangle = tri;
tri_map[tri.index].local_to_world = local_to_world;
}
}
/*! number of primitives = number of triangles */
size_t getNumPrimitives() const override {
return triangles.size();
}
public:
/* geometry flags passed to the builder */
ze_rtas_builder_geometry_exp_flags_t gflags = 0;
/* if true, getDesc() describes the mesh as procedural primitives */
bool procedural = false;
/* triangles: x/y/z = vertex indices, w = index tag */
typedef sycl::usm_allocator<sycl::int4, sycl::usm::alloc::shared> triangles_alloc_ty;
triangles_alloc_ty triangles_alloc;
std::vector<sycl::int4, triangles_alloc_ty> triangles;
/* vertex positions */
typedef sycl::usm_allocator<sycl::float3, sycl::usm::alloc::shared> vertices_alloc_ty;
vertices_alloc_ty vertices_alloc;
std::vector<sycl::float3, vertices_alloc_ty> vertices;
/* maps a position to its vertex index; maintained by addVertex() only */
std::map<sycl::float3,uint32_t,less_float3> vertex_map;
};
/*! Geometry that instantiates a whole scene under a local-to-world
 *  transformation. The scene type is a template parameter because Scene is
 *  defined after this class. Depending on `procedural` the instance is handed
 *  to the builder as an instance descriptor or as one procedural primitive. */
template<typename Scene>
struct InstanceGeometryT : public Geometry
{
InstanceGeometryT(const Transform& local2world, std::shared_ptr<Scene> scene, bool procedural, uint32_t instUserID)
: Geometry(Type::INSTANCE), procedural(procedural), instUserID(instUserID), local2world(local2world), scene(scene) {}
virtual ~InstanceGeometryT() {}
/*! the instance object itself is allocated in 64 byte aligned USM shared memory */
void* operator new(size_t size) {
return sycl::aligned_alloc_shared(64,size,device,context,sycl::ext::oneapi::property::usm::device_read_only());
}
void operator delete(void* ptr) {
sycl::free(ptr,context);
}
/*! bounds callback installed by getDesc() for procedural instances: writes
 *  the scene bounds transformed by local2world as the single primitive's bounds */
static void getBoundsCallback (ze_rtas_geometry_aabbs_exp_cb_params_t* params)
{
assert(params->stype == ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS);
/* a procedural instance consists of exactly one primitive */
assert(params->primID == 0);
assert(params->primIDCount == 1);
/* pGeomUserPtr is the instance pointer stored by getDesc() */
const InstanceGeometryT* inst = (InstanceGeometryT*) params->pGeomUserPtr;
const Bounds3f scene_bounds = inst->scene->getBounds();
const Bounds3f bounds = xfmBounds(inst->local2world, scene_bounds);
ze_rtas_aabb_exp_t* boundsOut = params->pBoundsOut;
boundsOut->lower.x = bounds.lower.x();
boundsOut->lower.y = bounds.lower.y();
boundsOut->lower.z = bounds.lower.z();
boundsOut->upper.x = bounds.upper.x();
boundsOut->upper.y = bounds.upper.y();
boundsOut->upper.z = bounds.upper.z();
}
/*! fills the builder descriptor: a procedural descriptor with one primitive,
 *  or an instance descriptor carrying the transformation, the scene bounds
 *  and the acceleration structure of the instanced scene */
virtual void getDesc(GEOMETRY_DESC* desc) override
{
if (procedural)
{
ze_rtas_builder_procedural_geometry_info_exp_t& out = desc->AABBs;
memset(&out,0,sizeof(out));
out.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL;
out.geometryFlags = 0;
out.geometryMask = 0xFF;
out.primCount = 1;
out.pfnGetBoundsCb = InstanceGeometryT::getBoundsCallback;
out.pGeomUserPtr = this;
}
else
{
GEOMETRY_INSTANCE_DESC& out = desc->Instance;
memset(&out,0,sizeof(GEOMETRY_INSTANCE_DESC));
out.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE;
out.instanceFlags = 0;
out.geometryMask = 0xFF;
out.instanceUserID = instUserID;
out.transformFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR;
/* the transformation is stored inside the descriptor itself (xfmdata),
   so pTransform is only valid as long as *desc is */
out.pTransform = (float*)&out.xfmdata;
out.xfmdata.vx_x = local2world.vx.x();
out.xfmdata.vx_y = local2world.vx.y();
out.xfmdata.vx_z = local2world.vx.z();
out.xfmdata.pad0 = 0.0f;
out.xfmdata.vy_x = local2world.vy.x();
out.xfmdata.vy_y = local2world.vy.y();
out.xfmdata.vy_z = local2world.vy.z();
out.xfmdata.pad1 = 0.0f;
out.xfmdata.vz_x = local2world.vz.x();
out.xfmdata.vz_y = local2world.vz.y();
out.xfmdata.vz_z = local2world.vz.z();
out.xfmdata.pad2 = 0.0f;
out.xfmdata.p_x = local2world.p.x();
out.xfmdata.p_y = local2world.p.y();
out.xfmdata.p_z = local2world.p.z();
out.xfmdata.pad3 = 0.0f;
out.pBounds = &scene->bounds;
out.pAccelerationStructure = scene->getAccel();
}
}
/*! builds the acceleration structure of the instanced scene; the quality
 *  argument is not forwarded */
virtual void buildAccel(sycl::device& device, sycl::context& context, BuildMode buildMode, ze_rtas_builder_build_quality_hint_exp_t quality) override {
scene->buildAccel(device,context,buildMode);
}
/*! forwards to the instanced scene with the concatenated transformation.
 *  The incoming instUserID is replaced by this instance's own user ID, and
 *  this instance's `procedural` flag is passed on instead of the incoming
 *  procedural_instance argument. */
virtual void buildTriMap(Transform local_to_world_in, std::vector<uint32_t> id_stack, uint32_t instUserID, bool procedural_instance, std::vector<Hit>& tri_map) override {
instUserID = this->instUserID;
scene->buildTriMap(local_to_world_in * local2world, id_stack, instUserID, procedural, tri_map);
}
/*! an instance counts as a single primitive */
size_t getNumPrimitives() const override {
return 1;
}
/* if true, getDesc() describes the instance as one procedural primitive */
bool procedural;
/* user ID reported for hits inside this instance */
uint32_t instUserID = -1;
/* transformation from the instanced scene to the parent scene */
Transform local2world;
/* the instanced scene */
std::shared_ptr<Scene> scene;
};
/*! Creates a tessellated parallelogram spanned by dx and dy at p0, made of
 *  width x height cells with two triangles each. Vertices are stored row by
 *  row; triangle i gets i as its index tag. */
std::shared_ptr<TriangleMesh> createTrianglePlane (const sycl::float3& p0, const sycl::float3& dx, const sycl::float3& dy, size_t width, size_t height)
{
  std::shared_ptr<TriangleMesh> mesh(new TriangleMesh);
  mesh->triangles.resize(2*width*height);
  mesh->vertices.resize((width+1)*(height+1));

  /* number of vertices in one row of the grid */
  const size_t rowStride = width+1;

  /* vertex grid: (width+1) x (height+1) points */
  for (size_t row=0; row<=height; row++) {
    for (size_t col=0; col<=width; col++) {
      const sycl::float3 position = p0+float(col)/float(width)*dx+float(row)/float(height)*dy;
      mesh->vertices[row*rowStride+col] = position;
    }
  }

  /* two triangles per cell, sharing the diagonal between p01 and p10 */
  for (size_t row=0; row<height; row++) {
    for (size_t col=0; col<width; col++) {
      const size_t first = 2*row*width+2*col;
      const size_t second = first+1;
      const size_t p00 = (row+0)*rowStride+(col+0);
      const size_t p01 = (row+0)*rowStride+(col+1);
      const size_t p10 = (row+1)*rowStride+(col+0);
      const size_t p11 = (row+1)*rowStride+(col+1);
      mesh->triangles[first] = sycl::int4((int)p00,(int)p01,(int)p10,first);
      mesh->triangles[second] = sycl::int4((int)p11,(int)p10,(int)p01,second);
    }
  }
  return mesh;
}
void* alloc_accel_buffer_internal(size_t bytes, sycl::device device, sycl::context context)
{
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);
ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
ze_result_t err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
if (err != ZE_RESULT_SUCCESS)
throw std::runtime_error("zeDeviceGetProperties failed");
ze_raytracing_mem_alloc_ext_desc_t rt_desc;
rt_desc.stype = ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC;
rt_desc.pNext = nullptr;
rt_desc.flags = 0;
ze_device_mem_alloc_desc_t device_desc;
device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
device_desc.pNext = &rt_desc;
device_desc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED;
device_desc.ordinal = 0;
ze_host_mem_alloc_desc_t host_desc;
host_desc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
host_desc.pNext = nullptr;
host_desc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED;
void* ptr = nullptr;
ze_result_t result = ZeWrapper::zeMemAllocShared(hContext,&device_desc,&host_desc,bytes,rtasProp.rtasBufferAlignment,hDevice,&ptr);
if (result != ZE_RESULT_SUCCESS)
throw std::runtime_error("accel allocation failed");
return ptr;
}
void free_accel_buffer_internal(void* ptr, sycl::context context)
{
if (ptr == nullptr) return;
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
ze_result_t result = ZeWrapper::zeMemFree(hContext,ptr);
if (result != ZE_RESULT_SUCCESS)
throw std::runtime_error("accel free failed");
}
/*! Memory block for acceleration structure buffers with a simple bump
 *  allocator: alloc() hands out consecutive sub-ranges in multiples of 128
 *  bytes, individual sub-ranges are never returned, and the whole block is
 *  freed when the Block is destroyed. */
struct Block {

  /*! rounds bytes up to the next multiple of 128 */
  static size_t alignUp(size_t bytes) {
    return (bytes + 127) & ~(size_t)127;
  }

  /*! allocates a block that can hold at least `bytes` bytes; the capacity is
   *  rounded up to a multiple of 128 so that a request for the full size
   *  always fits after alloc() rounds it up */
  Block (size_t bytes, sycl::device device, sycl::context context)
    : base((char*)alloc_accel_buffer_internal(alignUp(bytes),device,context)), total(alignUp(bytes)), cur(0) {}

  /* the block owns its memory; copying would free it twice */
  Block (const Block&) = delete;
  Block& operator= (const Block&) = delete;

  /*! releases the block through the global context */
  ~Block() {
    free_accel_buffer_internal((void*)base,context);
  }

  /*! reserves `bytes` bytes, rounded UP to a multiple of 128 so that the
   *  returned range really covers the request and never overlaps the next
   *  allocation; returns nullptr if the block has not enough space left */
  void* alloc(size_t bytes) {
    const size_t padded = alignUp(bytes);
    if (padded < bytes) return nullptr;       // rounding up wrapped around
    if (padded > total-cur) return nullptr;   // cur <= total always holds
    void* ptr = &base[cur];
    cur += padded;
    return ptr;
  }

  char* base = nullptr;   // start of the block
  size_t total = 0;       // capacity of the block in bytes
  size_t cur = 0;         // number of bytes handed out so far
};
/* If true, alloc_accel_buffer() sub-allocates from the blocks in g_blocks
 * and free_accel_buffer() does nothing; if false, every buffer is allocated
 * and freed individually. */
bool g_use_accel_blocks = true;
/* Blocks created so far; new allocations are served from the last one. */
std::vector<std::shared_ptr<Block>> g_blocks;
/*! Allocates an acceleration structure buffer. With g_use_accel_blocks set,
 *  requests of up to 1 MiB are carved out of the most recent block (a new
 *  1 MiB block is started when it is full) and larger requests get a block
 *  of their own; otherwise the buffer is allocated directly. */
void* alloc_accel_buffer(size_t bytes, sycl::device device, sycl::context context)
{
  if (!g_use_accel_blocks)
    return alloc_accel_buffer_internal(bytes,device,context);

  /* make sure there is always at least one block */
  if (g_blocks.empty())
    g_blocks.push_back(std::shared_ptr<Block>(new Block(1024*1024,device,context)));

  const bool oversized = bytes > 1024*1024;

  /* regular requests first try the most recent block */
  if (!oversized) {
    if (void* ptr = g_blocks.back()->alloc(bytes))
      return ptr;
  }

  /* start a new block: exactly sized for oversized requests, 1 MiB otherwise */
  if (oversized)
    g_blocks.push_back(std::shared_ptr<Block>(new Block(bytes,device,context)));
  else
    g_blocks.push_back(std::shared_ptr<Block>(new Block(1024*1024,device,context)));

  void* ptr = g_blocks.back()->alloc(bytes);
  assert(ptr);
  return ptr;
}
/*! Counterpart of alloc_accel_buffer(): buffers that were sub-allocated from
 *  a block are not released here, only directly allocated buffers are freed. */
void free_accel_buffer(void* ptr, sycl::context context)
{
  if (g_use_accel_blocks)
    return;
  free_accel_buffer_internal(ptr,context);
}
struct Scene
{
typedef InstanceGeometryT<Scene> InstanceGeometry;
Scene()
: geometries_alloc(context,device,sycl::ext::oneapi::property::usm::device_read_only()), geometries(0,geometries_alloc), bounds(Bounds3f::empty()), accel(nullptr) {}
Scene(uint32_t width, uint32_t height, bool opaque, bool procedural)
: geometries_alloc(context,device,sycl::ext::oneapi::property::usm::device_read_only()), geometries(0,geometries_alloc), bounds(Bounds3f::empty()), accel(nullptr)
{
std::shared_ptr<TriangleMesh> plane = createTrianglePlane(sycl::float3(0,0,0), sycl::float3(width,0,0), sycl::float3(0,height,0), width, height);
plane->gflags = opaque ? (ze_rtas_builder_geometry_exp_flag_t) 0 : ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE;
plane->procedural = procedural;
geometries.push_back(plane);
}
~Scene() {
free_accel_buffer(accel,context);
}
void* operator new(size_t size) {
return sycl::aligned_alloc_shared(64,size,device,context,sycl::ext::oneapi::property::usm::device_read_only());
}
void operator delete(void* ptr) {
sycl::free(ptr,context);
}
void add(std::shared_ptr<TriangleMesh> mesh) {
geometries.push_back(mesh);
}
void splitIntoGeometries(uint32_t numGeometries)
{
bool progress = true;
while (progress)
{
size_t N = geometries.size();
progress = false;
for (uint32_t i=0; i<N; i++)
{
if (std::shared_ptr<TriangleMesh> mesh = std::dynamic_pointer_cast<TriangleMesh>(geometries[i]))
{
if (mesh->size() <= 1) continue;
progress = true;
/*const Triangle tri = mesh->getTriangle(RandomSampler_getUInt(rng)%mesh->size());
const float u = 2.0f*M_PI*RandomSampler_getFloat(rng);
const sycl::float3 P = tri.center();
const sycl::float3 N(cosf(u),sinf(u),0.0f);
std::shared_ptr<TriangleMesh> mesh0, mesh1;
mesh->split(P,N,mesh0,mesh1);*/
std::shared_ptr<TriangleMesh> mesh0, mesh1;
mesh->split(mesh0,mesh1);
geometries[i] = std::dynamic_pointer_cast<Geometry>(mesh0);
geometries.push_back(std::dynamic_pointer_cast<Geometry>(mesh1));
if (geometries.size() >= numGeometries)
return;
}
}
}
assert(geometries.size() == numGeometries);
}
/* splits each primitive into a geometry */
void splitIntoGeometries()
{
/* count number of triangles */
uint32_t numTriangles = 0;
for (uint32_t i=0; i<geometries.size(); i++)
{
if (std::shared_ptr<TriangleMesh> mesh = std::dynamic_pointer_cast<TriangleMesh>(geometries[i])) {
numTriangles++;
}
}
std::vector<std::shared_ptr<Geometry>, geometries_alloc_ty> new_geometries(0,geometries_alloc);
new_geometries.reserve(numTriangles);
for (uint32_t i=0; i<geometries.size(); i++)
{
if (std::shared_ptr<TriangleMesh> mesh = std::dynamic_pointer_cast<TriangleMesh>(geometries[i]))
{
if (mesh->size() <= 1) {
new_geometries.push_back(geometries[i]);
continue;
}
for (uint32_t j=0; j<mesh->size(); j++) {
std::shared_ptr<TriangleMesh> mesh0(new TriangleMesh(mesh->gflags,mesh->procedural));
mesh0->triangles.reserve(1);
mesh->vertices.reserve(3);
mesh0->addTriangle(mesh->getTriangle(j));
new_geometries.push_back(mesh0);
}
}
}
geometries = new_geometries;
}
void createInstances(uint32_t maxInstances, uint32_t blockSize = 1, bool procedural = false)
{
std::vector<std::shared_ptr<Geometry>, geometries_alloc_ty> instances(0,geometries_alloc);
for (uint32_t i=0; i<geometries.size(); i+=blockSize)
{
const uint32_t begin = i;
const uint32_t end = std::min((uint32_t)geometries.size(),i+blockSize);
if (instances.size() >= maxInstances)
{
for (uint32_t j=begin; j<end; j++) {
instances.push_back(geometries[j]);
}
continue;
}
const Transform local2world = RandomSampler_getTransform(rng);
const Transform world2local = rcp(local2world);
std::shared_ptr<Scene> scene(new Scene);
for (size_t j=begin; j<end; j++) {
geometries[j]->transform(world2local);
scene->geometries.push_back(geometries[j]);
}
//std::shared_ptr<InstanceGeometry> instance = std::make_shared<InstanceGeometry>(local2world,scene,procedural);
uint32_t instUserID = RandomSampler_getUInt(rng);
std::shared_ptr<InstanceGeometry> instance(new InstanceGeometry(local2world,scene,procedural,instUserID));
instances.push_back(instance);
}
geometries = instances;
}
/*! Alternates the procedural flag of the triangle meshes: meshes stored at
 *  odd indices of `geometries` become procedural, those at even indices do
 *  not. Entries that are not triangle meshes are left untouched. */
void mixTrianglesAndProcedurals()
{
  for (uint32_t slot=0; slot<geometries.size(); slot++)
  {
    std::shared_ptr<TriangleMesh> mesh = std::dynamic_pointer_cast<TriangleMesh>(geometries[slot]);
    if (!mesh)
      continue;

    mesh->procedural = slot%2;
  }
}
/*! Appends D empty (null) geometry slots and swaps each of them with a
 *  randomly chosen slot among the first N (the size before the call), so the
 *  null entries end up scattered through the list. If the scene was empty the
 *  D null slots are simply appended. */
void addNullGeometries(uint32_t D)
{
  const size_t oldCount = geometries.size();
  const size_t newCount = oldCount+D;

  /* resize value-initializes the new shared_ptr entries, i.e. they are null */
  geometries.resize(newCount);

  if (oldCount != 0)
  {
    for (size_t slot=oldCount; slot<newCount; slot++) {
      uint32_t partner = RandomSampler_getUInt(rng) % oldCount;
      std::swap(geometries[slot],geometries[partner]);
    }
  }
}
/*! Builds the acceleration structure of this scene with the Level Zero RTAS builder.
 *
 *  Steps, in order:
 *   1. picks a random build quality (RandomSampler_getUInt(rng) % 3),
 *   2. builds every non-null child geometry (g->buildAccel) and collects its descriptor;
 *      null geometries are passed to the builder as nullptr entries,
 *   3. queries the RTAS format of the device and the builder's size estimates,
 *   4. builds into a freshly allocated buffer according to buildMode:
 *        BUILD_WORST_CASE_SIZE - one build into rtasBufferSizeBytesMaxRequired bytes
 *                                (repeated 16 times and timed when benchmark is set),
 *        BUILD_EXPECTED_SIZE   - starts at rtasBufferSizeBytesExpected and, while the
 *                                builder answers ZE_RESULT_EXP_RTAS_BUILD_RETRY, retries
 *                                with the size the builder returned (fewer than 16 retries),
 *   5. stores the returned bounds in this->bounds and the buffer in accel,
 *   6. unless benchmarking, checks the zeroed sentinel bytes behind the scratch and
 *      acceleration buffers and that nothing was written past the reported size.
 *
 *  \param device     device to build for
 *  \param context    context used for the buffer allocations
 *  \param buildMode  buffer sizing strategy, see step 4
 *  \param benchmark  when true, prints the build rate (Mprims/s) in BUILD_WORST_CASE_SIZE
 *                    mode and skips the buffer checks of step 6
 *  \throws std::runtime_error on any builder/device query failure or failed check */
void buildAccel(sycl::device& device, sycl::context& context, BuildMode buildMode, bool benchmark = false)
{
ze_rtas_builder_build_quality_hint_exp_t quality = (ze_rtas_builder_build_quality_hint_exp_t) (RandomSampler_getUInt(rng) % 3);
/* fill geometry descriptor buffer */
std::vector<GEOMETRY_DESC> desc(size());
std::vector<const ze_rtas_builder_geometry_info_exp_t*> geom(size());
size_t numPrimitives = 0;
for (size_t geomID=0; geomID<size(); geomID++)
{
const std::shared_ptr<Geometry>& g = geometries[geomID];
/* skip NULL geometries */
if (g == nullptr) {
geom[geomID] = nullptr;
continue;
}
numPrimitives += g->getNumPrimitives();
/* child geometries are built first, with the same mode and quality */
g->buildAccel(device,context,buildMode,quality);
g->getDesc(&desc[geomID]);
geom[geomID] = (const ze_rtas_builder_geometry_info_exp_t*) &desc[geomID];
}
/* query the RTAS format of the device (rtasProp is chained into devProp via pNext) */
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);
ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
ze_result_t err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
if (err != ZE_RESULT_SUCCESS)
throw std::runtime_error("zeDeviceGetProperties failed");
/* estimate accel size */
size_t accelBufferBytesOut = 0;
/* local build result; copied into the member this->bounds after the build */
ze_rtas_aabb_exp_t bounds;
ze_rtas_builder_build_op_exp_desc_t args;
memset(&args,0,sizeof(args));
args.stype = ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC;
args.pNext = nullptr;
args.rtasFormat = rtasProp.rtasFormat;
args.buildQuality = quality;
args.buildFlags = 0;
args.ppGeometries = (const ze_rtas_builder_geometry_info_exp_t**) geom.data();
args.numGeometries = geom.size();
/* just for debugging purposes */
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
ze_rtas_builder_build_op_debug_exp_desc_t buildOpDebug = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC };
buildOpDebug.dispatchGlobalsPtr = dispatchGlobalsPtr;
args.pNext = &buildOpDebug;
#endif
ze_rtas_builder_exp_properties_t size = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES };
err = ZeWrapper::zeRTASBuilderGetBuildPropertiesExp(hBuilder,&args,&size);
if (err != ZE_RESULT_SUCCESS)
throw std::runtime_error("BVH size estimate failed");
/* sanity check of the estimate: expected size must not exceed the worst case */
if (size.rtasBufferSizeBytesExpected > size.rtasBufferSizeBytesMaxRequired)
throw std::runtime_error("expected larger than worst case");
/* allocate scratch buffer */
size_t sentinelBytes = 1024; // add that many zero bytes to catch buffer overruns
std::vector<char> scratchBuffer(size.scratchBufferSizeBytes+sentinelBytes);
memset(scratchBuffer.data(),0,scratchBuffer.size());
/* NOTE(review): the previous value of accel is overwritten without being freed here - confirm who owns a buffer from an earlier build */
accel = nullptr;
size_t accelBytes = 0;
/* build with different modes */
switch (buildMode)
{
case BuildMode::BUILD_WORST_CASE_SIZE: {
accelBytes = size.rtasBufferSizeBytesMaxRequired;
/* the buffer is zeroed and over-allocated by sentinelBytes for the overrun checks below */
accel = alloc_accel_buffer(accelBytes+sentinelBytes,device,context);
memset(accel,0,accelBytes+sentinelBytes);
/* build accel */
double t0 = embree::getSeconds();
size_t numIterations = benchmark ? 16 : 1;
for (size_t i=0; i<numIterations; i++)
{
err = ZeWrapper::zeRTASBuilderBuildExp(hBuilder,&args,
scratchBuffer.data(),scratchBuffer.size(),
accel, accelBytes,
parallelOperation,
nullptr, &bounds, &accelBufferBytesOut);
/* with a parallel operation the build call is expected to return BUILD_DEFERRED and the work is done by the join calls */
if (parallelOperation)
{
assert(err == ZE_RESULT_EXP_RTAS_BUILD_DEFERRED);
ze_rtas_parallel_operation_exp_properties_t prop = { ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES };
err = ZeWrapper::zeRTASParallelOperationGetPropertiesExp(parallelOperation,&prop);
if (err != ZE_RESULT_SUCCESS)
throw std::runtime_error("get max concurrency failed");
/* NOTE(review): err is captured by reference and assigned by every iteration, so concurrent iterations write the same variable without synchronization */
tbb::parallel_for(0u, prop.maxConcurrency, 1u, [&](uint32_t) {
err = ZeWrapper::zeRTASParallelOperationJoinExp(parallelOperation);
});
}
if (err != ZE_RESULT_SUCCESS)
throw std::runtime_error("build error");
}
double t1 = embree::getSeconds();
if (benchmark) {
/* average time per build over all iterations */
double dt = (t1-t0)/double(numIterations);
std::cout << double(numPrimitives)/dt*1E-6 << " Mprims/s" << std::endl;
}
break;
}
case BuildMode::BUILD_EXPECTED_SIZE: {
size_t bytes = size.rtasBufferSizeBytesExpected;
for (size_t i=0; i<=16; i++) // FIXME: reduce worst cast iteration number
{
/* reached only if all 16 previous attempts asked for a retry */
if (i == 16)
throw std::runtime_error("build requires more than 16 iterations");
/* allocate BVH data */
free_accel_buffer(accel,context);
accelBytes = bytes;
accel = alloc_accel_buffer(accelBytes+sentinelBytes,device,context);
memset(accel,0,accelBytes+sentinelBytes);
/* build accel */
err = ZeWrapper::zeRTASBuilderBuildExp(hBuilder,&args,
scratchBuffer.data(),scratchBuffer.size(),
accel, accelBytes,
parallelOperation,
nullptr, &bounds, &accelBufferBytesOut);
if (parallelOperation)
{
assert(err == ZE_RESULT_EXP_RTAS_BUILD_DEFERRED);
ze_rtas_parallel_operation_exp_properties_t prop = { ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES };
err = ZeWrapper::zeRTASParallelOperationGetPropertiesExp(parallelOperation,&prop);
if (err != ZE_RESULT_SUCCESS)
throw std::runtime_error("get max concurrency failed");
/* NOTE(review): same unsynchronized writes to err as in the worst-case path */
tbb::parallel_for(0u, prop.maxConcurrency, 1u, [&](uint32_t) {
err = ZeWrapper::zeRTASParallelOperationJoinExp(parallelOperation);
});
}
/* anything but a retry request (success or error) ends the loop */
if (err != ZE_RESULT_EXP_RTAS_BUILD_RETRY)
break;
/* the new estimate must not shrink and must stay within the worst case */
if (accelBufferBytesOut < bytes || size.rtasBufferSizeBytesMaxRequired < accelBufferBytesOut )
throw std::runtime_error("failed build returned wrong new estimate");
bytes = accelBufferBytesOut;
}
if (err != ZE_RESULT_SUCCESS)
throw std::runtime_error("build error");
break;
}
}
this->bounds = bounds;
if (!benchmark)
{
/* scratch buffer bounds check */
for (size_t i=size.scratchBufferSizeBytes; i<size.scratchBufferSizeBytes+sentinelBytes; i++) {
if (scratchBuffer[i] == 0x00) continue;
throw std::runtime_error("scratch buffer bounds check failed");
}
/* acceleration structure bounds check */
for (size_t i=accelBytes; i<accelBytes+sentinelBytes; i++) {
if (((char*)accel)[i] == 0x00) continue;
throw std::runtime_error("acceleration buffer bounds check failed");
}
/* check if returned size of acceleration structure is correct */
/* (the buffer was zeroed before the build, so bytes past the reported size must still be zero) */
for (size_t i=accelBufferBytesOut; i<accelBytes; i++) {
if (((char*)accel)[i] == 0x00) continue;
throw std::runtime_error("wrong acceleration structure size returned");
}
}
}
/*! Forwards the tri-map construction to every non-null geometry of the scene.
 *
 *  While a geometry is visited its index is the last element of `id_stack`;
 *  all other arguments are passed through unchanged. `id_stack` is taken by
 *  value, so the caller's stack is not modified. */
void buildTriMap(Transform local_to_world, std::vector<uint32_t> id_stack, uint32_t instUserID, bool procedural_instance, std::vector<Hit>& tri_map)
{
  for (uint32_t slot=0; slot<geometries.size(); slot++)
  {
    const std::shared_ptr<Geometry>& geometry = geometries[slot];

    /* null geometries contribute nothing */
    if (geometry != nullptr)
    {
      id_stack.push_back(slot);
      geometry->buildTriMap(local_to_world,id_stack,instUserID,procedural_instance,tri_map);
      id_stack.pop_back();
    }
  }
}
/*! Returns the number of entries in `geometries`; null entries are counted too. */
size_t size() const {
return geometries.size();
}
/*! Returns the `bounds` member (written by buildAccel) converted to a Bounds3f:
 *  lower corner first, then upper corner. */
Bounds3f getBounds() {
return {
{ bounds.lower.x, bounds.lower.y, bounds.lower.z },
{ bounds.upper.x, bounds.upper.y, bounds.upper.z }
};
}
/*! Returns the acceleration structure buffer pointer stored in `accel`
 *  (assigned in buildAccel). */
void* getAccel() {
return accel;
}
/*! Returns a copy of the shared pointer stored at index i; the entry may be null.
 *  The index is passed to the vector's operator[] without a range check. */
std::shared_ptr<Geometry> operator[] ( size_t i ) { return geometries[i]; }
/* allocator placing the geometry pointer array in SYCL shared USM memory */
typedef sycl::usm_allocator<std::shared_ptr<Geometry>, sycl::usm::alloc::shared> geometries_alloc_ty;
/* allocator instance handed to every geometry vector created by this scene */
geometries_alloc_ty geometries_alloc;
/* geometries of the scene; entries may be null (see addNullGeometries) */
std::vector<std::shared_ptr<Geometry>, geometries_alloc_ty> geometries;
/* bounds returned by the builder, written at the end of the build in buildAccel */
ze_rtas_aabb_exp_t bounds;
/* acceleration structure buffer allocated in buildAccel */
void* accel;
};
/*! Asynchronous SYCL error handler: rethrows every exception of the list and
 *  prints the message of those that are sycl::exception. Exceptions of any
 *  other type are not caught here and propagate to the caller. */
void exception_handler(sycl::exception_list exceptions)
{
  for (const std::exception_ptr& pending : exceptions)
  {
    try {
      std::rethrow_exception(pending);
    }
    catch (const sycl::exception& error) {
      std::cout << "Caught asynchronous SYCL exception: " << error.what() << std::endl;
    }
  }
}
void render(uint32_t i, const TestInput& in, TestOutput& out, intel_raytracing_acceleration_structure_t accel)
{
intel_raytracing_ext_flag_t flags = intel_get_raytracing_ext_flag();
if (!(flags & intel_raytracing_ext_flag_ray_query))
return;
/* setup ray */
intel_ray_desc_t ray;
ray.origin = in.org;
ray.direction = in.dir;
ray.tmin = in.tnear;
ray.tmax = in.tfar;
ray.mask = in.mask;
ray.flags = (intel_ray_flags_t) in.flags;
/* trace ray */
intel_ray_query_t query = intel_ray_query_init(ray,accel);
intel_ray_query_start_traversal(query);
intel_ray_query_sync(query);
/* return ray data of level 0 */
out.ray0_org = intel_get_ray_origin(query,0);
out.ray0_dir = intel_get_ray_direction(query,0);
out.ray0_tnear = intel_get_ray_tmin(query,0);
out.ray0_mask = intel_get_ray_mask(query,0);
out.ray0_flags = intel_get_ray_flags(query,0);
/* clear ray data of level N */
out.rayN_org = sycl::float3(0.f,0.f,0.f);
out.rayN_dir = sycl::float3(0.f,0.f,0.f);
out.rayN_tnear = 0.0f;
out.rayN_mask = 0;
out.rayN_flags = 0;
/* potential hit */
if (!intel_is_traversal_done(query))
{
out.hit_type = TEST_POTENTIAL_HIT;
out.bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit );
out.hit_candidate = intel_get_hit_candidate( query, intel_hit_type_potential_hit );
out.t = intel_get_hit_distance(query, intel_hit_type_potential_hit);
out.u = intel_get_hit_barycentrics(query, intel_hit_type_potential_hit).x;
out.v = intel_get_hit_barycentrics(query, intel_hit_type_potential_hit).y;
out.front_face = intel_get_hit_front_face( query, intel_hit_type_potential_hit );
out.instUserID = intel_get_hit_instance_user_id( query, intel_hit_type_potential_hit );
out.instID = intel_get_hit_instance_id( query, intel_hit_type_potential_hit );
out.geomID = intel_get_hit_geometry_id( query, intel_hit_type_potential_hit );
if (i%2) out.primID = intel_get_hit_triangle_primitive_id( query, intel_hit_type_potential_hit );
else out.primID = intel_get_hit_primitive_id ( query, intel_hit_type_potential_hit );
intel_float3 vertex_out[3];
intel_get_hit_triangle_vertices(query, vertex_out, intel_hit_type_potential_hit);
out.v0 = vertex_out[0];
out.v1 = vertex_out[1];
out.v2 = vertex_out[2];
/* return ray data at current level */
uint32_t bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit );
out.rayN_org = intel_get_ray_origin(query,bvh_level);
out.rayN_dir = intel_get_ray_direction(query,bvh_level);
out.rayN_tnear = intel_get_ray_tmin(query,bvh_level);
out.rayN_mask = intel_get_ray_mask(query,bvh_level);
out.rayN_flags = intel_get_ray_flags(query,bvh_level);
/* return instance transformations */
out.world_to_object = intel_get_hit_world_to_object(query,intel_hit_type_potential_hit);
out.object_to_world = intel_get_hit_object_to_world(query,intel_hit_type_potential_hit);
}
/* committed hit */
else if (intel_has_committed_hit(query))
{
out.hit_type = TEST_COMMITTED_HIT;
out.bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_committed_hit );
out.hit_candidate = intel_get_hit_candidate( query, intel_hit_type_committed_hit );
out.t = intel_get_hit_distance(query, intel_hit_type_committed_hit);
out.u = intel_get_hit_barycentrics(query, intel_hit_type_committed_hit).x;
out.v = intel_get_hit_barycentrics(query, intel_hit_type_committed_hit).y;
out.front_face = intel_get_hit_front_face( query, intel_hit_type_committed_hit );
out.instUserID = intel_get_hit_instance_user_id( query, intel_hit_type_committed_hit );
out.instID = intel_get_hit_instance_id( query, intel_hit_type_committed_hit );
out.geomID = intel_get_hit_geometry_id( query, intel_hit_type_committed_hit );
if (i%2) out.primID = intel_get_hit_triangle_primitive_id( query, intel_hit_type_committed_hit );
else out.primID = intel_get_hit_primitive_id ( query, intel_hit_type_committed_hit );
intel_float3 vertex_out[3];
intel_get_hit_triangle_vertices(query, vertex_out, intel_hit_type_committed_hit);
out.v0 = vertex_out[0];
out.v1 = vertex_out[1];
out.v2 = vertex_out[2];
/* return instance transformations */
out.world_to_object = intel_get_hit_world_to_object(query,intel_hit_type_committed_hit);
out.object_to_world = intel_get_hit_object_to_world(query,intel_hit_type_committed_hit);
}
/* miss */
else {
out.hit_type = TEST_MISS;
}
/* abandon ray query */
intel_ray_query_abandon(query);
}
/*! Traces one test ray with an explicit traversal loop and records the result.
 *
 *  Does nothing when the ray query extension flag is not set. The level-0 ray
 *  data is always written. The loop then handles every candidate reported by
 *  the traversal:
 *   - triangle candidates: for TRIANGLES_POTENTIAL_HIT the first candidate is
 *     reported as potential hit and the function returns; for
 *     TRIANGLES_ANYHIT_SHADER_COMMIT the candidate is committed; for all other
 *     test types it is ignored,
 *   - procedural candidates: the geometry is looked up through the host scene;
 *     triangle meshes are intersected in software and committed on a hit,
 *     instances forward the transformed ray into the instanced scene.
 *  After the loop the committed hit (if any) or a miss is reported.
 *
 *  \param i         test index; selects which primitive ID query is used for
 *                   the potential hit report
 *  \param in        ray description of the test
 *  \param out       receives the queried ray and hit data
 *  \param scene_in  address of the top level Scene, passed as integer
 *  \param accel     acceleration structure of the top level scene
 *  \param test      test type controlling the handling of triangle candidates */
void render_loop(uint32_t i, const TestInput& in, TestOutput& out, size_t scene_in, intel_raytracing_acceleration_structure_t accel, TestType test)
{
  intel_raytracing_ext_flag_t flags = intel_get_raytracing_ext_flag();
  if (!(flags & intel_raytracing_ext_flag_ray_query))
    return;

  /* setup ray */
  intel_ray_desc_t ray;
  ray.origin = in.org;
  ray.direction = in.dir;
  ray.tmin = in.tnear;
  ray.tmax = in.tfar;
  ray.mask = in.mask;
  ray.flags = (intel_ray_flags_t) in.flags;

  /* trace ray */
  intel_ray_query_t query = intel_ray_query_init(ray,accel);
  intel_ray_query_start_traversal(query);
  intel_ray_query_sync(query);

  /* return ray data of level 0 */
  out.ray0_org = intel_get_ray_origin(query,0);
  out.ray0_dir = intel_get_ray_direction(query,0);
  out.ray0_tnear = intel_get_ray_tmin(query,0);
  out.ray0_mask = intel_get_ray_mask(query,0);
  out.ray0_flags = intel_get_ray_flags(query,0);

  /* clear ray data of level N */
  out.rayN_org = sycl::float3(0.f,0.f,0.f);
  out.rayN_dir = sycl::float3(0.f,0.f,0.f);
  out.rayN_tnear = 0.0f;
  out.rayN_mask = 0;
  out.rayN_flags = 0;

  /* scene per BVH level: level 0 is the top level scene, level 1 is set when a
   * software instance is entered */
  Scene* scenes[2];
  scenes[0] = (Scene*) scene_in;
  scenes[1] = nullptr;

  /* traversal loop */
  while (!intel_is_traversal_done(query))
  {
    const intel_candidate_type_t candidate = intel_get_hit_candidate(query, intel_hit_type_potential_hit);

    if (candidate == intel_candidate_type_triangle)
    {
      if (test == TestType::TRIANGLES_POTENTIAL_HIT)
      {
        out.hit_type = TEST_POTENTIAL_HIT;
        out.bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit );
        out.hit_candidate = intel_get_hit_candidate( query, intel_hit_type_potential_hit );
        out.t = intel_get_hit_distance(query, intel_hit_type_potential_hit);
        out.u = intel_get_hit_barycentrics(query, intel_hit_type_potential_hit).x;
        out.v = intel_get_hit_barycentrics(query, intel_hit_type_potential_hit).y;
        out.front_face = intel_get_hit_front_face( query, intel_hit_type_potential_hit );
        out.instUserID = intel_get_hit_instance_user_id( query, intel_hit_type_potential_hit );
        out.instID = intel_get_hit_instance_id( query, intel_hit_type_potential_hit );
        out.geomID = intel_get_hit_geometry_id( query, intel_hit_type_potential_hit );
        if (i%2) out.primID = intel_get_hit_triangle_primitive_id( query, intel_hit_type_potential_hit );
        else out.primID = intel_get_hit_primitive_id ( query, intel_hit_type_potential_hit );

        intel_float3 vertex_out[3];
        intel_get_hit_triangle_vertices(query, vertex_out, intel_hit_type_potential_hit);
        out.v0 = vertex_out[0];
        out.v1 = vertex_out[1];
        out.v2 = vertex_out[2];

        /* return instance transformations; these belong to the potential hit
         * being reported (nothing has been committed on this path), matching
         * the potential hit branch of render() */
        out.world_to_object = intel_get_hit_world_to_object(query,intel_hit_type_potential_hit);
        out.object_to_world = intel_get_hit_object_to_world(query,intel_hit_type_potential_hit);

        /* return ray data at current level */
        uint32_t bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit );
        out.rayN_org = intel_get_ray_origin(query,bvh_level);
        out.rayN_dir = intel_get_ray_direction(query,bvh_level);
        out.rayN_tnear = intel_get_ray_tmin(query,bvh_level);
        out.rayN_mask = intel_get_ray_mask(query,bvh_level);
        out.rayN_flags = intel_get_ray_flags(query,bvh_level);

        /* abandon ray query on this exit path too, as done at the end of the function */
        intel_ray_query_abandon(query);
        return;
      }

      if (test == TestType::TRIANGLES_ANYHIT_SHADER_COMMIT)
        intel_ray_query_commit_potential_hit(query);
    }
    else if (candidate == intel_candidate_type_procedural)
    {
      const uint32_t bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_potential_hit );
      const uint32_t instID = intel_get_hit_instance_id( query, intel_hit_type_potential_hit );
      const uint32_t geomID = intel_get_hit_geometry_id( query, intel_hit_type_potential_hit );
      const uint32_t primID = intel_get_hit_primitive_id( query, intel_hit_type_potential_hit );

      /* find the host geometry of the candidate; an instance ID of -1 means
       * that no instance ID is reported for the hit */
      Geometry* geom = nullptr;
      if (instID != uint32_t(-1)) {
        Scene::InstanceGeometry* instance = (Scene::InstanceGeometry*) (scenes[0]->geometries.data() + instID)->get();
        geom = (instance->scene->geometries.data() + geomID)->get();
      } else {
        geom = (scenes[bvh_level]->geometries.data() + geomID)->get();
      }

      if (geom->type == Geometry::TRIANGLE_MESH)
      {
        const TriangleMesh* mesh = (TriangleMesh*) geom;
        const sycl::int4 tri = *(mesh->triangles.data() + primID);
        const sycl::float3 tri_v0 = *(mesh->vertices.data() + tri.x());
        const sycl::float3 tri_v1 = *(mesh->vertices.data() + tri.y());
        const sycl::float3 tri_v2 = *(mesh->vertices.data() + tri.z());

        /* calculate vertices relative to ray origin */
        const sycl::float3 O = intel_get_ray_origin(query,bvh_level);
        const sycl::float3 D = intel_get_ray_direction(query,bvh_level);
        const float tnear = intel_get_ray_tmin(query,bvh_level);
        /* the distance of the committed hit is used as far bound of the depth test */
        const float tfar = intel_get_hit_distance(query, intel_hit_type_committed_hit);
        const sycl::float3 v0 = tri_v0-O;
        const sycl::float3 v1 = tri_v1-O;
        const sycl::float3 v2 = tri_v2-O;

        /* calculate triangle edges */
        const sycl::float3 e0 = v2-v0;
        const sycl::float3 e1 = v0-v1;
        const sycl::float3 e2 = v1-v2;

        /* perform edge tests */
        const float U = sycl::dot(cross(e0,v2+v0),D);
        const float V = sycl::dot(cross(e1,v0+v1),D);
        const float W = sycl::dot(cross(e2,v1+v2),D);
        const float UVW = U+V+W;
        bool valid = (std::min(U,std::min(V,W)) >= -0.0f) || (std::max(U,std::max(V,W)) <= 0.0f);

        /* calculate geometry normal and denominator */
        const sycl::float3 Ng = sycl::cross(e2,e1);
        const float den = 2.0f*(dot(Ng,D));

        /* perform depth test */
        const float T = 2.0f*dot(v0,Ng);
        const float t = T/den;
        const float u = U/UVW;
        const float v = V/UVW;
        valid &= (tnear <= t) && (t <= tfar);
        valid &= den != 0.0f;

        /* commit hit */
        if (valid)
          intel_ray_query_commit_potential_hit_override(query,t,sycl::float2(u,v));
      }
      else if (geom->type == Geometry::INSTANCE)
      {
        const Scene::InstanceGeometry* inst = (Scene::InstanceGeometry*) geom;
        const Transform local2world = inst->local2world;
        const Transform world2local = rcp(local2world);

        /* load ray */
        const sycl::float3 O = intel_get_ray_origin(query,bvh_level);
        const sycl::float3 D = intel_get_ray_direction(query,bvh_level);

        /* transform ray */
        const sycl::float3 O1 = xfmPoint(world2local, O);
        const sycl::float3 D1 = xfmVector(world2local, D);

        /* remember the scene of the next level for later geometry lookups */
        scenes[bvh_level+1] = inst->scene.get();
        intel_raytracing_acceleration_structure_t inst_accel = (intel_raytracing_acceleration_structure_t) inst->scene->getAccel();

        /* continue traversal */
        intel_ray_desc_t forwarded;
        forwarded.origin = O1;
        forwarded.direction = D1;
        forwarded.tmin = intel_get_ray_tmin(query,bvh_level);
        forwarded.tmax = 0.0f; // unused
        forwarded.mask = intel_get_ray_mask(query,bvh_level);
        forwarded.flags = intel_get_ray_flags(query,bvh_level);
        intel_ray_query_forward_ray(query, forwarded, inst_accel);
      }
    }

    intel_ray_query_start_traversal(query);
    intel_ray_query_sync(query);
  }

  /* committed hit */
  if (intel_has_committed_hit(query))
  {
    out.hit_type = TEST_COMMITTED_HIT;
    out.bvh_level = intel_get_hit_bvh_level( query, intel_hit_type_committed_hit );
    out.hit_candidate = intel_get_hit_candidate( query, intel_hit_type_committed_hit );
    out.t = intel_get_hit_distance(query, intel_hit_type_committed_hit);
    out.u = intel_get_hit_barycentrics(query, intel_hit_type_committed_hit).x;
    out.v = intel_get_hit_barycentrics(query, intel_hit_type_committed_hit).y;
    out.front_face = intel_get_hit_front_face( query, intel_hit_type_committed_hit );
    out.instUserID = intel_get_hit_instance_user_id( query, intel_hit_type_committed_hit );
    out.instID = intel_get_hit_instance_id( query, intel_hit_type_committed_hit );
    out.geomID = intel_get_hit_geometry_id( query, intel_hit_type_committed_hit );
    out.primID = intel_get_hit_primitive_id( query, intel_hit_type_committed_hit );

    /* triangle vertices are only queried for committed triangle hits */
    out.v0 = sycl::float3(0.f,0.f,0.f);
    out.v1 = sycl::float3(0.f,0.f,0.f);
    out.v2 = sycl::float3(0.f,0.f,0.f);
    if (intel_get_hit_candidate( query, intel_hit_type_committed_hit ) == intel_candidate_type_triangle)
    {
      intel_float3 vertex_out[3];
      intel_get_hit_triangle_vertices(query, vertex_out, intel_hit_type_committed_hit);
      out.v0 = vertex_out[0];
      out.v1 = vertex_out[1];
      out.v2 = vertex_out[2];
    }

    /* return instance transformations */
    out.world_to_object = intel_get_hit_world_to_object(query,intel_hit_type_committed_hit);
    out.object_to_world = intel_get_hit_object_to_world(query,intel_hit_type_committed_hit);
  }
  /* miss */
  else {
    out.hit_type = TEST_MISS;
  }

  /* abandon ray query */
  intel_ray_query_abandon(query);
}
void buildTestExpectedInputAndOutput(std::shared_ptr<Scene> scene, size_t numTests, TestType test, TestInput* in, TestOutput* out_expected)
{
std::vector<Hit> tri_map;
tri_map.resize(numTests);
std::vector<uint32_t> id_stack;
Transform local_to_world;
scene->buildTriMap(local_to_world,id_stack,-1,false,tri_map);
TestHitType hit_type = TEST_MISS;
switch (test) {
case TestType::TRIANGLES_COMMITTED_HIT: hit_type = TEST_COMMITTED_HIT; break;
case TestType::TRIANGLES_POTENTIAL_HIT: hit_type = TEST_POTENTIAL_HIT; break;
case TestType::TRIANGLES_ANYHIT_SHADER_COMMIT: hit_type = TEST_COMMITTED_HIT; break;
case TestType::TRIANGLES_ANYHIT_SHADER_REJECT: hit_type = TEST_MISS; break;
case TestType::PROCEDURALS_COMMITTED_HIT: hit_type = TEST_COMMITTED_HIT; break;
default: assert(false); break;
};
//for (size_t y=0; y<height; y++)
{
//for (size_t x=0; x<width; x++)
{
//for (size_t i=0; i<2; i++)
for (size_t tid=0; tid<numTests; tid++)
{
//size_t tid = 2*(y*width+x)+i;
assert(tid < numTests);
Hit hit = tri_map[tid];
const Triangle tri = hit.triangle.transform(hit.local_to_world);
const sycl::float3 p = tri.sample(0.1f,0.6f);
const Transform world_to_local = rcp(hit.local_to_world);
in[tid].org = p + sycl::float3(0.f,0.f,-1.f);
in[tid].dir = sycl::float3(0.f,0.f,1.f);
in[tid].tnear = 0.0f;
in[tid].tfar = 10000.0f;
in[tid].mask = 0xFF;
in[tid].flags = intel_ray_flags_none;
// Ray data at level 0
out_expected[tid].ray0_org = in[tid].org;
out_expected[tid].ray0_dir = in[tid].dir;
out_expected[tid].ray0_tnear = in[tid].tnear;
out_expected[tid].ray0_mask = in[tid].mask;
out_expected[tid].ray0_flags = in[tid].flags;
// Ray data at hit bvh_level
switch (test) {
default: break;
case TestType::TRIANGLES_POTENTIAL_HIT:
out_expected[tid].rayN_org = xfmPoint (world_to_local,in[tid].org);
out_expected[tid].rayN_dir = xfmVector(world_to_local,in[tid].dir);
out_expected[tid].rayN_tnear = in[tid].tnear;
out_expected[tid].rayN_mask = in[tid].mask;
out_expected[tid].rayN_flags = in[tid].flags;
break;
}
// Hit data
out_expected[tid].hit_type = hit_type;
switch (test) {
default: break;
case TestType::TRIANGLES_COMMITTED_HIT:
case TestType::TRIANGLES_POTENTIAL_HIT:
case TestType::TRIANGLES_ANYHIT_SHADER_COMMIT:
case TestType::PROCEDURALS_COMMITTED_HIT:
if (hit.instID != -1)
out_expected[tid].bvh_level = 1;
else
out_expected[tid].bvh_level = 0;
if (hit.procedural_triangle)
out_expected[tid].hit_candidate = intel_candidate_type_procedural;
else
out_expected[tid].hit_candidate = intel_candidate_type_triangle;
out_expected[tid].t = 1.0f;
out_expected[tid].u = 0.1f;
out_expected[tid].v = 0.6f;
out_expected[tid].front_face = 0;
out_expected[tid].geomID = hit.geomID;
out_expected[tid].primID = hit.primID;
if (hit.procedural_instance) {
out_expected[tid].instID = -1;
out_expected[tid].instUserID = -1;
}
else {
out_expected[tid].instID = hit.instID;
out_expected[tid].instUserID = hit.instUserID;
}
if (hit.procedural_triangle) {
out_expected[tid].v0 = sycl::float3(0.f,0.f,0.f);
out_expected[tid].v1 = sycl::float3(0.f,0.f,0.f);
out_expected[tid].v2 = sycl::float3(0.f,0.f,0.f);
} else {
out_expected[tid].v0 = hit.triangle.v0;
out_expected[tid].v1 = hit.triangle.v1;
out_expected[tid].v2 = hit.triangle.v2;
}
if (hit.procedural_instance) {
out_expected[tid].world_to_object = Transform();
out_expected[tid].object_to_world = Transform();
} else {
out_expected[tid].world_to_object = world_to_local;
out_expected[tid].object_to_world = hit.local_to_world;
}
break;
}
}
}
}
}
/*! Runs one hit test on a 128x128 scene and returns the number of mismatches.
 *
 *  The scene is split into geometries, optionally instanced, padded with null
 *  geometries and built in BUILD_EXPECTED_SIZE mode. One ray per test is traced
 *  either with render() (no software instancing and committed/potential
 *  triangle hit tests) or with render_loop() (everything else), and the output
 *  is compared against the expected output.
 *
 *  \param device   device to run on
 *  \param queue    queue the kernel is submitted to
 *  \param context  context used for allocations
 *  \param inst     instancing mode of the scene
 *  \param test     test type
 *  \return number of errors counted by compareTestOutput */
uint32_t executeTest(sycl::device& device, sycl::queue& queue, sycl::context& context, InstancingType inst, TestType test)
{
  const int width = 128;
  const int height = 128;
  const size_t numTests = 2*width*height;

  /* scene properties required by the test type */
  bool opaque = true;
  bool procedural = false;
  switch (test) {
  case TestType::TRIANGLES_COMMITTED_HIT:
    opaque = true; procedural = false;
    break;
  case TestType::TRIANGLES_POTENTIAL_HIT:
  case TestType::TRIANGLES_ANYHIT_SHADER_COMMIT:
  case TestType::TRIANGLES_ANYHIT_SHADER_REJECT:
    opaque = false; procedural = false;
    break;
  case TestType::PROCEDURALS_COMMITTED_HIT:
    opaque = false; procedural = true;
    break;
  default:
    assert(false);
    break;
  };

  /* build the scene */
  std::shared_ptr<Scene> scene(new Scene(width,height,opaque,procedural));
  scene->splitIntoGeometries(16);
  if (inst != InstancingType::NONE)
    scene->createInstances(scene->size(),3, inst == InstancingType::SW_INSTANCING);
  scene->addNullGeometries(16);
  scene->buildAccel(device,context,BuildMode::BUILD_EXPECTED_SIZE,false);

  /* zero-initialized shared USM buffers for input, output and expected output */
  auto allocZeroed = [&](size_t bytes) -> void* {
    void* buffer = sycl::aligned_alloc(64,bytes,device,context,sycl::usm::alloc::shared);
    memset(buffer, 0, bytes);
    return buffer;
  };
  TestInput*  in           = (TestInput*)  allocZeroed(numTests*sizeof(TestInput));
  TestOutput* out_test     = (TestOutput*) allocZeroed(numTests*sizeof(TestOutput));
  TestOutput* out_expected = (TestOutput*) allocZeroed(numTests*sizeof(TestOutput));

  buildTestExpectedInputAndOutput(scene,numTests,test,in,out_expected);

  /* execute test */
  intel_raytracing_acceleration_structure_t accel = (intel_raytracing_acceleration_structure_t) scene->getAccel();
  size_t scene_ptr = (size_t) scene.get();

  const bool triangleHitTest =
    test == TestType::TRIANGLES_COMMITTED_HIT || test == TestType::TRIANGLES_POTENTIAL_HIT;

  if (inst != InstancingType::SW_INSTANCING && triangleHitTest)
  {
    /* single traversal step is sufficient */
#if defined(ZE_RAYTRACING_RT_SIMULATION)
    tbb::parallel_for(size_t(0),numTests, [&](size_t i) {
      render(i,in[i],out_test[i],accel);
    });
#else
    queue.submit([&](sycl::handler& cgh) {
      const sycl::range<1> range(numTests);
      cgh.parallel_for(range, [=](sycl::item<1> item) {
        const uint32_t i = item.get_id(0);
        render(i,in[i],out_test[i],accel);
      });
    });
    queue.wait_and_throw();
#endif
  }
  else
  {
    /* full traversal loop */
#if defined(ZE_RAYTRACING_RT_SIMULATION)
    tbb::parallel_for(size_t(0),numTests, [&](size_t i) {
      render_loop(i,in[i],out_test[i],scene_ptr,accel,test);
    });
#else
    queue.submit([&](sycl::handler& cgh) {
      const sycl::range<1> range(numTests);
      cgh.parallel_for(range, [=](sycl::item<1> item) {
        const uint32_t i = item.get_id(0);
        render_loop(i,in[i],out_test[i],scene_ptr,accel,test);
      });
    });
    queue.wait_and_throw();
#endif
  }

  /* verify result */
  uint32_t numErrors = 0;
  for (size_t tid=0; tid<numTests; tid++)
    compareTestOutput(tid,numErrors,out_test[tid],out_expected[tid]);

  sycl::free(in,context);
  sycl::free(out_test,context);
  sycl::free(out_expected,context);

  return numErrors;
}
/*! Runs one build test with `numPrimitives` randomly selected triangles of a
 *  plane and returns the number of mismatches.
 *
 *  Depending on `test` the plane is made procedural, split into geometries,
 *  mixed with procedurals and/or instanced. Null geometries are added, the
 *  scene is built with `buildMode`, and one ray per primitive is traced with
 *  render_loop() expecting a committed triangle hit.
 *
 *  \param device         device to run on
 *  \param queue          queue the kernel is submitted to
 *  \param context        context used for allocations
 *  \param test           build test type
 *  \param buildMode      buffer sizing strategy of the build
 *  \param numPrimitives  number of primitives of the scene and number of rays
 *  \param testID         test number; varies vertex sharing (odd IDs unshare)
 *                        and splitting (every third ID for procedurals)
 *  \return number of errors counted by compareTestOutput */
uint32_t executeBuildTest(sycl::device& device, sycl::queue& queue, sycl::context& context, TestType test, BuildMode buildMode, uint32_t numPrimitives, int testID)
{
  /* plane large enough to select numPrimitives triangles from */
  const uint32_t width = 2*(uint32_t)ceilf(sqrtf(numPrimitives));
  std::shared_ptr<TriangleMesh> plane = createTrianglePlane(sycl::float3(0,0,0), sycl::float3(width,0,0), sycl::float3(0,width,0), width, width);
  if (test == TestType::BUILD_TEST_PROCEDURALS)
    plane->procedural = true;
  plane->selectRandom(numPrimitives);
  if (testID%2)
    plane->unshareVertices();

  std::shared_ptr<Scene> scene(new Scene);
  scene->add(plane);

  /* scene layout depends on the test type */
  switch (test) {
  case TestType::BUILD_TEST_PROCEDURALS:
    if (testID%3==0)
      scene->splitIntoGeometries();
    break;
  case TestType::BUILD_TEST_MIXED:
    scene->splitIntoGeometries(std::max(1u,std::min(1024u,numPrimitives)));
    scene->mixTrianglesAndProcedurals();
    scene->createInstances(scene->size()/2);
    break;
  case TestType::BUILD_TEST_INSTANCES:
    scene->splitIntoGeometries(std::max(1u,std::min(1024u,numPrimitives)));
    scene->createInstances(scene->size());
    break;
  default:
    break;
  }

  scene->addNullGeometries(16);
  scene->buildAccel(device,context,buildMode,false);

  /* zero-initialized shared USM buffers for input, output and expected output */
  const size_t inBytes  = numPrimitives*sizeof(TestInput);
  const size_t outBytes = numPrimitives*sizeof(TestOutput);
  TestInput* in = (TestInput*) sycl::aligned_alloc(64,inBytes,device,context,sycl::usm::alloc::shared);
  memset(in, 0, inBytes);
  TestOutput* out_test = (TestOutput*) sycl::aligned_alloc(64,outBytes,device,context,sycl::usm::alloc::shared);
  memset(out_test, 0, outBytes);
  TestOutput* out_expected = (TestOutput*) sycl::aligned_alloc(64,outBytes,device,context,sycl::usm::alloc::shared);
  memset(out_expected, 0, outBytes);

  buildTestExpectedInputAndOutput(scene,numPrimitives,TestType::TRIANGLES_COMMITTED_HIT,in,out_expected);

  /* execute test */
  intel_raytracing_acceleration_structure_t accel = (intel_raytracing_acceleration_structure_t) scene->getAccel();
  size_t scene_ptr = (size_t) scene.get();

  /* nothing is traced for an empty scene */
  if (numPrimitives)
  {
#if defined(ZE_RAYTRACING_RT_SIMULATION)
    tbb::parallel_for(size_t(0),size_t(numPrimitives), [&](size_t i) {
      render_loop(i,in[i],out_test[i],scene_ptr,accel,TestType::TRIANGLES_COMMITTED_HIT);
    });
#else
    queue.submit([&](sycl::handler& cgh) {
      const sycl::range<1> range(numPrimitives);
      cgh.parallel_for(range, [=](sycl::item<1> item) {
        const uint32_t i = item.get_id(0);
        render_loop(i,in[i],out_test[i],scene_ptr,accel,TestType::TRIANGLES_COMMITTED_HIT);
      });
    });
    queue.wait_and_throw();
#endif
  }

  /* verify result */
  uint32_t numErrors = 0;
  for (size_t tid=0; tid<numPrimitives; tid++)
    compareTestOutput(tid,numErrors,out_test[tid],out_expected[tid]);

  sycl::free(in,context);
  sycl::free(out_test,context);
  sycl::free(out_expected,context);

  return numErrors;
}
/*! Runs the build test for 128 scene sizes and returns the accumulated number
 *  of errors. Test IDs 0..10 use that many primitives; larger IDs use the
 *  square of the ID. */
uint32_t executeBuildTest(sycl::device& device, sycl::queue& queue, sycl::context& context, TestType test, BuildMode buildMode)
{
  uint32_t totalErrors = 0;

  uint32_t testID = 0;
  while (testID < 128)
  {
    const uint32_t numPrimitives = (testID <= 10) ? testID : testID*testID;
    std::cout << "testing " << numPrimitives << " primitives" << std::endl;
    totalErrors += executeBuildTest(device,queue,context,test,buildMode,numPrimitives,testID);
    testID++;
  }

  return totalErrors;
}
/*! Benchmarks the build for scenes of 2^0 to 2^20 sequentially selected
 *  primitives of a plane. Each scene is built in BUILD_WORST_CASE_SIZE mode with
 *  benchmarking enabled, which prints the build rate. Always returns 0. */
uint32_t executeBenchmark(sycl::device& device, sycl::queue& queue, sycl::context& context, TestType test)
{
  for (uint32_t shift=0; shift<=20; shift++)
  {
    const uint32_t numPrimitives = 1<<shift;

    /* announce the run; other test types print nothing */
    if (test == TestType::BENCHMARK_TRIANGLES)
      std::cout << "benchmarking " << numPrimitives << " triangles: ";
    else if (test == TestType::BENCHMARK_PROCEDURALS)
      std::cout << "benchmarking " << numPrimitives << " procedurals: ";

    /* plane large enough to select numPrimitives triangles from */
    const uint32_t width = 2*(uint32_t)ceilf(sqrtf(numPrimitives));
    std::shared_ptr<TriangleMesh> plane = createTrianglePlane(sycl::float3(0,0,0), sycl::float3(width,0,0), sycl::float3(0,width,0), width, width);
    if (test == TestType::BENCHMARK_PROCEDURALS)
      plane->procedural = true;
    plane->selectSequential(numPrimitives);

    std::shared_ptr<Scene> scene(new Scene);
    scene->add(plane);
    scene->buildAccel(device,context,BuildMode::BUILD_WORST_CASE_SIZE,true);
  }

  return 0;
}
/* control flags stored in DispatchGlobals::flags */
enum Flags : uint32_t {
  FLAGS_NONE            = 0,
  DEPTH_TEST_LESS_EQUAL = 1u << 0 // when set we use <= for depth test, otherwise <
};
/* Record that allocDispatchGlobals() writes at the very start of the buffer it
 * allocates (the buffer pointer is cast directly to DispatchGlobals*).
 * NOTE(review): presumably this mirrors a layout expected by the ray tracing
 * hardware/driver, so member order and bit-field widths should be treated as
 * fixed - confirm before changing anything here. */
struct DispatchGlobals
{
uint64_t rtMemBasePtr; // base address of the allocated stack memory
uint64_t callStackHandlerKSP; // this is the KSP of the continuation handler that is invoked by BTD when the read KSP is 0
uint32_t asyncStackSize; // async-RT stack size in 64 byte blocks
// the following three bit-fields add up to 16+4+12 = 32 bits
uint32_t numDSSRTStacks : 16; // number of stacks per DSS
uint32_t syncRayQueryCount : 4; // number of ray queries in the sync-RT stack: 0-15 mapped to: 1-16
unsigned _reserved_mbz : 12; // padding bits; written as 0 by allocDispatchGlobals() (name suggests "must be zero")
uint32_t maxBVHLevels; // the maximal number of supported instancing levels (0->8, 1->1, 2->2, ...)
Flags flags; // per context control flags
};
/* Allocates one buffer through alloc_accel_buffer() that holds a 128 byte
 * header followed by 2^17 stack slots, clears it, and fills in the
 * DispatchGlobals record located at the start of the buffer.
 *
 * rtMemBasePtr is set to the address directly behind the last byte of the
 * buffer, maxBVHLevels to 2 and flags to DEPTH_TEST_LESS_EQUAL; all other
 * members are zero.
 *
 * Returns the start address of the buffer. */
void* allocDispatchGlobals(sycl::device device, sycl::context context)
{
  const size_t maxBVHLevels = 2; //RTC_MAX_INSTANCE_LEVEL_COUNT+1;
  const size_t num_rtstacks = 1<<17; // this is sufficiently large also for PVC

  /* size of one stack slot, rounded up to the next multiple of 64 bytes */
  const size_t rtstack_unaligned = 64 + maxBVHLevels*(64+32);
  const size_t rtstack_bytes = (rtstack_unaligned + 63) & ~size_t(63);

  /* header plus all stack slots */
  const size_t totalBytes = 128 + num_rtstacks*rtstack_bytes;

  void* const buffer = alloc_accel_buffer(totalBytes,device,context);
  memset(buffer, 0, totalBytes);

  DispatchGlobals* const globals = static_cast<DispatchGlobals*>(buffer);
  globals->rtMemBasePtr        = reinterpret_cast<uint64_t>(buffer) + totalBytes;
  globals->callStackHandlerKSP = 0;
  globals->asyncStackSize      = 0;
  globals->numDSSRTStacks      = 0;
  globals->syncRayQueryCount   = 0;
  globals->_reserved_mbz       = 0;
  globals->maxBVHLevels        = static_cast<uint32_t>(maxBVHLevels);
  globals->flags               = DEPTH_TEST_LESS_EQUAL;

  return buffer;
}
/* Entry point of the test driver.
 *
 * Steps performed:
 *   1. parse the command line (test selection, instancing mode, build mode,
 *      RTAS builder selection, --jit-cache <int>, --threads <int>)
 *   2. initialize ZeWrapper, TBB and the SYCL GPU device/queue/context
 *   3. initialize the RTAS builder and create the builder and parallel
 *      operation objects
 *   4. run the selected benchmark / build test / trace test
 *   5. destroy the objects created in step 3 again
 *
 * Returns 0 when the executed test reported no errors and 1 otherwise. A
 * missing or unknown command line option, a failed ZeWrapper initialization
 * and every exception derived from std::exception also yield exit code 1
 * after a message has been printed. */
int main(int argc, char* argv[]) try
{
  TestType test = TestType::TRIANGLES_COMMITTED_HIT;
  InstancingType inst = InstancingType::NONE;
  BuildMode buildMode = BuildMode::BUILD_EXPECTED_SIZE;
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
  ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO;
#else
  ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::INTERNAL;
#endif
  bool jit_cache = false;
  uint32_t numThreads = tbb::this_task_arena::max_concurrency();

  /* command line parsing */
  if (argc == 1) {
    std::cout << "ERROR: no test specified" << std::endl;
    return 1;
  }

  /* parse all command line options; the index is an int to match the type of argc */
  for (int i=1; i<argc; i++)
  {
    if (strcmp(argv[i], "--internal-rtas-builder") == 0) {
      rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::INTERNAL;
    }
    else if (strcmp(argv[i], "--level-zero-rtas-builder") == 0) {
      rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO;
    }
    else if (strcmp(argv[i], "--default-rtas-builder") == 0) {
      rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::AUTO;
    }
    else if (strcmp(argv[i], "--triangles-committed-hit") == 0) {
      test = TestType::TRIANGLES_COMMITTED_HIT;
    }
    else if (strcmp(argv[i], "--triangles-potential-hit") == 0) {
      test = TestType::TRIANGLES_POTENTIAL_HIT;
    }
    else if (strcmp(argv[i], "--triangles-anyhit-shader-commit") == 0) {
      test = TestType::TRIANGLES_ANYHIT_SHADER_COMMIT;
    }
    else if (strcmp(argv[i], "--triangles-anyhit-shader-reject") == 0) {
      test = TestType::TRIANGLES_ANYHIT_SHADER_REJECT;
    }
    else if (strcmp(argv[i], "--procedurals-committed-hit") == 0) {
      test = TestType::PROCEDURALS_COMMITTED_HIT;
    }
    else if (strcmp(argv[i], "--build_test_triangles") == 0) {
      test = TestType::BUILD_TEST_TRIANGLES;
    }
    else if (strcmp(argv[i], "--build_test_procedurals") == 0) {
      test = TestType::BUILD_TEST_PROCEDURALS;
    }
    else if (strcmp(argv[i], "--build_test_instances") == 0) {
      test = TestType::BUILD_TEST_INSTANCES;
    }
    else if (strcmp(argv[i], "--build_test_mixed") == 0) {
      test = TestType::BUILD_TEST_MIXED;
    }
    else if (strcmp(argv[i], "--benchmark_triangles") == 0) {
      test = TestType::BENCHMARK_TRIANGLES;
    }
    else if (strcmp(argv[i], "--benchmark_procedurals") == 0) {
      test = TestType::BENCHMARK_PROCEDURALS;
    }
    else if (strcmp(argv[i], "--no-instancing") == 0) {
      inst = InstancingType::NONE;
    }
    else if (strcmp(argv[i], "--hw-instancing") == 0) {
      inst = InstancingType::HW_INSTANCING;
    }
    else if (strcmp(argv[i], "--sw-instancing") == 0) {
      inst = InstancingType::SW_INSTANCING;
    }
    else if (strcmp(argv[i], "--build_mode_worst_case") == 0) {
      buildMode = BuildMode::BUILD_WORST_CASE_SIZE;
    }
    else if (strcmp(argv[i], "--build_mode_expected") == 0) {
      buildMode = BuildMode::BUILD_EXPECTED_SIZE;
    }
    else if (strcmp(argv[i], "--jit-cache") == 0) {
      if (++i >= argc) throw std::runtime_error("Error: --jit-cache <int>: syntax error");
      jit_cache = atoi(argv[i]) != 0;
    }
    else if (strcmp(argv[i], "--threads") == 0) {
      if (++i >= argc) throw std::runtime_error("Error: --threads <int>: syntax error");
      const int requestedThreads = atoi(argv[i]);
      /* atoi yields 0 for "0" and for non-numeric input; a thread limit of 0
       * must not be handed to TBB below, thus reject it here */
      if (requestedThreads == 0)
        throw std::runtime_error("Error: --threads <int>: thread count must be a non-zero integer");
      numThreads = requestedThreads;
    }
    else {
      std::cout << "ERROR: invalid command line option " << argv[i] << std::endl;
      return 1;
    }
  }

  if (jit_cache)
    std::cout << "WARNING: JIT caching is not supported!" << std::endl;

  if (ZeWrapper::init() != ZE_RESULT_SUCCESS) {
    std::cerr << "ZeWrapper not successfully initialized" << std::endl;
    return 1;
  }

#if defined(ZE_RAYTRACING_RT_SIMULATION)
  RTCore::Init();
  RTCore::SetXeVersion((RTCore::XeVersion)ZE_RAYTRACING_DEVICE);
#endif

  /* limit the number of threads TBB may use */
#if TBB_INTERFACE_VERSION >= 11005
  tbb::global_control tbb_threads(tbb::global_control::max_allowed_parallelism,numThreads);
#else
  tbb::task_scheduler_init tbb_threads(tbb::task_scheduler_init::deferred);
  tbb_threads.initialize(int(numThreads));
#endif

  /* initialize SYCL device */
  device = sycl::device(sycl::gpu_selector_v);
  sycl::queue queue = sycl::queue(device,exception_handler);
  context = queue.get_context();

#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
  dispatchGlobalsPtr = allocDispatchGlobals(device,context);
#endif

  /* execute test */
  RandomSampler_init(rng,0x56FE238A);

  ze_result_t result = ZE_RESULT_SUCCESS;
  sycl::platform platform = device.get_platform();
  ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);

  /* in AUTO mode the driver is queried for the ZE_experimental_rtas_builder
   * extension and the internal builder is requested when it is not reported */
  if (rtas_build_mode == ZeWrapper::RTAS_BUILD_MODE::AUTO)
  {
    /* first call obtains the number of extensions, second call the extension list */
    uint32_t count = 0;
    std::vector<ze_driver_extension_properties_t> extensions;
    result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
    if (result != ZE_RESULT_SUCCESS)
      throw std::runtime_error("zeDriverGetExtensionProperties failed");

    extensions.resize(count);
    result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
    if (result != ZE_RESULT_SUCCESS)
      throw std::runtime_error("zeDriverGetExtensionProperties failed");

    bool ze_rtas_builder = false;
    for (uint32_t i=0; i<extensions.size(); i++)
    {
      if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
        ze_rtas_builder = true;
    }

    if (ze_rtas_builder)
      result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::RTAS_BUILD_MODE::AUTO);
    else
      result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::RTAS_BUILD_MODE::INTERNAL);
  }
  else
    result = ZeWrapper::initRTASBuilder(hDriver,rtas_build_mode);

  if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
    throw std::runtime_error("cannot load ZE_experimental_rtas_builder extension");
  if (result != ZE_RESULT_SUCCESS)
    throw std::runtime_error("cannot initialize ZE_experimental_rtas_builder extension");

  if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
    std::cout << "using internal RTAS builder" << std::endl;
  else
    std::cout << "using Level Zero RTAS builder" << std::endl;

  /* create L0 builder object */
  ze_rtas_builder_exp_desc_t builderDesc = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC };
  ze_result_t err = ZeWrapper::zeRTASBuilderCreateExp(hDriver, &builderDesc, &hBuilder);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("ze_rtas_builder creation failed");

  err = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver,&parallelOperation);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("parallel operation creation failed");

  /* dispatch on the test type; the comparisons rely on the declaration order of TestType */
  uint32_t numErrors = 0;
  if (test >= TestType::BENCHMARK_TRIANGLES)
    numErrors = executeBenchmark(device,queue,context,test);
  else if (test >= TestType::BUILD_TEST_TRIANGLES)
    numErrors = executeBuildTest(device,queue,context,test,buildMode);
  else
    numErrors = executeTest(device,queue,context,inst,test);

  err = ZeWrapper::zeRTASParallelOperationDestroyExp(parallelOperation);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("parallel operation destruction failed");

  /* destroy rtas builder again */
  err = ZeWrapper::zeRTASBuilderDestroyExp(hBuilder);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("ze_rtas_builder destruction failed");

#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
  free_accel_buffer(dispatchGlobalsPtr, context);
#endif

#if defined(ZE_RAYTRACING_RT_SIMULATION)
  RTCore::Cleanup();
#endif

  return numErrors ? 1 : 0;
}
/* caught by const reference to avoid copying/slicing the exception object */
catch (const std::runtime_error& e) {
  std::cerr << "std::runtime_error: " << e.what() << std::endl;
  return 1;
}
/* any other standard exception is reported as well instead of leaving main
 * unhandled and terminating the process without a message */
catch (const std::exception& e) {
  std::cerr << "std::exception: " << e.what() << std::endl;
  return 1;
}
#pragma clang diagnostic pop