Initial commit.

This commit is contained in:
hal8174 2024-04-23 10:14:24 +02:00
commit d3bb49b3f5
1073 changed files with 484757 additions and 0 deletions

View file

@ -0,0 +1,629 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if defined(ZE_RAYTRACING)
#include "sys/sysinfo.h"
#include "sys/vector.h"
#include "math/vec2.h"
#include "math/vec3.h"
#include "math/bbox.h"
#include "math/affinespace.h"
#else
#include "../../../common/sys/sysinfo.h"
#include "../../../common/sys/vector.h"
#include "../../../common/math/vec2.h"
#include "../../../common/math/vec3.h"
#include "../../../common/math/bbox.h"
#include "../../../common/math/lbbox.h"
#include "../../../common/math/affinespace.h"
#endif
#include "node_type.h"
#include <map>
#include <bitset>
namespace embree
{
/*
Internal representation for GeometryFlags.
*/
#undef OPAQUE // Windows defines OPAQUE in gdi.h
enum class GeometryFlags : uint32_t
{
  NONE   = 0x0,
  OPAQUE = 0x1
};

/* Returns true when the two flag values have at least one bit in
 * common. */
inline bool operator& (GeometryFlags a, GeometryFlags b) {
  return (int(a) & int(b)) != 0;
}

/* Output operator for GeometryFlags. Writes "NONE" for the empty flag
 * set and "OPAQUE " when the opaque bit is set; nothing is written
 * when compiling for a SYCL device. */
inline std::ostream& operator<<(std::ostream& cout, const GeometryFlags& gflags)
{
#if !defined(__SYCL_DEVICE_ONLY__)
  if (gflags == GeometryFlags::NONE)
    cout << "NONE";
  else if (gflags & GeometryFlags::OPAQUE)
    cout << "OPAQUE ";
#endif
  return cout;
}
/*
This structure is a header for each leaf type. Only the
InstanceLeaf has a slightly different header.
All primitives inside a leaf are of the same geometry, thus have
the same geometry index (geomIndex), the same shader index
(shaderIndex), the same geometry mask (geomMask), and the same
geometry flags (geomFlags).
The shaderIndex is used to calculate the shader record to
invoke. This is an extension to DXR where the geomIndex is used
for that purpose. For DXR we can always set the shaderIndex to be
equal to the geomIndex.
*/
struct PrimLeafDesc
{
  /* Largest geometry index that fits into the 29-bit geomIndex field
   * declared below. Larger values would be truncated silently when
   * stored, so the constructor rejects them. */
  static const uint32_t MAX_GEOM_INDEX = 0x1FFFFFFF;

  /* Largest shader index that fits into the 24-bit shaderIndex field
   * declared below. */
  static const uint32_t MAX_SHADER_INDEX = 0xFFFFFF;

  enum Type : uint32_t
  {
    TYPE_NONE = 0,

    /* For a node type of NODE_TYPE_PROCEDURAL we support enabling
     * and disabling the opaque/non_opaque culling. */
    TYPE_OPACITY_CULLING_ENABLED = 0,
    TYPE_OPACITY_CULLING_DISABLED = 1
  };

  /* Default constructor, leaves all fields uninitialized. */
  PrimLeafDesc() {}

  /* Creates a leaf header from its components. Throws
   * std::runtime_error if shaderIndex or geomIndex does not fit into
   * its bit field. */
  PrimLeafDesc(uint32_t shaderIndex, uint32_t geomIndex, GeometryFlags gflags, uint32_t geomMask, Type type = TYPE_NONE)
    : shaderIndex(shaderIndex), geomMask(geomMask), geomIndex(geomIndex), type(type), geomFlags((uint32_t)gflags)
  {
    if (shaderIndex > MAX_SHADER_INDEX)
      throw std::runtime_error("too large shader ID");

    if (geomIndex > MAX_GEOM_INDEX)
      throw std::runtime_error("too large geometry ID");
  }

  /* Compares two PrimLeafDesc's for equality. Only the geometry index
   * decides the result; for equal geometry indices the remaining
   * fields are asserted to match as well. */
  friend bool operator ==(const PrimLeafDesc& a, const PrimLeafDesc& b)
  {
    if (a.geomIndex != b.geomIndex) return false;
    assert(a.shaderIndex == b.shaderIndex);
    assert(a.geomMask == b.geomMask);
    assert(a.type == b.type);
    assert(a.geomFlags == b.geomFlags);
    return true;
  }

  friend bool operator !=(const PrimLeafDesc& a, const PrimLeafDesc& b) {
    return !(a == b);
  }

  /* Prints the header, indented by depth (no output on SYCL device). */
  void print(std::ostream& cout, uint32_t depth) const
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    cout << tab(depth) << "PrimLeafDesc {" << std::endl;
    cout << tab(depth) << " shaderIndex = " << shaderIndex << std::endl;
    cout << tab(depth) << " geomMask = " << std::bitset<8>(geomMask) << std::endl;
    cout << tab(depth) << " geomFlags = " << getGeomFlags() << std::endl;
    cout << tab(depth) << " geomIndex = " << geomIndex << std::endl;
    cout << tab(depth) << "}";
#endif
  }

  /* output operator for PrimLeafDesc */
  friend inline std::ostream& operator<<(std::ostream& cout, const PrimLeafDesc& desc) {
    desc.print(cout,0); return cout;
  }

  /* Checks if opaque culling is enabled. */
  bool opaqueCullingEnabled() const {
    return type == TYPE_OPACITY_CULLING_ENABLED;
  }

  /* procedural instances store some valid shader index, i.e. one
   * that differs from the MAX_SHADER_INDEX sentinel */
  bool isProceduralInstance() const {
    return shaderIndex != MAX_SHADER_INDEX;
  }

  /* returns geometry flags */
  GeometryFlags getGeomFlags() const {
    return (GeometryFlags) geomFlags;
  }

public:
  uint32_t shaderIndex : 24;               // shader index used for shader record calculations
  uint32_t geomMask : 8;                   // geometry mask used for ray masking
  uint32_t geomIndex : 29;                 // the geometry index specifies the n'th geometry of the scene
  /*Type*/ uint32_t type : 1;              // enable/disable culling for procedurals and instances
  /*GeometryFlags*/ uint32_t geomFlags : 2; // geometry flags of this geometry
};
/*
The QuadLeaf structure stores a single quad. A quad is a triangle
pair with a shared edge. The first triangle has vertices v0,v1,v2,
while the second triangle has vertices v[j0],v[j1],v[j2], thus the
second triangle used local triangle indices.
*/
struct QuadLeaf
{
  /* Default constructor, leaves all fields uninitialized. */
  QuadLeaf() {}

  /* Creates a quad leaf from four vertices, the local vertex indices
   * j0,j1,j2 of the second triangle, the leaf header components, and
   * the primitive indices of both triangles. */
  QuadLeaf (Vec3f v0, Vec3f v1, Vec3f v2, Vec3f v3,
            uint8_t j0, uint8_t j1, uint8_t j2,
            uint32_t shaderIndex, uint32_t geomIndex, uint32_t primIndex0, uint32_t primIndex1,
            GeometryFlags gflags, uint32_t geomMask, bool last)
    : leafDesc(shaderIndex,geomIndex,gflags,geomMask),
      primIndex0(primIndex0),
      primIndex1Delta(primIndex1-primIndex0), pad1(0),
      j0(j0),j1(j1),j2(j2),last(last),pad(0),
      v0(v0), v1(v1), v2(v2), v3(v3)
  {
    /* There are some constraints on the primitive indices. The
     * second primitive index always has to be the largest and the
     * distance between them can be at most 0x1F (31) as we use only
     * 5 bits (primIndex1Delta) to encode that difference. A larger
     * delta would be truncated when stored. */
    assert(primIndex0 <= primIndex1 && primIndex1 - primIndex0 <= 0x1F);
  }

  /* returns the i'th vertex (i in [0,3]) */
  __forceinline Vec3f vertex(size_t i) const {
    assert(i < 4); return (&v0)[i];
  }

  /* Checks if the specified triangle is the last inside a leaf
   * list. */
  bool isLast(uint32_t i = 1) const
  {
    assert(i<2);
    if (i == 0) return false; // the first triangle is never the last
    else return last;         // the last bit tags the second triangle to be last
  }

  /* Checks if the second triangle exists. An all-zero index triple
   * j0,j1,j2 marks a missing second triangle. */
  bool valid2() const {
    return !(j0 == 0 && j1 == 0 && j2 == 0);
  }

  /* Calculates the number of stored triangles. */
  size_t size() const {
    return 1 + valid2();
  }

  /* Calculates the effectively used bytes. If we store only one
   * triangle we waste the storage of one vertex. */
  size_t usedBytes() const
  {
    if (valid2()) return sizeof(QuadLeaf);
    else return sizeof(QuadLeaf)-sizeof(Vec3f);
  }

  /* Calculates the delta to add to primIndex0 to get the primitive
   * index of the i'th triangle. */
  uint32_t primIndexDelta(uint32_t i) const
  {
    assert(i<2);
    return i*primIndex1Delta;
  }

  /* Calculates the primitive index of the i'th triangle. */
  uint32_t primIndex(uint32_t i) const
  {
    assert(i<2);
    return primIndex0 + primIndexDelta(i);
  }

  /* Quad mode is a special mode where the uv's over the quad are
   * defined over the entire range [0,1]x[0,1]. */
  bool quadMode() const {
    return primIndex1Delta == 0;
  }

  /* Calculates the bounding box of this leaf. The fourth vertex only
   * contributes when the second triangle exists. */
  BBox3f bounds() const
  {
    BBox3f b = empty;
    b.extend(v0);
    b.extend(v1);
    b.extend(v2);
    if (valid2())
      b.extend(v3);
    return b;
  }

  /* output of quad leaf (no output on SYCL device) */
  void print(std::ostream& cout, uint32_t depth) const
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    cout << tab(depth) << "QuadLeaf {" << std::endl;
    cout << tab(depth) << " addr = " << this << std::endl;
    cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl;
    cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl;
    cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl;
    cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl;
    cout << tab(depth) << " triangle0 = { " << std::endl;
    cout << tab(depth) << " primIndex = " << primIndex(0) << std::endl;
    cout << tab(depth) << " v0 = " << v0 << std::endl;
    cout << tab(depth) << " v1 = " << v1 << std::endl;
    cout << tab(depth) << " v2 = " << v2 << std::endl;
    cout << tab(depth) << " }" << std::endl;
    if (valid2()) {
      cout << tab(depth) << " triangle1 = { " << std::endl;
      cout << tab(depth) << " primIndex = " << primIndex(1) << std::endl;
      cout << tab(depth) << " v0 = " << vertex(j0) << std::endl;
      cout << tab(depth) << " v1 = " << vertex(j1) << std::endl;
      cout << tab(depth) << " v2 = " << vertex(j2) << std::endl;
      cout << tab(depth) << " }" << std::endl;
    }
    cout << tab(depth) << "}";
#endif
  }

  /* output operator for QuadLeaf */
  friend inline std::ostream& operator<<(std::ostream& cout, const QuadLeaf& leaf) {
    leaf.print(cout,0); return cout;
  }

public:
  PrimLeafDesc leafDesc;  // the leaf header
  uint32_t primIndex0;    // primitive index of first triangle
  struct {
    uint32_t primIndex1Delta : 5; // delta encoded primitive index of second triangle
    uint32_t pad1 : 11;           // MBZ
    uint32_t j0 : 2;              // specifies first vertex of second triangle
    uint32_t j1 : 2;              // specifies second vertex of second triangle
    uint32_t j2 : 2;              // specifies third vertex of second triangle
    uint32_t last : 1;            // true if the second triangle is the last triangle in a leaf list
    uint32_t pad : 9;             // unused bits
  };
  Vec3f v0; // first vertex of first triangle
  Vec3f v1; // second vertex of first triangle
  Vec3f v2; // third vertex of first triangle
  Vec3f v3; // fourth vertex used for second triangle
};
static_assert(sizeof(QuadLeaf) == 64, "QuadLeaf must be 64 bytes large");
/*
Internal instance flags definition.
*/
struct InstanceFlags
{
  enum Flags : uint8_t
  {
    NONE = 0x0,
    TRIANGLE_CULL_DISABLE = 0x1,           // disables culling of front and back facing triangles through ray flags
    TRIANGLE_FRONT_COUNTERCLOCKWISE = 0x2, // for mirroring transformations the instance can switch front and backface of triangles
    FORCE_OPAQUE = 0x4,                    // forces all primitives inside this instance to be opaque
    FORCE_NON_OPAQUE = 0x8                 // forces all primitives inside this instance to be non-opaque
  };

  /* Default constructor, leaves the flags uninitialized. */
  InstanceFlags() {}

  /* Wraps an already typed flag value. */
  InstanceFlags(Flags rflags)
    : flags(rflags) {}

  /* Wraps a raw 8-bit flag value. */
  InstanceFlags(uint8_t rflags)
    : flags(static_cast<Flags>(rflags)) {}

  /* Implicit conversion back to the plain flag value. */
  operator Flags () const {
    return flags;
  }

  /* Output operator for InstanceFlags. Writes "NONE" for an empty flag
   * set, otherwise the name of every set flag followed by a space.
   * Nothing is written when compiling for a SYCL device. */
  friend inline std::ostream& operator<<(std::ostream& cout, const InstanceFlags& iflags)
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    if (iflags.flags == InstanceFlags::NONE) {
      cout << "NONE";
    }
    else {
      if (iflags.triangle_cull_disable)
        cout << "TRIANGLE_CULL_DISABLE ";
      if (iflags.triangle_front_counterclockwise)
        cout << "TRIANGLE_FRONT_COUNTERCLOCKWISE ";
      if (iflags.force_opaque)
        cout << "FORCE_OPAQUE ";
      if (iflags.force_non_opaque)
        cout << "FORCE_NON_OPAQUE ";
    }
#endif
    return cout;
  }

public:
  /* The flag byte, also accessible as individual one-bit members. */
  union
  {
    Flags flags;
    struct
    {
      bool triangle_cull_disable : 1;
      bool triangle_front_counterclockwise : 1;
      bool force_opaque : 1;
      bool force_non_opaque : 1;
    };
  };
};
/* Combines two instance flag values bitwise. */
inline InstanceFlags::Flags operator| (InstanceFlags::Flags a,InstanceFlags::Flags b) {
  const int merged = static_cast<int>(a) | static_cast<int>(b);
  return static_cast<InstanceFlags::Flags>(merged);
}
/*
The instance leaf represent an instance. It essentially stores
transformation matrices (local to world as well as world to
local) of the instance as well as a pointer to the start node
of some BVH.
The instance leaf consists of two parts, part0 (first 64 bytes)
and part1 (second 64 bytes). Part0 will only get accessed by
hardware and stores the world to local transformation as well as
the BVH node to start traversal. Part1 stores additional data
that is only read by the shader, e.g. it stores the local to
world transformation of the instance.
The layout of the first part of the InstanceLeaf is compatible
with a ProceduralLeaf, thus we can use the same layout for
software instancing if we want.
*/
struct InstanceLeaf
{
  /* Default constructor, leaves both parts uninitialized. */
  InstanceLeaf() {}

  /* Creates an instance leaf from the object to world transformation,
   * the start node pointer (must fit into 48 bits), the instance
   * index, the user provided instance ID, and the instance mask. The
   * world to object transformation is derived by inverting
   * obj2world. */
  InstanceLeaf (AffineSpace3f obj2world, uint64_t startNodePtr, uint32_t instID, uint32_t instUserID, uint8_t instMask)
  {
    assert((startNodePtr >> 48) == 0);
    const AffineSpace3f world2obj = rcp(obj2world);

    /* header and traversal data of part0 */
    part0.shaderIndex = 0; //InstShaderRecordID;
    part0.geomMask = instMask;
    part0.instanceContributionToHitGroupIndex = 0; //desc.InstanceContributionToHitGroupIndex;
    part0.pad0 = 0;
    part0.type = PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED;
    part0.geomFlags = (uint32_t) GeometryFlags::NONE;
    part0.startNodePtr = startNodePtr;
    part0.instFlags = InstanceFlags::NONE;
    part0.pad1 = 0;

    /* world to object matrix plus the object to world translation */
    part0.world2obj_vx = world2obj.l.vx;
    part0.world2obj_vy = world2obj.l.vy;
    part0.world2obj_vz = world2obj.l.vz;
    part0.obj2world_p = obj2world.p;

    /* shading data of part1 */
    part1.bvhPtr = (uint64_t) 0;
    part1.pad = 0;
    part1.instanceID = instUserID;
    part1.instanceIndex = instID;

    /* object to world matrix plus the world to object translation */
    part1.obj2world_vx = obj2world.l.vx;
    part1.obj2world_vy = obj2world.l.vy;
    part1.obj2world_vz = obj2world.l.vz;
    part1.world2obj_p = world2obj.p;
  }

  /* Returns the address of the start node pointer. We need this
   * address to calculate relocation tables when dumping the BVH to
   * disk. The pointer is located 8 bytes into part0, behind the two
   * 32 bit header words. */
  const uint64_t startNodePtrAddr() const {
    const char* const base = reinterpret_cast<const char*>(&part0);
    return reinterpret_cast<uint64_t>(base + 8);
  }

  /* Returns the address of the BVH pointer, which is the first entry
   * of part1. */
  const uint64_t bvhPtrAddr() const {
    return reinterpret_cast<uint64_t>(&part1);
  }

  /* returns the world to object space transformation matrix. */
  const AffineSpace3f World2Obj() const {
    return AffineSpace3f(part0.world2obj_vx,part0.world2obj_vy,part0.world2obj_vz,part1.world2obj_p);
  }

  /* returns the object to world space transformation matrix. */
  const AffineSpace3f Obj2World() const {
    return AffineSpace3f(part1.obj2world_vx,part1.obj2world_vy,part1.obj2world_vz,part0.obj2world_p);
  }

  /* Prints the instance leaf, indented by depth. A set type bit
   * selects the "ProceduralInstanceLeaf" title. No output on SYCL
   * device. */
  void print (std::ostream& cout, uint32_t depth) const
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    const char* const title = part0.type ? "ProceduralInstanceLeaf {" : "InstanceLeaf {";
    cout << tab(depth) << title << std::endl;
    cout << tab(depth) << " addr = " << this << std::endl;
    cout << tab(depth) << " shaderIndex = " << part0.shaderIndex << std::endl;
    cout << tab(depth) << " geomMask = " << std::bitset<8>(part0.geomMask) << std::endl;
    cout << tab(depth) << " geomIndex = " << part1.instanceIndex << std::endl;
    cout << tab(depth) << " instanceID = " << part1.instanceID << std::endl;
    cout << tab(depth) << " instFlags = " << InstanceFlags(part0.instFlags) << std::endl;
    cout << tab(depth) << " startNodePtr = " << (void*)(size_t)part0.startNodePtr << std::endl;
    cout << tab(depth) << " obj2world.vx = " << part1.obj2world_vx << std::endl;
    cout << tab(depth) << " obj2world.vy = " << part1.obj2world_vy << std::endl;
    cout << tab(depth) << " obj2world.vz = " << part1.obj2world_vz << std::endl;
    cout << tab(depth) << " obj2world.p = " << part0.obj2world_p << std::endl;
    cout << tab(depth) << " world2obj.vx = " << part0.world2obj_vx << std::endl;
    cout << tab(depth) << " world2obj.vy = " << part0.world2obj_vy << std::endl;
    cout << tab(depth) << " world2obj.vz = " << part0.world2obj_vz << std::endl;
    cout << tab(depth) << " world2obj.p = " << part1.world2obj_p << std::endl;
    cout << tab(depth) << " instanceContributionToHitGroupIndex = " << part0.instanceContributionToHitGroupIndex << std::endl;
    cout << tab(depth) << "}";
#endif
  }

  /* output operator for InstanceLeaf */
  friend inline std::ostream& operator<<(std::ostream& cout, const InstanceLeaf& leaf) {
    leaf.print(cout,0); return cout;
  }

  /* first 64 bytes accessed during traversal by hardware */
  struct Part0
  {
    /* Checks if opaque culling is enabled. */
    bool opaqueCullingEnabled() const {
      return type == PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED;
    }

  public:
    uint32_t shaderIndex : 24; // shader index used to calculate instancing shader in case of software instancing
    uint32_t geomMask : 8;     // geometry mask used for ray masking
    uint32_t instanceContributionToHitGroupIndex : 24;
    uint32_t pad0 : 5;

    /* the following two entries are only used for procedural instances */
    /*PrimLeafDesc::Type*/ uint32_t type : 1;  // enables/disables opaque culling
    /*GeometryFlags*/ uint32_t geomFlags : 2;  // unused for instances

    uint64_t startNodePtr : 48; // start node where to continue traversal of the instanced object
    uint64_t instFlags : 8;     // flags for the instance (see InstanceFlags)
    uint64_t pad1 : 8;          // unused bits

    Vec3f world2obj_vx; // 1st column of World2Obj transform
    Vec3f world2obj_vy; // 2nd column of World2Obj transform
    Vec3f world2obj_vz; // 3rd column of World2Obj transform
    Vec3f obj2world_p;  // translation of Obj2World transform (on purpose in first 64 bytes)
  } part0;

  /* second 64 bytes accessed during shading */
  struct Part1
  {
    uint64_t bvhPtr : 48;   // pointer to BVH where start node belongs to
    uint64_t pad : 16;      // unused bits
    uint32_t instanceID;    // user defined value per DXR spec
    uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene)
    Vec3f obj2world_vx;     // 1st column of Obj2World transform
    Vec3f obj2world_vy;     // 2nd column of Obj2World transform
    Vec3f obj2world_vz;     // 3rd column of Obj2World transform
    Vec3f world2obj_p;      // translation of World2Obj transform
  } part1;
};
static_assert(sizeof(InstanceLeaf) == 128, "InstanceLeaf must be 128 bytes large");
/*
Leaf type for procedural geometry. This leaf only contains the
leaf header (which identifies the geometry) and a list of
primitive indices.
The BVH will typically reference only some of the primitives
stored inside this leaf. The range is specified by a start
primitive and the last primitive is tagged with a bit.
*/
struct ProceduralLeaf
{
  /* maximal number of primitive indices stored per leaf */
  static const uint32_t N = 13;

  /* Creates an empty procedural leaf. All primitive slots are filled
   * with the 0xFFFFFFFF marker. */
  ProceduralLeaf ()
    : leafDesc(PrimLeafDesc::MAX_SHADER_INDEX,PrimLeafDesc::MAX_GEOM_INDEX,GeometryFlags::NONE,0), numPrimitives(0), pad(0), last(0)
  {
    for (uint32_t& slot : _primIndex)
      slot = 0xFFFFFFFF;
  }

  /* Creates a procedural leaf with one primitive. More primitives
   * of the same geometry can get added later using the add
   * function. The last bits of all still unused slots start out
   * set; only the bit of the first slot depends on the last
   * argument. */
  ProceduralLeaf (PrimLeafDesc leafDesc, uint32_t primIndex, bool last)
    : leafDesc(leafDesc), numPrimitives(1), pad(0), last(last ? 0xFFFFFFFF : 0xFFFFFFFE)
  {
    for (uint32_t& slot : _primIndex)
      slot = 0xFFFFFFFF;
    _primIndex[0] = primIndex;
  }

  /* returns the number of primitives stored inside this leaf */
  uint32_t size() const {
    return numPrimitives;
  }

  /* Calculates the effectively used bytes: the header, one 32 bit
   * word of bookkeeping, and 4 bytes per stored primitive index. */
  size_t usedBytes() const
  {
    /*if (leafDesc.isProceduralInstance())
      return sizeof(InstanceLeaf);
    else*/
    return sizeof(PrimLeafDesc)+4+4*numPrimitives;
  }

  /* If possible adds a new primitive to this leaf. Fails (returns
   * false) when the leaf is full or when the primitive belongs to a
   * different geometry than the primitives already stored. */
  bool add(PrimLeafDesc leafDesc_in, uint32_t primIndex_in, bool last_in)
  {
    assert(primIndex_in != 0xFFFFFFFF);

    if (numPrimitives >= N)
      return false;

    /* the first primitive determines the geometry of the leaf */
    if (numPrimitives == 0)
      leafDesc = leafDesc_in;

    if (leafDesc != leafDesc_in)
      return false;

    const uint32_t slot = numPrimitives;
    _primIndex[slot] = primIndex_in;

    if (last_in)
      last |= 1 << slot;
    else
      last &= ~(1 << slot);

    numPrimitives++;
    return true;
  }

  /* returns the primitive index of the i'th primitive */
  uint32_t primIndex(uint32_t i) const
  {
    assert(i < N);
    return _primIndex[i];
  }

  /* checks if the i'th primitive is the last in a leaf list */
  bool isLast(uint32_t i) const {
    /* out of range slots count as last, just to make some verify tests happy */
    return (i >= N) || (((last >> i) & 1) != 0);
  }

  /* Prints slot i of the procedural leaf, indented by depth. Slots
   * outside of the leaf are reported as INVALID. No output on SYCL
   * device. */
  void print (std::ostream& cout, uint32_t i, uint32_t depth) const
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    cout << tab(depth) << "ProceduralLeaf {" << std::endl;
    cout << tab(depth) << " addr = " << this << std::endl;
    cout << tab(depth) << " slot = " << i << std::endl;
    if (i >= N) {
      cout << tab(depth) << " INVALID" << std::endl;
    }
    else {
      cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl;
      cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl;
      cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl;
      cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl;
      cout << tab(depth) << " primIndex = " << primIndex(i) << std::endl;
    }
    cout << tab(depth) << "}";
#endif
  }

public:
  PrimLeafDesc leafDesc;      // leaf header identifying the geometry
  uint32_t numPrimitives : 4; // number of stored primitives
  uint32_t pad : 32-4-N;      // unused bits
  uint32_t last : N;          // bit vector with a last bit per primitive
  uint32_t _primIndex[N];     // primitive indices of all primitives stored inside the leaf
};
static_assert(sizeof(ProceduralLeaf) == 64, "ProceduralLeaf must be 64 bytes large");
}

View file

@ -0,0 +1,56 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <cstdint>
#include <iostream>
namespace embree
{
/* The type of a node. */
enum NodeType : uint8_t
{
  NODE_TYPE_MIXED      = 0x0, // identifies a mixed internal node where each child can have a different type
  NODE_TYPE_INTERNAL   = 0x0, // internal BVH node with 6 children
  NODE_TYPE_INSTANCE   = 0x1, // instance leaf
  NODE_TYPE_PROCEDURAL = 0x3, // procedural leaf
  NODE_TYPE_QUAD       = 0x4, // quad leaf
  NODE_TYPE_INVALID    = 0x7  // indicates invalid node
};

/* Output operator for NodeType. Writes the name of the node type, or
 * "INVALID NODE TYPE" for values that are no enumerator. As
 * NODE_TYPE_MIXED shares its value with NODE_TYPE_INTERNAL it prints
 * as "INTERNAL". Nothing is written if __RTRT_GSIM is defined. */
inline std::ostream& operator<<(std::ostream& _cout, const NodeType& _type)
{
#if !defined(__RTRT_GSIM)
  const char* label = "INVALID NODE TYPE";
  if      (_type == NODE_TYPE_INTERNAL)   label = "INTERNAL";
  else if (_type == NODE_TYPE_INSTANCE)   label = "INSTANCE";
  else if (_type == NODE_TYPE_PROCEDURAL) label = "PROCEDURAL";
  else if (_type == NODE_TYPE_QUAD)       label = "QUAD";
  else if (_type == NODE_TYPE_INVALID)    label = "INVALID";
  _cout << label;
#endif
  return _cout;
}
/*
Sub-type definition for each NodeType
*/
/* Refines a NodeType. Every enumerator visible here has the value 0,
 * so the names only document which NodeType a sub-type belongs to. */
enum SubType : uint8_t
{
SUB_TYPE_NONE = 0,
/* sub-type for NODE_TYPE_INTERNAL */
SUB_TYPE_INTERNAL6 = 0x00, // Xe+: internal node with 6 children
/* Sub-type for NODE_TYPE_QUAD */
SUB_TYPE_QUAD = 0, // Xe+: standard quad leaf (64 bytes)
/* Sub-type for NODE_TYPE_PROCEDURAL */
SUB_TYPE_PROCEDURAL = 0, // Xe+: standard procedural leaf
};
}

View file

@ -0,0 +1,265 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "qbvh6.h"
namespace embree
{
/* Gathers statistics for an internal node: recurses into every valid
 * child and then accounts for the node itself. */
template<typename InternalNode>
void computeInternalNodeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area)
{
  InternalNode* inner = node.innerNode<InternalNode>();

  /* visit all valid children and count them */
  size_t numValidChildren = 0;
  for (uint32_t c = 0; c < InternalNode::NUM_CHILDREN; c++)
  {
    if (!inner->valid(c))
      continue;

    numValidChildren++;
    computeStatistics(stats, inner->child(c), time_range, area(inner->bounds(c)), root_bounds_area, InternalNode::NUM_CHILDREN);
  }

  /* update BVH statistics */
  auto& nodeStats = stats.internalNode;
  nodeStats.numNodes++;
  nodeStats.numChildrenUsed += numValidChildren;
  nodeStats.numChildrenTotal += InternalNode::NUM_CHILDREN;
  nodeStats.nodeSAH += time_range.size() * node_bounds_area / root_bounds_area;
  nodeStats.numBytes += sizeof(InternalNode);
}
/* Gathers statistics for the subtree referenced by node. Leaf lists
 * (quad and procedural) are walked until the entry tagged as last is
 * reached. The numChildren argument is not used. */
void computeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area, uint32_t numChildren)
{
  switch (node.type)
  {
  case NODE_TYPE_INSTANCE:
  {
    auto& s = stats.instanceLeaf;
    s.numLeaves++;
    s.numPrimsUsed++;
    s.numPrimsTotal++;
    s.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;
    s.numBytesUsed += sizeof(InstanceLeaf);
    s.numBytesTotal += sizeof(InstanceLeaf);
    break;
  }
  case NODE_TYPE_QUAD:
  {
    auto& s = stats.quadLeaf;
    s.numLeaves++;

    /* walk the consecutively stored quads of this leaf list */
    for (;;)
    {
      const QuadLeaf* quad = node.leafNodeQuad();
      node.node += sizeof(QuadLeaf);

      s.numPrimsUsed += quad->size();
      s.numPrimsTotal += 2;
      s.numBytesUsed += quad->usedBytes();
      s.numBytesTotal += sizeof(QuadLeaf);
      s.leafSAH += quad->size() * time_range.size() * node_bounds_area / root_bounds_area;

      if (quad->isLast())
        break;
    }
    break;
  }
  case NODE_TYPE_PROCEDURAL:
  {
    /*if (node.leafNodeProcedural()->leafDesc.isProceduralInstance()) // FIXME: for some reason we always to into this case!?
    {
      stats.proceduralLeaf.numLeaves++;
      stats.proceduralLeaf.numPrimsUsed += 1;
      stats.proceduralLeaf.numPrimsTotal += 1;
      stats.proceduralLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;
      stats.proceduralLeaf.numBytesUsed += sizeof(InstanceLeaf);
      stats.proceduralLeaf.numBytesTotal += sizeof(InstanceLeaf);
    }
    else*/
    {
      auto& s = stats.proceduralLeaf;
      s.numLeaves++;

      /* walk the primitives slot by slot, moving on to the next
       * block once all primitives of a block are visited */
      uint32_t slot = node.cur_prim;
      for (;;)
      {
        const ProceduralLeaf* block = node.leafNodeProcedural();
        const bool isLastPrim = block->isLast(slot);

        /* a block gets accounted when its first slot is visited */
        if (slot == 0) {
          s.numBlocks++;
          s.numBytesUsed += block->usedBytes();
          s.numBytesTotal += sizeof(ProceduralLeaf);
        }

        const uint32_t primsInBlock = block->size();
        s.numPrimsUsed++;
        s.numPrimsTotal++;
        s.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;

        slot++;
        if (slot >= primsInBlock) {
          slot = 0;
          node.node += sizeof(ProceduralLeaf);
        }

        if (isLastPrim)
          break;
      }
    }
    break;
  }
  case NODE_TYPE_INTERNAL:
  {
    computeInternalNodeStatistics<QBVH6::InternalNode6>(stats, node, time_range, node_bounds_area, root_bounds_area);
    break;
  }
  default:
    assert(false);
  }
}
/* Calculates the statistics of the entire BVH. An empty BVH yields
 * default constructed statistics. */
BVHStatistics QBVH6::computeStatistics() const
{
  BVHStatistics stats;
  if (!empty())
  {
    /* the root serves as its own reference for the area ratio */
    const auto rootArea = area(bounds);
    embree::computeStatistics(stats,root(),BBox1f(0,1),rootArea,rootArea,6);
  }
  return stats;
}
/* Prints an internal node followed by all of its valid children (one
 * level deeper) and the closing brace of the node. All output goes to
 * the provided stream; numChildren is not used. */
template<typename QInternalNode>
void QBVH6::printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren)
{
  QInternalNode* inner = node.innerNode<QInternalNode>();
  inner->print(cout, depth, false);
  cout << std::endl; // write to the stream argument, not to the global std::cout

  for (uint32_t i = 0; i < QInternalNode::NUM_CHILDREN; i++)
  {
    if (inner->valid(i))
      print(cout, inner->child(i), depth + 1, QInternalNode::NUM_CHILDREN);
  }

  cout << tab(depth) << "}" << std::endl;
}
/* Prints the subtree referenced by node to the provided stream,
 * indented by depth. Quad and procedural leaf lists are printed entry
 * by entry until the entry tagged as last is reached. All output is
 * written to the stream argument, so printing into a file or string
 * stream no longer leaks parts of the output to std::cout. */
void QBVH6::print( std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren)
{
  switch (node.type)
  {
  case NODE_TYPE_INSTANCE: {
    node.leafNodeInstance()->print(cout,depth);
    cout << std::endl;
    break;
  }
  case NODE_TYPE_QUAD:
  {
    cout << tab(depth) << "List {" << std::endl;
    bool last = false;
    do
    {
      QuadLeaf* quad = node.leafNodeQuad();
      node.node += sizeof(QuadLeaf); // quads of a list are stored consecutively
      last = quad->isLast();
      quad->print(cout,depth+1);
      cout << std::endl;
    } while (!last);
    cout << tab(depth) << "}" << std::endl;
    break;
  }
  case NODE_TYPE_PROCEDURAL:
  {
    /*if (!node.leafNodeProcedural()->leafDesc.opaqueCullingEnabled())
    {
      InstanceLeaf* leaf = (InstanceLeaf*) node.node;
      leaf->print(cout,depth+1);
      std::cout << std::endl;
    }
    else*/
    {
      cout << tab(depth) << "List {" << std::endl;
      bool last = false;
      uint32_t currPrim = node.cur_prim;
      do
      {
        ProceduralLeaf* leaf = node.leafNodeProcedural();
        last = leaf->isLast(currPrim);
        uint32_t primsInBlock = leaf->size();
        leaf->print(cout,currPrim,depth+1);
        cout << std::endl;

        /* continue with the next block once this one is exhausted */
        if (++currPrim >= primsInBlock) {
          currPrim = 0;
          node.node += sizeof(ProceduralLeaf);
        }
      } while (!last);
      cout << tab(depth) << "}" << std::endl;
    }
    break;
  }
  case NODE_TYPE_INTERNAL:
  {
    printInternalNodeStatistics<QBVH6::InternalNode6>(cout, node, depth, numChildren);
    break;
  }
  default:
    cout << "{ INVALID_NODE }" << std::endl;
    //assert(false);
  }
}
/* Returns a pointer to the back pointer data of the BVH, which starts
 * backPointerDataStart 64-byte blocks behind the BVH base address.
 * The block index is widened to size_t before scaling (as done by the
 * allocator functions of QBVH6), so the byte offset cannot wrap
 * around for BVHs of 4GB and more. */
unsigned* getBackPointersData(const QBVH6* base) { // FIXME: should be member function
  const char* const bytes = (const char*)base;
  return (unsigned*)(bytes + 64 * (size_t)base->backPointerDataStart);
}
/* Returns how many back pointers (of type unsigned) fit into the back
 * pointer section, whose size is given in 64-byte blocks. */
unsigned getNumBackpointers(const QBVH6* base) { // FIXME: should be member function
  const auto numBlocks = base->backPointerDataEnd - base->backPointerDataStart;
  return (numBlocks * 64) / sizeof(unsigned);
}
/* Returns the byte offset of the idx'th 64-byte block of the node
 * section, relative to the BVH base address. */
uint64_t getBackpointerChildOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function
  const uint64_t block = uint64_t(base->nodeDataStart + idx);
  return 64 * block;
}
/* Returns the byte offset of the parent stored in the idx'th back
 * pointer. The parent's block index within the node section is kept
 * in the back pointer bits above bit 5. */
uint64_t getParentFromBackpointerOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function
  const unsigned* const backPointers = getBackPointersData(base);
  const uint64_t block = uint64_t(base->nodeDataStart + (backPointers[idx] >> 6));
  return 64 * block;
}
void QBVH6::print ( std::ostream& cout ) const
{
cout << "QBVH @ "<< this <<" header: {\n";
cout << " rootNodeOffset = " << rootNodeOffset << std::endl;
cout << " bounds = " << bounds << std::endl;
cout << " nodeDataStart = " << nodeDataStart << std::endl;
cout << " nodeDataCur = " << nodeDataCur << std::endl;
cout << " leafDataStart = " << leafDataCur << std::endl;
cout << " leafDataCur = " << leafDataCur << std::endl;
cout << " proceduralDataStart = " << proceduralDataStart << std::endl;
cout << " proceduralDataCur = " << proceduralDataCur << std::endl;
cout << " backPointerDataStart = " << backPointerDataStart << std::endl;
cout << " backPointerDataEnd = " << backPointerDataEnd << std::endl;
cout << " numPrims = " << numPrims << std::endl;
cout << "}" << std::endl;
if (empty()) return;
print(cout,root(),0,6);
if (hasBackPointers())
{
cout << "backpointers: {\n";
for (unsigned bp = 0; bp < getNumBackpointers(this); ++bp) {
cout << " node @ offset " << (void*)getBackpointerChildOffset(this, bp) << " parent = " << (void*)getParentFromBackpointerOffset(this, bp) << ", num children = " << ((getBackPointersData(this)[bp] >> 3) & 0x7) << "\n";
}
cout << "}\n";
}
}
}

View file

@ -0,0 +1,230 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "qnode.h"
#include "statistics.h"
#include "rtbuild.h"
namespace embree
{
/*
The QBVH6 structure defines the bounding volume hierarchy (BVH)
that is used by the hardware. It is a BVH with 6-wide branching
factor, and quantized bounding boxes. At the leaf level quads
(QuadLeaf type), procedural geometries (ProceduralLeaf
type), and instances (InstanceLeaf type) can get referenced.
*/
/* Rounds a byte offset up to the next multiple of 128 bytes and
 * returns the result as a number of 64-byte blocks (two blocks per
 * 128 bytes), not as a byte count. */
inline constexpr size_t roundOffsetTo128(size_t offset) {
  return ((offset + 127) >> 7) << 1;
}
struct QBVH6
{
typedef NodeRef Node;
typedef InternalNode<InternalNode6Data> InternalNode6;
static constexpr uint64_t rootNodeOffset = 128;
static_assert(sizeof(InternalNode6) == 64, "InternalNode6 must be 64 bytes large");
/* structure used to initialize the memory allocator inside the BVH */
struct SizeEstimate
{
  /* Creates an estimate with all sections empty. */
  SizeEstimate ()
    : nodeBytes(0), leafBytes(0), proceduralBytes(0) {}

  /* Creates an estimate from the byte sizes of the three sections. */
  SizeEstimate (size_t nodeBytes, size_t leafBytes, size_t proceduralBytes)
    : nodeBytes(nodeBytes), leafBytes(leafBytes), proceduralBytes(proceduralBytes) {}

  /* Total bytes: the QBVH6 structure itself plus all three sections. */
  size_t bytes() const {
    return sizeof(QBVH6) + nodeBytes + leafBytes + proceduralBytes;
  }

  /* An estimate is smaller or equal if none of its sections is
   * larger than the corresponding section of the other estimate. */
  friend bool operator<= (SizeEstimate a, SizeEstimate b)
  {
    return a.nodeBytes <= b.nodeBytes
        && a.leafBytes <= b.leafBytes
        && a.proceduralBytes <= b.proceduralBytes;
  }

  /* Adds two estimates section by section. */
  friend SizeEstimate operator+ (const SizeEstimate& a, const SizeEstimate& b)
  {
    const size_t nodes = a.nodeBytes + b.nodeBytes;
    const size_t leaves = a.leafBytes + b.leafBytes;
    const size_t procedurals = a.proceduralBytes + b.proceduralBytes;
    return SizeEstimate(nodes, leaves, procedurals);
  }

  /* output operator */
  friend inline std::ostream& operator<<(std::ostream& cout, const SizeEstimate& estimate)
  {
    cout << "SizeEstimate {" << std::endl;
    cout << " nodeBytes = " << estimate.nodeBytes << ", " << std::endl;
    cout << " leafBytes = " << estimate.leafBytes << ", " << std::endl;
    cout << " proceduralBytes = " << estimate.proceduralBytes << ", " << std::endl;
    cout << "}";
    return cout;
  }

public:
  size_t nodeBytes;       // bytes required to store internal nodes
  size_t leafBytes;       // bytes required to store leaf nodes
  size_t proceduralBytes; // bytes required to store procedural leaf nodes
};
/* Initializes a QBVH6 node with its provided size. The memory for
* the QBVH6 structure is overallocated and the allocation size is
* provided to the constructor, such that the allocator of the BVH
* can get initialized properly. */
// Lays out the sections of the BVH back to back. All start/cur members
// are indices of 64-byte blocks relative to the start of this
// structure: the node section begins behind the QBVH6 header (rounded
// up to 128 bytes), each following section begins where the estimated
// size of the previous one ends, and every "Cur" member starts at the
// beginning of its section. The back pointer section starts out empty.
// NOTE(review): the initializers read members initialized earlier in
// the list, which presumably matches the member declaration order
// (declarations are not visible here) -- confirm before reordering.
QBVH6(SizeEstimate size)
: nodeDataStart((uint32_t)roundOffsetTo128(sizeof(QBVH6))), nodeDataCur(nodeDataStart),
leafDataStart(nodeDataCur + (uint32_t)(size.nodeBytes / 64)), leafDataCur(leafDataStart),
proceduralDataStart(leafDataCur + (uint32_t)(size.leafBytes / 64)), proceduralDataCur(proceduralDataStart),
backPointerDataStart(proceduralDataCur + (uint32_t)(size.proceduralBytes/64)), backPointerDataEnd(backPointerDataStart)
{
// all section sizes have to be multiples of the 64-byte block size
assert(size.nodeBytes % 64 == 0);
assert(size.leafBytes % 64 == 0);
assert(size.proceduralBytes % 64 == 0);
// the total size is limited to 2^32 blocks of 64 bytes
assert(size.bytes() <= (64LL << 32));
bounds = embree::empty;
}
/* Returns the root node of the BVH */
/* The root node is located at the fixed offset rootNodeOffset
 * relative to the address of this structure. */
Node root() const {
  const uint64_t baseAddress = (uint64_t)this;
  return Node(rootNodeOffset, baseAddress);
}
/* The root node offset is fixed (see rootNodeOffset), thus this function only asserts that the specified node is located at that offset. */
// Stores nothing: rootNodeOffset is a compile-time constant, so this
// function only checks (through asserts) that the passed node is
// located at that offset relative to this structure.
void setRootNodeOffset(Node node) {
// the root is expected to reference primitive slot 0
assert(node.cur_prim == 0);
// MAYBE_UNUSED presumably silences the unused variable warning in
// builds where assert compiles to nothing -- confirm the macro.
uint64_t MAYBE_UNUSED rootNodeOffset1 = (uint64_t)node - (uint64_t)this;
assert(rootNodeOffset == rootNodeOffset1);
}
/* check if BVH is empty */
bool empty() const {
return root().type == NODE_TYPE_INVALID;
}
/* pretty printing (declarations only, the definitions are not part of
* this section of the file) */
/* static helper templated over the internal node type; numChildren defaults to 6 */
template<typename QInternalNode>
static void printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren = 6);
/* static variant that prints starting at the given node and depth; numChildren defaults to 6 */
static void print(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren=6);
/* member variant, writes to std::cout if no stream is specified */
void print(std::ostream& cout = std::cout) const;
/* stream output operator, forwards to QBVH6::print */
friend inline std::ostream& operator<<(std::ostream& cout, const QBVH6& qbvh)
{
  qbvh.print(cout);
  return cout;
}
/* calculates BVH statistics */
BVHStatistics computeStatistics() const;
/*
This section implements a simple allocator for BVH data. The
BVH data is separated into two sections, a section where nodes
and leaves in mixed mode are allocated, and a section where
only leaves are allocated in fat-leaf mode.
*/
public:
/* Allocates memory in the node memory section.
 *
 * bytes:   size of the allocation, has to be a multiple of 64
 * returns: pointer to the start of the allocated memory
 *
 * The allocation has to fit in front of the leaf section, which is only
 * verified by an assertion. */
char* allocNode(size_t bytes)
{
  assert(bytes % 64 == 0);
  /* convert to 64 byte blocks before narrowing; casting 'bytes' to
   * uint32_t first would silently truncate requests of 4GB or more */
  const uint64_t blocks = (uint64_t)bytes / 64;
  /* evaluate the bound in 64 bits such that the sum cannot wrap around */
  assert((uint64_t)nodeDataCur + blocks <= (uint64_t)leafDataStart);
  char* ptr = (char*)this + 64 * (size_t)nodeDataCur;
  nodeDataCur += (uint32_t)blocks;
  return ptr;
}
/* Allocates memory in the leaf memory section.
 *
 * bytes:   size of the allocation, has to be a multiple of 64
 * returns: pointer to the start of the allocated memory
 *
 * The allocation has to fit in front of the procedural leaf section,
 * which is only verified by an assertion. */
char* allocLeaf(size_t bytes)
{
  assert(bytes % 64 == 0);
  /* convert to 64 byte blocks before narrowing; casting 'bytes' to
   * uint32_t first would silently truncate requests of 4GB or more */
  const uint64_t blocks = (uint64_t)bytes / 64;
  /* evaluate the bound in 64 bits such that the sum cannot wrap around */
  assert((uint64_t)leafDataCur + blocks <= (uint64_t)proceduralDataStart);
  char* ptr = (char*)this + 64 * (size_t)leafDataCur;
  leafDataCur += (uint32_t)blocks;
  return ptr;
}
/* Allocates memory in the procedural leaf memory section.
 *
 * bytes:   size of the allocation, has to be a multiple of 64
 * returns: pointer to the start of the allocated memory
 *
 * The allocation has to fit in front of the back pointer section, which
 * is only verified by an assertion. */
char* allocProceduralLeaf(size_t bytes)
{
  assert(bytes % 64 == 0);
  /* convert to 64 byte blocks before narrowing; casting 'bytes' to
   * uint32_t first would silently truncate requests of 4GB or more */
  const uint64_t blocks = (uint64_t)bytes / 64;
  /* evaluate the bound in 64 bits such that the sum cannot wrap around */
  assert((uint64_t)proceduralDataCur + blocks <= (uint64_t)backPointerDataStart);
  char* ptr = (char*)this + 64 * (size_t)proceduralDataCur;
  proceduralDataCur += (uint32_t)blocks;
  return ptr;
}
/* Returns the address that lies 'ofs' bytes behind the start of the node
 * memory section */
char* nodePtr(size_t ofs) {
  char* nodeSection = (char*)this + 64 * size_t(nodeDataStart);
  return nodeSection + ofs;
}
/* Returns the address at which the next leaf allocation will be placed */
char* leafPtr() {
  const size_t byteOffset = 64 * (size_t)leafDataCur;
  return (char*)this + byteOffset;
}
/* Returns the total size of the BVH in bytes, which is the end of the
 * back pointer section converted from 64 byte blocks to bytes */
size_t getTotalBytes() const {
  const size_t numBlocks = (size_t)backPointerDataEnd;
  return 64 * numBlocks;
}
/* Returns the number of bytes that are still unallocated in the node section */
size_t getFreeNodeBytes() const {
  /* the difference is taken on the 32 bit block counters */
  const uint32_t freeBlocks = leafDataStart - nodeDataCur;
  return 64 * (size_t)freeBlocks;
}
/* Returns the number of bytes that are still unallocated in the leaf section */
size_t getFreeLeafBytes() const {
  /* the difference is taken on the 32 bit block counters */
  const uint32_t freeBlocks = proceduralDataStart - leafDataCur;
  return 64 * (size_t)freeBlocks;
}
/* Returns the number of bytes that are still unallocated in the
 * procedural leaf section */
size_t getFreeProceduralLeafBytes() const {
  /* the difference is taken on the 32 bit block counters */
  const uint32_t freeBlocks = backPointerDataStart - proceduralDataCur;
  return 64 * (size_t)freeBlocks;
}
/* Returns the number of used bytes, which is the total size minus the
 * free space of the node, leaf, and procedural leaf sections */
size_t getUsedBytes() const {
  size_t used = getTotalBytes();
  used -= getFreeNodeBytes();
  used -= getFreeLeafBytes();
  used -= getFreeProceduralLeafBytes();
  return used;
}
/* Returns true if the back pointer section is not empty */
bool hasBackPointers() const {
  return backPointerDataEnd > backPointerDataStart;
}
public:
ze_raytracing_accel_format_internal_t rtas_format = ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1;
uint32_t reserved1;
BBox3f bounds; // bounding box of the BVH
uint32_t nodeDataStart; // first 64 byte block of node data
uint32_t nodeDataCur; // next free 64 byte block for node allocations
uint32_t leafDataStart; // first 64 byte block of leaf data
uint32_t leafDataCur; // next free 64 byte block for leaf allocations
uint32_t proceduralDataStart; // first 64 byte block for procedural leaf data
uint32_t proceduralDataCur; // next free 64 byte block for procedural leaf allocations
uint32_t backPointerDataStart; // first 64 byte block for back pointers
uint32_t backPointerDataEnd; // end of back pointer array
uint32_t numTimeSegments = 1;
uint32_t numPrims = 0; // number of primitives in this BVH
uint32_t reserved[12];
uint64_t dispatchGlobalsPtr;
};
static_assert(sizeof(QBVH6) == 128, "QBVH6 must be 128 bytes large");
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,508 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <cstdint>
#include <iostream>
#include "leaf.h"
#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32)
/* Manual replacement for frexp: splits 'value' into a mantissa (returned)
 * and a power-of-two exponent (stored to *exp), such that
 * value == mantissa * 2^(*exp). Zero yields mantissa 0 and exponent 0. */
inline float embree_frexp(float value, int* exp)
{
  // Using the Intel(R) oneAPI DPC++/C++ Compiler with -no-intel-libs results
  // in an unresolved external symbol "__imp_frexp" error. Therefore we
  // provide the manual implementation referenced at
  // https://en.cppreference.com/w/c/numeric/math/frexp for this case.
  static_assert(FLT_RADIX == 2, "custom implementation of frexp only works for base 2 floating point representations");

  if (value == 0)
    *exp = 0;
  else
    *exp = (int)(1 + logb(value));

  /* scale the input by 2^-exp to obtain the mantissa */
  return scalbn(value, -(*exp));
}
#endif
namespace embree
{
/* The NodeRef structure references a node of the BVH. It stores the
* pointer to that node as well as the node's type. If a leaf node
* is referenced the current primitive to intersect is also
* stored. */
struct NodeRef
{
  /* creates an invalid reference */
  NodeRef ()
    : node(nullptr), type(NODE_TYPE_INVALID), cur_prim(0) {}

  /* creates a reference from node address, node type, and current
   * primitive; the primitive index has to fit into 4 bits */
  NodeRef (void* nodeAddr, NodeType nodeType, uint8_t startPrim)
    : node((char*)nodeAddr), type(nodeType), cur_prim(startPrim)
  {
    assert(startPrim < 16);
  }

  /* Decodes the 64 bit encoding used in MemRay and Instances: the lower 4
   * bits hold the node type, the remaining bits the node address, to
   * which 'offset' gets added. */
  NodeRef (uint64_t nodePtr, uint64_t offset = 0)
    : node((char*) (nodePtr & ~(uint64_t)0xF) + offset),
      type((NodeType) (nodePtr & 0xF)),
      cur_prim(0)
  {
  }

  /* Encodes the reference into the 64 bit encoding used in MemRay and
   * Instances. The node has to be 16 byte aligned and no current
   * primitive may be set, such that the type fits into the low bits. */
  operator uint64_t() const
  {
    assert(((uint64_t)node & 0xF) == 0);
    assert(cur_prim == 0);
    const uint64_t address = (uint64_t)node;
    const uint64_t typeBits = (uint64_t) type;
    return address + typeBits;
  }

  /* returns the referenced node as internal node */
  template<typename InternalNode>
  InternalNode* innerNode() const
  {
    assert(type == NODE_TYPE_INTERNAL);
    return (InternalNode*)node;
  }

  /* returns the referenced node as instance leaf */
  InstanceLeaf* leafNodeInstance() const
  {
    assert(type == NODE_TYPE_INSTANCE);
    return (InstanceLeaf*)node;
  }

  /* returns the referenced node as quad leaf */
  QuadLeaf* leafNodeQuad() const
  {
    assert(type == NODE_TYPE_QUAD);
    return (QuadLeaf*)node;
  }

  /* returns the referenced node as procedural leaf */
  ProceduralLeaf* leafNodeProcedural() const
  {
    assert(type == NODE_TYPE_PROCEDURAL);
    return (ProceduralLeaf*)node;
  }

  /* two references are equal if address, type, and current primitive match */
  friend bool operator ==(const NodeRef& a, const NodeRef& b)
  {
    if (a.node != b.node) return false;
    if (a.type != b.type) return false;
    return a.cur_prim == b.cur_prim;
  }

  friend bool operator !=(const NodeRef& a, const NodeRef& b)
  {
    return !(a == b);
  }

#if !defined(__RTRT_GSIM)
  /* prints address, type, and current primitive of the reference */
  friend inline std::ostream& operator<<(std::ostream& _cout, const NodeRef& node)
  {
    _cout << "NodeRef { " << (void*)node.node;
    _cout << ", " << node.type;
    _cout << ", " << (int)node.cur_prim << " }";
    return _cout;
  }
#endif

public:
  char* node;            // pointer to the referenced node
  NodeType type;         // type of the node referenced
  uint8_t cur_prim : 4;  // current primitive referenced in the leaf
};
/*
The internal nodes of the BVH store references to 6 children and
quantized bounds for each of these children.
All children are stored consecutively in memory at a location
referred to by the childOffset. To calculate the relative
location of the i'th child the size (as encoded in blockIncr) of
all the children with index smaller than i has to get added to
that childOffset. The calculated offset specifies the signed
number of 64 bytes blocks relative to the node address to reach
the child.
If the nodeType is INTERNAL we are in mixed mode and the type of
each child is encoded inside the startPrim member. Otherwise we
are in fat leaf mode and each child has the same type 'nodeType'
and startPrim identifies the primitive where the leaf
starts. The leaf spans all primitives from this start primitive
to the end primitive which is marked as 'last'.
The bounding boxes of the children are quantized into a regular
3D grid. The world space position of the origin of that grid is
stored at full precision in the lower member, while the step
size is encoded in the exp_x, exp_y, and exp_z members as power
of 2. Thus grid coordinates together with their exponent
(xi,exp_x), (yi,exp_y), (zi,exp_z) correspond to the mantissa
and exponent of a floating point number representation without
leading zero. Thus the world space position of the bounding
planes can get calculated as follows:
x = lower.x + pow(2,exp_x) * 0.xi
y = lower.y + pow(2,exp_y) * 0.yi
z = lower.z + pow(2,exp_z) * 0.zi
As the stored grid coordinates for child bounds are only
unsigned 8-bit values, ray/box intersections can get performed
with reduced precision.
The node also stores a mask used for ray filtering. Only rays
with (node.nodeMask & ray.rayMask) != 0 are traversed, all
others are culled.
*/
/* Plain data layout of an internal node with 6 children. The structure
* has to be exactly 64 bytes large, which is enforced by the
* static_assert that follows it; do not reorder or resize members. */
struct InternalNode6Data
{
static constexpr uint32_t NUM_CHILDREN = 6;
Vec3f lower; // world space origin of quantization grid
int32_t childOffset; // offset to all children in 64B multiples
NodeType nodeType; // the type of the node
uint8_t pad; // unused byte
int8_t exp_x; // 2^exp_x is the size of the grid in x dimension
int8_t exp_y; // 2^exp_y is the size of the grid in y dimension
int8_t exp_z; // 2^exp_z is the size of the grid in z dimension
uint8_t nodeMask; // mask used for ray filtering
/* per-child data, the three bit fields add up to 8 bits */
struct ChildData
{
uint8_t blockIncr : 2; // size of child in 64 byte blocks
uint8_t startPrim : 4; // start primitive in fat leaf mode or child type in mixed mode
uint8_t pad : 2; // unused bits
} childData[NUM_CHILDREN];
/* quantized child bounds, stored as one array per bounding plane */
uint8_t lower_x[NUM_CHILDREN]; // the quantized lower bounds in x-dimension
uint8_t upper_x[NUM_CHILDREN]; // the quantized upper bounds in x-dimension
uint8_t lower_y[NUM_CHILDREN]; // the quantized lower bounds in y-dimension
uint8_t upper_y[NUM_CHILDREN]; // the quantized upper bounds in y-dimension
uint8_t lower_z[NUM_CHILDREN]; // the quantized lower bounds in z-dimension
uint8_t upper_z[NUM_CHILDREN]; // the quantized upper bounds in z-dimension
};
static_assert(sizeof(InternalNode6Data) == 64, "InternalNode6Data must be 64 bytes large");
/* Functionality shared by internal nodes, implemented on top of the raw
* node layout InternalNodeData: quantization and de-quantization of child
* bounds, and encoding and decoding of child offset, type, and start
* primitive. */
template<typename InternalNodeData>
struct InternalNodeCommon : public InternalNodeData
{
using InternalNodeData::NUM_CHILDREN;
/* the default constructor leaves all members uninitialized */
InternalNodeCommon() {
}
/* Constructs a node of the specified type without children: child
* offset 0, all mask bits set, grid origin at 0 with exponents 0, and
* all child bounds invalid. */
InternalNodeCommon(NodeType type)
{
this->nodeType = type;
this->childOffset = 0;
this->nodeMask = 0xFF;
for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++)
this->childData[i] = { 0, 0, 0 };
this->lower = Vec3f(0.0f);
this->exp_x = 0;
this->exp_y = 0;
this->exp_z = 0;
/* set all child bounds to invalid */
/* (lower has the topmost bit set and upper does not, see valid()) */
for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++) {
this->lower_x[i] = this->lower_y[i] = this->lower_z[i] = 0x80;
this->upper_x[i] = this->upper_y[i] = this->upper_z[i] = 0x00;
}
}
/* this function slightly enlarges bounds in order to make traversal watertight */
/* The box is enlarged in all dimensions by 'ulps' times the float
* epsilon times the largest absolute coordinate of the box. */
static const BBox3f conservativeBox(const BBox3f box, float ulps = 1.0f) {
const float err = ulps*std::numeric_limits<float>::epsilon() * std::max(reduce_max(abs(box.lower)), reduce_max(abs(box.upper)));
return enlarge(box, Vec3f(err));
}
/* this function quantizes the provided bounds */
/* fbounds are the bounds to quantize and base is the origin of the
* quantization grid. Returns the grid coordinates as floats in the
* range [0,255]; lower bounds are rounded down and upper bounds are
* rounded up, such that the quantized box does not shrink. */
const BBox3f quantize_bounds(BBox3f fbounds, Vec3f base) const
{
/* make bounds relative to the grid origin */
const Vec3f lower = fbounds.lower-base;
const Vec3f upper = fbounds.upper-base;
/* scale by 2^(8-exp) to obtain grid coordinates */
float qlower_x = ldexpf(lower.x, -this->exp_x + 8);
float qlower_y = ldexpf(lower.y, -this->exp_y + 8);
float qlower_z = ldexpf(lower.z, -this->exp_z + 8);
float qupper_x = ldexpf(upper.x, -this->exp_x + 8);
float qupper_y = ldexpf(upper.y, -this->exp_y + 8);
float qupper_z = ldexpf(upper.z, -this->exp_z + 8);
/* the bounds are expected to lie inside the grid */
assert(qlower_x >= 0.0f && qlower_x <= 255.0f);
assert(qlower_y >= 0.0f && qlower_y <= 255.0f);
assert(qlower_z >= 0.0f && qlower_z <= 255.0f);
assert(qupper_x >= 0.0f && qupper_x <= 255.0f);
assert(qupper_y >= 0.0f && qupper_y <= 255.0f);
assert(qupper_z >= 0.0f && qupper_z <= 255.0f);
/* round conservatively and clamp to the 8 bit range (the clamp also
* covers builds in which the assertions above are disabled) */
qlower_x = min(max(floorf(qlower_x),0.0f),255.0f);
qlower_y = min(max(floorf(qlower_y),0.0f),255.0f);
qlower_z = min(max(floorf(qlower_z),0.0f),255.0f);
qupper_x = min(max(ceilf(qupper_x),0.0f),255.0f);
qupper_y = min(max(ceilf(qupper_y),0.0f),255.0f);
qupper_z = min(max(ceilf(qupper_z),0.0f),255.0f);
BBox3f qbounds(Vec3f(qlower_x, qlower_y, qlower_z), Vec3f(qupper_x, qupper_y, qupper_z));
/* verify that quantized bounds are conservative */
/* the de-quantized box gets a small tolerance added before the
* subset test; dbounds is only consumed by the assertion */
BBox3f dbounds = dequantize_bounds(qbounds, base);
dbounds.lower.x -= 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8));
dbounds.lower.y -= 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8));
dbounds.lower.z -= 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8));
dbounds.upper.x += 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8));
dbounds.upper.y += 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8));
dbounds.upper.z += 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8));
assert(subset(fbounds, dbounds));
return qbounds;
}
/* this function de-quantizes the provided bounds */
/* Each grid coordinate q is mapped to base + q * 2^(exp-8), using the
* exponent of the respective dimension. */
const BBox3f dequantize_bounds(const BBox3f& qbounds, Vec3f base) const
{
const float dlower_x = base.x + ldexpf(qbounds.lower.x, this->exp_x - 8);
const float dlower_y = base.y + ldexpf(qbounds.lower.y, this->exp_y - 8);
const float dlower_z = base.z + ldexpf(qbounds.lower.z, this->exp_z - 8);
const float dupper_x = base.x + ldexpf(qbounds.upper.x, this->exp_x - 8);
const float dupper_y = base.y + ldexpf(qbounds.upper.y, this->exp_y - 8);
const float dupper_z = base.z + ldexpf(qbounds.upper.z, this->exp_z - 8);
return BBox3f(Vec3f(dlower_x, dlower_y, dlower_z), Vec3f(dupper_x, dupper_y, dupper_z));
}
/* Determines if a child is valid. We have only to look at the
* topmost bit of lower_x and upper_x to determine if child is
* valid */
/* A child is invalid only if the topmost bit of lower_x is set while
* the topmost bit of upper_x is clear, as is the case for the values
* 0x80 and 0x00 used to mark invalid children. */
bool valid(int i) const {
return !(this->lower_x[i] & 0x80) || (this->upper_x[i] & 0x80);
}
/* Determines if the node is in fat leaf mode. */
/* This is the case for every node type other than NODE_TYPE_MIXED. */
bool isFatLeaf() const {
return this->nodeType != NODE_TYPE_MIXED;
}
/* Sets the offset to the child memory. */
/* The offset is stored relative to the address of this node in
* multiples of 64 bytes; a null pointer is stored as offset 0. */
void setChildOffset(void* childDataPtr)
{
int64_t childDataOffset = childDataPtr ? (char*)childDataPtr - (char*)this : 0;
assert(childDataOffset % 64 == 0);
/* the offset in blocks has to fit into the 32 bit childOffset member */
assert((int64_t)(int32_t)(childDataOffset / 64) == (childDataOffset / 64));
this->childOffset = (int32_t)(childDataOffset / 64);
}
/* Sets the type, size, and current primitive of a child */
/* block_delta is the size of the child in 64 byte blocks and has to be
* smaller than 4, cur_prim has to be smaller than 16. */
void setChildType(uint32_t child, NodeType childType, uint32_t block_delta, uint32_t cur_prim)
{
// there is no need to store block_delta for last child
if (child == NUM_CHILDREN-1) block_delta = 0;
assert(block_delta < 4);
assert(cur_prim < 16);
if (isFatLeaf())
{
/* fat leaf mode: all children share the node type, startPrim stores the start primitive */
assert(this->nodeType == childType);
this->childData[child].startPrim = cur_prim;
this->childData[child].blockIncr = block_delta;
}
else
{
/* mixed mode: startPrim stores the type of the child */
assert(cur_prim == 0);
this->childData[child].startPrim = childType;
this->childData[child].blockIncr = block_delta;
}
}
/* Marks the specified child as invalid */
void invalidateChild(uint32_t childID)
{
/* set child bounds to invalid */
this->lower_x[childID] = this->lower_y[childID] = this->lower_z[childID] = 0x80;
this->upper_x[childID] = this->upper_y[childID] = this->upper_z[childID] = 0x00;
}
/* Sets child bounds */
/* The bounds are first enlarged by conservativeBox and then quantized
* relative to the grid origin of this node. */
void setChildBounds(uint32_t childID, const BBox3f& fbounds)
{
assert(fbounds.lower.x <= fbounds.upper.x);
assert(fbounds.lower.y <= fbounds.upper.y);
assert(fbounds.lower.z <= fbounds.upper.z);
const BBox3f qbounds = quantize_bounds(conservativeBox(fbounds), this->lower);
this->lower_x[childID] = (uint8_t)qbounds.lower.x;
this->lower_y[childID] = (uint8_t)qbounds.lower.y;
this->lower_z[childID] = (uint8_t)qbounds.lower.z;
this->upper_x[childID] = (uint8_t)qbounds.upper.x;
this->upper_y[childID] = (uint8_t)qbounds.upper.y;
this->upper_z[childID] = (uint8_t)qbounds.upper.z;
assert(valid(childID));
}
/* Sets an entire child, including bounds, type, size, and referenced primitive. */
void setChild(uint32_t childID, const BBox3f& fbounds, NodeType type, uint32_t block_delta, uint32_t cur_prim = 0)
{
setChildType(childID, type, block_delta, cur_prim);
setChildBounds(childID, fbounds);
}
/* Calculates the byte offset to the child. The offset is
* relative to the address this node. */
int64_t getChildOffset(uint32_t childID) const
{
/* start at the first child and skip the blocks of all preceding children */
int64_t ofs = this->childOffset;
for (uint32_t j = 0; j < childID; j++)
ofs += this->childData[j].blockIncr;
return 64 * ofs;
}
/* Returns the type of the child. In fat leaf mode the type is
* shared between all children, otherwise a per-child type is
* encoded inside the startPrim member for each child. */
NodeType getChildType(uint32_t childID) const
{
if (isFatLeaf())
return this->nodeType;
else
return (NodeType)(this->childData[childID].startPrim);
}
/* Returns the start primitive of a child. In case of children
* in fat-leaf mode, all children are leaves, and the start
* primitive specifies the primitive in a leaf block where the
* leaf start. */
/* In mixed mode 0 is returned. */
uint32_t getChildStartPrim(uint32_t childID) const
{
if (isFatLeaf())
return this->childData[childID].startPrim;
else
return 0;
}
/* Returns a node reference for the given child. This reference
* includes the node pointer, type, and start primitive. */
/* 'This' is the address the child offset gets applied to. */
NodeRef child(void* This, int childID) const {
return NodeRef((char*)This + getChildOffset(childID), getChildType(childID), getChildStartPrim(childID));
}
/* Same as above, with the child offset applied to the address of this node */
NodeRef child(int i) const {
return child((void*)this, i);
}
};
/* Internal node of the BVH. Extends InternalNodeCommon by the setup of
* the quantization grid from node bounds, by de-quantization of the
* stored child bounds, and by printing. */
template<typename QInternalNode>
struct InternalNode : public InternalNodeCommon<QInternalNode>
{
using InternalNodeCommon<QInternalNode>::valid;
using InternalNodeCommon<QInternalNode>::getChildType;
using InternalNodeCommon<QInternalNode>::getChildOffset;
using InternalNodeCommon<QInternalNode>::getChildStartPrim;
using InternalNodeCommon<QInternalNode>::conservativeBox;
using InternalNodeCommon<QInternalNode>::dequantize_bounds;
using InternalNodeCommon<QInternalNode>::NUM_CHILDREN;
/* the default constructor leaves the node uninitialized */
InternalNode() {
}
/* constructs a node of the specified type, see InternalNodeCommon(NodeType) */
InternalNode (NodeType type)
: InternalNodeCommon<QInternalNode>(type) {}
/* Constructs an internal node. The quantization grid gets
* initialized from the provided parent bounds. */
InternalNode (BBox3f box, NodeType type = NODE_TYPE_MIXED)
: InternalNode(type)
{
setNodeBounds(box);
}
/* Initializes the quantization grid (origin and per-dimension
* exponents) such that it covers the provided bounds. */
void setNodeBounds(BBox3f box)
{
/* initialize quantization grid */
box = conservativeBox(box);
const float _ulp = std::numeric_limits<float>::epsilon();
const float up = 1.0f + float(_ulp);
/* slightly enlarged extent of the box */
Vec3f len = box.size() * up;
this->lower = box.lower;
/* Split each extent into mantissa and exponent. The exponent gets
* incremented if the mantissa exceeds 255/256, presumably because the
* largest 8 bit grid coordinate (255) only reaches 255/256 of 2^exp. */
#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32)
int _exp_x; float mant_x = embree_frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f);
int _exp_y; float mant_y = embree_frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f);
int _exp_z; float mant_z = embree_frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f);
#else
int _exp_x; float mant_x = frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f);
int _exp_y; float mant_y = frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f);
int _exp_z; float mant_z = frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f);
#endif
/* the exponents are stored as int8_t, thus clamp them from below */
_exp_x = max(-128,_exp_x); // enlarge too tight bounds
_exp_y = max(-128,_exp_y);
_exp_z = max(-128,_exp_z);
/* exponents above 127 cannot be stored and are only caught by the assertions */
this->exp_x = _exp_x; assert(_exp_x >= -128 && _exp_x <= 127);
this->exp_y = _exp_y; assert(_exp_y >= -128 && _exp_y <= 127);
this->exp_z = _exp_z; assert(_exp_z >= -128 && _exp_z <= 127);
}
/* dequantizes the bounds of the specified child */
const BBox3f bounds(uint32_t childID) const
{
return dequantize_bounds(BBox3f(Vec3f(this->lower_x[childID], this->lower_y[childID], this->lower_z[childID]),
Vec3f(this->upper_x[childID], this->upper_y[childID], this->upper_z[childID])),
this->lower);
}
/* Returns the merged de-quantized bounds of all valid children; the
* result is empty if no child is valid. */
const BBox3f bounds() const
{
BBox3f b = empty;
for (size_t i=0; i<NUM_CHILDREN; i++) {
if (!valid(i)) continue;
b.extend(bounds(i));
}
return b;
}
/* Copies this node to dst. The child offset is stored relative to the
* node address, thus it gets recalculated for dst such that dst refers
* to the same child memory as this node. */
void copy_to( InternalNode* dst ) const
{
*dst = *this;
dst->setChildOffset((char*)this + getChildOffset(0));
}
#if !defined(__RTRT_GSIM)
/* output of internal node */
/* depth selects the indentation passed to tab(); the closing brace is
* only printed if 'close' is set */
void print(std::ostream& cout, uint32_t depth, bool close) const
{
cout << tab(depth) << "InternalNode" << NUM_CHILDREN << " {" << std::endl;
cout << tab(depth) << " addr = " << this << std::endl;
cout << tab(depth) << " childOffset = " << 64 * int64_t(this->childOffset) << std::endl;
cout << tab(depth) << " nodeType = " << NodeType(this->nodeType) << std::endl;
cout << tab(depth) << " nodeMask = " << std::bitset<8>(this->nodeMask) << std::endl;
/* one line per child, invalid children are only marked as such */
for (uint32_t i = 0; i < NUM_CHILDREN; i++)
{
cout << tab(depth) << " child" << i << " = { ";
if (valid(i))
{
cout << "type = " << getChildType(i);
cout << ", offset = " << getChildOffset(i);
cout << ", prim = " << getChildStartPrim(i);
cout << ", bounds = " << bounds(i);
}
else {
cout << "INVALID";
}
cout << " }" << std::endl;
}
if (close)
cout << tab(depth) << "}";
}
/* output operator for internal node */
friend inline std::ostream& operator<<(std::ostream& cout, const InternalNode& node) {
node.print(cout, 0, true); return cout;
}
#endif
};
/* Returns the size in bytes of an internal node with the specified number
 * of children. Only up to 6 children are supported; larger values
 * trigger an assertion and yield 0. */
inline size_t GetInternalNodeSize(uint32_t numChildren)
{
  if (numChildren > 6) {
    assert(false);
    return 0;
  }
  return sizeof(InternalNode6Data);
}
typedef InternalNode<InternalNode6Data> InternalNode6;
}

View file

@ -0,0 +1,151 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if defined(ZE_RAYTRACING)
#include "sys/sysinfo.h"
#include "sys/vector.h"
#include "math/vec2.h"
#include "math/vec3.h"
#include "math/bbox.h"
#include "math/affinespace.h"
#else
#include "../../common/default.h"
#endif
namespace embree
{
/* Per-triangle result of the quadifier, stored as 16 bit value. Values
 * greater than 0 and different from 0xFFFF are the offset to the
 * triangle this triangle is paired with. */
enum QuadifierType : uint16_t
{
  QUADIFIER_TRIANGLE     = 0,      // indicates that this triangle cannot get paired
  QUADIFIER_QUAD         = 1,      // all values > 0 and != 0xFFFF indicate offset to paired triangle
  QUADIFIER_MAX_DISTANCE = 31,     // largest offset that gets used for pairing
  QUADIFIER_PAIRED       = 0xFFFF, // indicates that triangle is paired with a previous triangle
};
/* Double ended queue with a fixed capacity of N elements. The elements
 * live in a ring buffer; 'begin' and 'end' are ever increasing logical
 * indices that get mapped to a storage slot modulo N. */
template<typename Ty, size_t N>
struct static_deque
{
  /* removes the front element and returns a copy of it */
  __forceinline Ty pop_front()
  {
    assert(size());
    const size_t front = begin++;
    return (*this)[front];
  }

  /* appends an element, the deque must not be full */
  __forceinline void push_back(const Ty& v)
  {
    assert(size() < N);
    const size_t back = end++;
    (*this)[back] = v;
  }

  /* number of stored elements */
  __forceinline size_t size() const
  {
    assert(end >= begin);
    return end-begin;
  }

  /* returns true if no further element can get added */
  __forceinline bool full() const {
    return N == size();
  }

  /* Removes the element at logical index j (begin <= j < end) and keeps
   * the order of the remaining elements. */
  __forceinline void erase( size_t j )
  {
    assert(j >= begin && j < end);

    /* fast path as we mostly just merge with the subsequent triangle */
    if (likely(j == begin)) {
      begin++;
      return;
    }

    const size_t numLeft  = j-begin;   // elements in front of j
    const size_t numRight = end-j-1;   // elements behind j

    if (numLeft < numRight)
    {
      /* left side is smaller: move it one slot towards the back */
      for (size_t i=j; i>begin; i--)
        (*this)[i] = (*this)[i-1];
      begin++;
    }
    else
    {
      /* right side is smaller: move it one slot towards the front */
      for (size_t i=j; i+1<end; i++)
        (*this)[i] = (*this)[i+1];
      end--;
    }
  }

  /* element access by logical index, wrapped into the ring buffer */
  __forceinline       Ty& operator[] ( const size_t i )       { return array[i%N]; }
  __forceinline const Ty& operator[] ( const size_t i ) const { return array[i%N]; }

  Ty array[N];
  size_t begin = 0;
  size_t end = 0;
};
/* Tests if the triangles a and b can get paired. For each vertex of b
 * the index (0-2) of the matching vertex of a is written to lb0, lb1,
 * and lb2, or 3 if there is no match. The triangles can get paired if
 * at most one vertex of b has no match in a. */
__forceinline bool pair_triangles(Vec3<uint32_t> a, Vec3<uint32_t> b, uint8_t& lb0, uint8_t& lb1, uint8_t& lb2)
{
  const vuint<4> va(a.x,a.y,a.z,0);

  /* lane 3 is always set, thus the search below yields 3 if none of the lanes 0-2 matches */
  const vboolf<4> noMatch(0x8);
  const vboolf<4> mb0 = noMatch | (vuint<4>(b.x) == va);
  const vboolf<4> mb1 = noMatch | (vuint<4>(b.y) == va);
  const vboolf<4> mb2 = noMatch | (vuint<4>(b.z) == va);

  /* index of the first set lane */
  lb0 = bsf(movemask(mb0));
  lb1 = bsf(movemask(mb1));
  lb2 = bsf(movemask(mb2));

  const int numUnmatched = (lb0 == 3) + (lb1 == 3) + (lb2 == 3);
  return numUnmatched <= 1;
}
/* Removes the first triangle from the window and tries to pair it with
 * one of the remaining triangles of the window. On success the offset to
 * the partner is stored for the first triangle, the partner is marked as
 * QUADIFIER_PAIRED and removed from the window. Otherwise the first
 * triangle is stored as QUADIFIER_TRIANGLE. */
template<typename GetTriangleFunc>
__forceinline void merge_triangle_window( uint32_t geomID, static_deque<uint32_t,32>& triangleWindow, QuadifierType* quads_o, const GetTriangleFunc& getTriangle )
{
  /* take the first triangle out of the window */
  const uint32_t firstID = triangleWindow.pop_front();
  const Vec3<uint32_t> firstTri = getTriangle(geomID, firstID);

  /* search the window for a partner */
  for ( size_t slot = triangleWindow.begin; slot != triangleWindow.end; ++slot )
  {
    const uint32_t secondID = triangleWindow[slot];
    const Vec3<uint32_t> secondTri = getTriangle(geomID, secondID);

    uint8_t lb0,lb1,lb2;
    const bool canPair = pair_triangles(firstTri,secondTri,lb0,lb1,lb2);

    /* the offset between the triangles cannot be too large as hardware limits bits for offset encode */
    const uint32_t prim_offset = secondID - firstID;
    if (!canPair || prim_offset > QUADIFIER_MAX_DISTANCE)
      continue;

    /* store the pairing and remove the partner from the window */
    assert(prim_offset > 0 && prim_offset < QUADIFIER_PAIRED);
    quads_o[firstID] = (QuadifierType) prim_offset;
    quads_o[secondID] = QUADIFIER_PAIRED;
    triangleWindow.erase(slot);
    return;
  }

  /* make a triangle if we fail to find a candidate to pair with */
  quads_o[firstID] = QUADIFIER_TRIANGLE;
}
/* Pairs the triangles primID0 (inclusive) to primID1 (exclusive) of the
 * geometry and writes the per-triangle result to quads_o. The triangles
 * pass through a window of 32 entries inside which partners are searched.
 * Returns the number of merge steps performed, each of which produces
 * either a pair or a single triangle. */
template<typename GetTriangleFunc>
inline size_t pair_triangles( uint32_t geomID, QuadifierType* quads_o, uint32_t primID0, uint32_t primID1, const GetTriangleFunc& getTriangle )
{
  static_deque<uint32_t, 32> triangleWindow;
  size_t numTrianglePairs = 0;

  /* feed all triangles into the window, merge whenever the window is full */
  for (uint32_t primID=primID0; primID<primID1; primID++)
  {
    triangleWindow.push_back(primID);
    if (!triangleWindow.full())
      continue;

    merge_triangle_window(geomID, triangleWindow,quads_o,getTriangle);
    numTrianglePairs++;
  }

  /* process the triangles that are left in the window */
  while (triangleWindow.size() != 0)
  {
    merge_triangle_window(geomID, triangleWindow,quads_o,getTriangle);
    numTrianglePairs++;
  }

  return numTrianglePairs;
}
}

View file

@ -0,0 +1,762 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define RTHWIF_EXPORT_API
#include "rtbuild.h"
#include "qbvh6_builder_sah.h"
// get definition of debug extension
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
#include "../../level_zero/ze_wrapper.h"
#endif
namespace embree
{
using namespace embree::isa;
/* File-local TBB task arena. Both constructor arguments are set to
* tbb::this_task_arena::max_concurrency(), evaluated when this static
* object gets initialized.
* NOTE(review): presumably the arena the builds of this file execute in;
* its use is not visible in this part of the file - confirm. */
static tbb::task_arena g_arena(tbb::this_task_arena::max_concurrency(),tbb::this_task_arena::max_concurrency());
/* Loads the vertex indices of triangle primID. The triangle is located
 * primID * triangleStride bytes behind the start of the triangle buffer. */
inline ze_rtas_triangle_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID)
{
  assert(primID < geom->triangleCount);
  /* the byte offset is calculated in 64 bits */
  const uint64_t byteOffset = uint64_t(primID)*geom->triangleStride;
  const char* const triangle = (const char*)geom->pTriangleBuffer + byteOffset;
  return *(const ze_rtas_triangle_indices_uint32_exp_t*)triangle;
}
/* Loads vertex vertexID of a triangle geometry. The vertex is located
 * vertexID * vertexStride bytes behind the start of the vertex buffer. */
inline Vec3f getVertex(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t vertexID)
{
  assert(vertexID < geom->vertexCount);
  /* the byte offset is calculated in 64 bits */
  const uint64_t byteOffset = uint64_t(vertexID)*geom->vertexStride;
  const char* const vertex = (const char*)geom->pVertexBuffer + byteOffset;
  return *(const Vec3f*)vertex;
}
/* Loads the vertex indices of quad primID. The quad is located
 * primID * quadStride bytes behind the start of the quad buffer. */
inline ze_rtas_quad_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID)
{
  assert(primID < geom->quadCount);
  /* the byte offset is calculated in 64 bits */
  const uint64_t byteOffset = uint64_t(primID)*geom->quadStride;
  const char* const quad = (const char*)geom->pQuadBuffer + byteOffset;
  return *(const ze_rtas_quad_indices_uint32_exp_t*)quad;
}
/* Loads vertex vertexID of a quad geometry. The vertex is located
 * vertexID * vertexStride bytes behind the start of the vertex buffer. */
inline Vec3f getVertex(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t vertexID)
{
  assert(vertexID < geom->vertexCount);
  /* the byte offset is calculated in 64 bits */
  const uint64_t byteOffset = uint64_t(vertexID)*geom->vertexStride;
  const char* const vertex = (const char*)geom->pVertexBuffer + byteOffset;
  return *(const Vec3f*)vertex;
}
/* Builds an AffineSpace3fa from a transformation structure that provides
 * the members vx_*, vy_*, vz_*, and p_*. The members are accessed by
 * name, thus the helper works independent of the memory layout of the
 * structure. */
template<typename Transform>
inline AffineSpace3fa affineSpaceFromTransform(const Transform* xfm)
{
  return {
    { xfm->vx_x, xfm->vx_y, xfm->vx_z },
    { xfm->vy_x, xfm->vy_y, xfm->vy_z },
    { xfm->vz_x, xfm->vz_y, xfm->vz_z },
    { xfm-> p_x, xfm-> p_y, xfm-> p_z }
  };
}

/* Returns the transformation of the instance as AffineSpace3fa. The
 * transformFormat member selects the type pTransform points to. Throws
 * std::runtime_error for formats that are not supported. */
inline AffineSpace3fa getTransform(const ze_rtas_builder_instance_geometry_info_exp_t* geom)
{
  const void* const data = geom->pTransform;

  switch (geom->transformFormat)
  {
  case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_COLUMN_MAJOR:
    return affineSpaceFromTransform((const ze_rtas_transform_float3x4_column_major_exp_t*) data);

  case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR:
    return affineSpaceFromTransform((const ze_rtas_transform_float3x4_aligned_column_major_exp_t*) data);

  case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ROW_MAJOR:
    return affineSpaceFromTransform((const ze_rtas_transform_float3x4_row_major_exp_t*) data);

  default:
    throw std::runtime_error("invalid transform format");
  }
}
/* Verifies a triangle geometry descriptor: the index and vertex formats
 * have to be the supported ones, and a buffer has to be specified
 * whenever the respective element count is non-zero. Throws
 * std::runtime_error on the first violation. */
inline void verifyGeometryDesc(const ze_rtas_builder_triangles_geometry_info_exp_t* geom)
{
  const bool triangleFormatOk = geom->triangleFormat == ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32;
  if (!triangleFormatOk)
    throw std::runtime_error("triangle format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32");

  const bool vertexFormatOk = geom->vertexFormat == ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
  if (!vertexFormatOk)
    throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3");

  const bool trianglesMissing = geom->triangleCount && geom->pTriangleBuffer == nullptr;
  if (trianglesMissing)
    throw std::runtime_error("no triangle buffer specified");

  const bool verticesMissing = geom->vertexCount && geom->pVertexBuffer == nullptr;
  if (verticesMissing)
    throw std::runtime_error("no vertex buffer specified");
}
/* Verifies a quad geometry descriptor: the index and vertex formats have
 * to be the supported ones, and a buffer has to be specified whenever
 * the respective element count is non-zero. Throws std::runtime_error
 * on the first violation. */
inline void verifyGeometryDesc(const ze_rtas_builder_quads_geometry_info_exp_t* geom)
{
  const bool quadFormatOk = geom->quadFormat == ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32;
  if (!quadFormatOk)
    throw std::runtime_error("quad format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32");

  const bool vertexFormatOk = geom->vertexFormat == ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
  if (!vertexFormatOk)
    throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3");

  const bool quadsMissing = geom->quadCount && geom->pQuadBuffer == nullptr;
  if (quadsMissing)
    throw std::runtime_error("no quad buffer specified");

  const bool verticesMissing = geom->vertexCount && geom->pVertexBuffer == nullptr;
  if (verticesMissing)
    throw std::runtime_error("no vertex buffer specified");
}
/* Verifies a procedural geometry descriptor: a bounds callback is
 * required if the geometry has primitives, and the reserved member has
 * to be zero. Throws std::runtime_error on the first violation. */
inline void verifyGeometryDesc(const ze_rtas_builder_procedural_geometry_info_exp_t* geom)
{
  const bool callbackMissing = geom->primCount && geom->pfnGetBoundsCb == nullptr;
  if (callbackMissing)
    throw std::runtime_error("no bounds function specified");

  if (geom->reserved != 0)
    throw std::runtime_error("reserved value must be zero");
}
/* Verifies an instance geometry descriptor: transformation, bounds, and
 * the acceleration structure to instantiate all have to be specified.
 * Throws std::runtime_error on the first missing pointer. */
inline void verifyGeometryDesc(const ze_rtas_builder_instance_geometry_info_exp_t* geom)
{
  if (!geom->pTransform)
    throw std::runtime_error("no instance transformation specified");

  if (!geom->pBounds)
    throw std::runtime_error("no acceleration structure bounds specified");

  if (!geom->pAccelerationStructure)
    throw std::runtime_error("no acceleration structure to instanciate specified");
}
inline bool buildBounds(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
{
if (primID >= geom->triangleCount) return false;
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
if (unlikely(tri.v0 >= geom->vertexCount)) return false;
if (unlikely(tri.v1 >= geom->vertexCount)) return false;
if (unlikely(tri.v2 >= geom->vertexCount)) return false;
const Vec3f p0 = getVertex(geom,tri.v0);
const Vec3f p1 = getVertex(geom,tri.v1);
const Vec3f p2 = getVertex(geom,tri.v2);
if (unlikely(!isvalid(p0))) return false;
if (unlikely(!isvalid(p1))) return false;
if (unlikely(!isvalid(p2))) return false;
bbox = BBox3fa(min(p0,p1,p2),max(p0,p1,p2));
return true;
}
inline bool buildBounds(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
{
if (primID >= geom->quadCount) return false;
const ze_rtas_quad_indices_uint32_exp_t tri = getPrimitive(geom,primID);
if (unlikely(tri.v0 >= geom->vertexCount)) return false;
if (unlikely(tri.v1 >= geom->vertexCount)) return false;
if (unlikely(tri.v2 >= geom->vertexCount)) return false;
if (unlikely(tri.v3 >= geom->vertexCount)) return false;
const Vec3f p0 = getVertex(geom,tri.v0);
const Vec3f p1 = getVertex(geom,tri.v1);
const Vec3f p2 = getVertex(geom,tri.v2);
const Vec3f p3 = getVertex(geom,tri.v3);
if (unlikely(!isvalid(p0))) return false;
if (unlikely(!isvalid(p1))) return false;
if (unlikely(!isvalid(p2))) return false;
if (unlikely(!isvalid(p3))) return false;
bbox = BBox3fa(min(p0,p1,p2,p3),max(p0,p1,p2,p3));
return true;
}
inline bool buildBounds(const ze_rtas_builder_procedural_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
{
if (primID >= geom->primCount) return false;
if (geom->pfnGetBoundsCb == nullptr) return false;
BBox3f bounds;
ze_rtas_geometry_aabbs_exp_cb_params_t params = { ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS };
params.primID = primID;
params.primIDCount = 1;
params.pGeomUserPtr = geom->pGeomUserPtr;
params.pBuildUserPtr = buildUserPtr;
params.pBoundsOut = (ze_rtas_aabb_exp_t*) &bounds;
(geom->pfnGetBoundsCb)(&params);
if (unlikely(!isvalid(bounds.lower))) return false;
if (unlikely(!isvalid(bounds.upper))) return false;
if (unlikely(bounds.empty())) return false;
bbox = (BBox3f&) bounds;
return true;
}
inline bool buildBounds(const ze_rtas_builder_instance_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
{
if (primID >= 1) return false;
if (geom->pAccelerationStructure == nullptr) return false;
if (geom->pTransform == nullptr) return false;
const AffineSpace3fa local2world = getTransform(geom);
const Vec3fa lower(geom->pBounds->lower.x,geom->pBounds->lower.y,geom->pBounds->lower.z);
const Vec3fa upper(geom->pBounds->upper.x,geom->pBounds->upper.y,geom->pBounds->upper.z);
const BBox3fa bounds = xfmBounds(local2world,BBox3fa(lower,upper));
if (unlikely(!isvalid(bounds.lower))) return false;
if (unlikely(!isvalid(bounds.upper))) return false;
if (unlikely(bounds.empty())) return false;
bbox = bounds;
return true;
}
/*
  Creates the primitive references for the primitives in range r of
  one geometry.

  Every primitive for which buildBounds succeeds is written to prims,
  starting at index k, and accumulated into the returned PrimInfo.
  Primitives for which buildBounds fails are skipped and occupy no
  slot in prims.
*/
template<typename GeometryType>
PrimInfo createGeometryPrimRefArray(const GeometryType* geom, void* buildUserPtr, evector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID)
{
  PrimInfo pinfo(empty);

  for (uint32_t primID=r.begin(); primID<r.end(); primID++)
  {
    BBox3fa bounds = empty;

    /* only valid primitives get a primitive reference */
    if (buildBounds(geom,primID,bounds,buildUserPtr))
    {
      const PrimRef prim(bounds,geomID,primID);
      pinfo.add_center2(prim);
      prims[k] = prim;
      k++;
    }
  }

  return pinfo;
}
/*
  Common header of descriptor and property structures. The validation
  code casts such structures to this type to walk their pNext
  extension chain.
*/
typedef struct _zet_base_desc_t
{
/** [in] type of this structure */
ze_structure_type_t stype;
/** [in,out][optional] must be null or a pointer to an extension-specific structure */
const void* pNext;
} zet_base_desc_t_;
/*
  Validates arg using the matching validate() overload and returns the
  resulting error code from the enclosing function if validation
  fails. The enclosing function has to return ze_result_t.
*/
#define VALIDATE(arg) \
  {\
    ze_result_t result = validate(arg);\
    if (result != ZE_RESULT_SUCCESS) return result; \
  }

/*
  Returns ZE_RESULT_ERROR_INVALID_NULL_POINTER from the enclosing
  function if arg is a null pointer. The last line of the macro must
  not end with a line continuation, as this would pull the following
  source line into the macro body.
*/
#define VALIDATE_PTR(arg) \
  { \
    if ((arg) == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; \
  }
/* A driver handle is accepted whenever it is not null. */
ze_result_t validate(ze_driver_handle_t hDriver)
{
  return (hDriver != nullptr) ? ZE_RESULT_SUCCESS
                              : ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
}
/* A device handle is accepted whenever it is not null. */
ze_result_t validate(ze_device_handle_t hDevice)
{
  return (hDevice != nullptr) ? ZE_RESULT_SUCCESS
                              : ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
}
/*
  Walks the pNext chain starting at desc and returns true if the chain
  terminates with a null pNext within 1024 links. Longer chains are
  rejected, which also catches cyclic chains. The desc pointer itself
  has to be non-null.
*/
bool checkDescChain(zet_base_desc_t_* desc)
{
  size_t remainingLinks = 1024;

  while (remainingLinks > 0)
  {
    /* end of chain reached */
    if (desc->pNext == nullptr)
      return true;

    desc = (zet_base_desc_t_*) desc->pNext;
    remainingLinks--;
  }

  /* chain too long or cyclic */
  return false;
}
/*
  Object behind a ze_rtas_builder_exp_handle_t. It only stores a magic
  number that is used to recognize valid builder objects. The magic
  number is cleared on destruction.
*/
struct ze_rtas_builder
{
  enum { MAGICK = 0x45FE67E1 };

  ze_rtas_builder () {}

  ~ze_rtas_builder() { magick = 0x0; }

  /* returns true if the object still carries the magic number */
  bool verify() const
  {
    const bool intact = (magick == MAGICK);
    return intact;
  }

  uint32_t magick = MAGICK;
};
/*
  A builder handle has to be non-null and has to reference an object
  that carries the builder magic number.
*/
ze_result_t validate(ze_rtas_builder_exp_handle_t hBuilder)
{
  if (hBuilder == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;

  const ze_rtas_builder* builder = (ze_rtas_builder*) hBuilder;
  return builder->verify() ? ZE_RESULT_SUCCESS
                           : ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
/*
  Object behind a ze_rtas_parallel_operation_exp_handle_t. Besides a
  magic number to recognize valid objects it stores whether the object
  is currently attached to a build, the error code of that build, and
  the task group the build runs in.
*/
struct ze_rtas_parallel_operation_t
{
  enum { MAGICK = 0xE84567E1 };

  ze_rtas_parallel_operation_t() {}

  ~ze_rtas_parallel_operation_t() { magick = 0x0; }

  /* returns ZE_RESULT_SUCCESS if the object still carries the magic number */
  ze_result_t verify() const
  {
    return (magick == MAGICK) ? ZE_RESULT_SUCCESS
                              : ZE_RESULT_ERROR_INVALID_ARGUMENT;
  }

  uint32_t magick = MAGICK;
  std::atomic<bool> object_in_use = false;
  ze_result_t errorCode = ZE_RESULT_SUCCESS;
  tbb::task_group group;
};
/*
  A parallel operation handle has to be non-null and has to reference
  an object that passes its own verification.
*/
ze_result_t validate(ze_rtas_parallel_operation_exp_handle_t hParallelOperation)
{
  if (hParallelOperation == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;

  const ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;
  return op->verify();
}
/*
  Validates a builder descriptor: the pointer has to be non-null, the
  structure type has to match, the pNext chain has to be well formed,
  and the requested builder version must not be newer than the
  current one.
*/
ze_result_t validate(const ze_rtas_builder_exp_desc_t* pDescriptor)
{
  if (pDescriptor == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* the checks are evaluated in this order and stop at the first failure */
  if (pDescriptor->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC ||
      !checkDescChain((zet_base_desc_t_*)pDescriptor) ||
      uint32_t(ZE_RTAS_BUILDER_EXP_VERSION_CURRENT) < uint32_t(pDescriptor->builderVersion))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
/*
  Validates a device properties structure: the pointer has to be
  non-null, the structure type has to match, and the pNext chain has
  to be well formed.
*/
ze_result_t validate(ze_rtas_device_exp_properties_t* pProperties)
{
  if (pProperties == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* the chain is only walked if the structure type matches */
  if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES ||
      !checkDescChain((zet_base_desc_t_*)pProperties))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
/*
  An acceleration structure format is accepted if it is not the
  invalid format and not larger than the maximal known format version.
*/
ze_result_t validate(ze_rtas_format_exp_t rtasFormat)
{
  const bool isInvalid  = (rtasFormat == ZE_RTAS_FORMAT_EXP_INVALID);
  const bool isTooLarge = uint32_t(rtasFormat) > uint32_t(ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX);

  if (isInvalid || isTooLarge)
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
/*
  Validates a build operation descriptor. Checked in this order: the
  pointer, the structure type, the pNext chain, the acceleration
  structure format, the geometries array, the number of geometries,
  the build quality, and the build flags. The first failing check
  determines the returned error code.
*/
ze_result_t validate(const ze_rtas_builder_build_op_exp_desc_t* args)
{
  /* the descriptor itself has to be provided */
  if (args == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* descriptor needs the proper type and a well formed pNext chain */
  if (args->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC ||
      !checkDescChain((zet_base_desc_t_*)args))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  /* the acceleration structure format has to be supported */
  VALIDATE(args->rtasFormat);

  /* a geometries array is required as soon as geometries are announced */
  if (args->numGeometries > 0 && args->ppGeometries == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* number of geometries, build quality, and build flags have to be in range */
  if (args->numGeometries > 0x00FFFFFF ||
      (args->buildQuality < 0 || ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH < args->buildQuality) ||
      args->buildFlags >= (ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION<<1))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
/*
  Validates a builder properties structure: the pointer has to be
  non-null, the structure type has to match, and the pNext chain has
  to be well formed.
*/
ze_result_t validate(ze_rtas_builder_exp_properties_t* pProp)
{
  /* the structure to fill has to be provided */
  if (pProp == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* proper type first, then the pNext chain */
  if (pProp->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES ||
      !checkDescChain((zet_base_desc_t_*)pProp))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
/*
  Validates a parallel operation properties structure: the pointer
  has to be non-null, the structure type has to match, and the pNext
  chain has to be well formed.
*/
ze_result_t validate(ze_rtas_parallel_operation_exp_properties_t* pProperties)
{
  /* the structure to fill has to be provided */
  if (pProperties == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* proper type first, then the pNext chain */
  if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES ||
      !checkDescChain((zet_base_desc_t_*)pProperties))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
/*
  Creates a new builder object and returns its handle through
  phBuilder. Fails with the respective error code if the driver
  handle, the descriptor, or the output pointer is invalid.
*/
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder)
{
  /* check the inputs */
  VALIDATE(hDriver);
  VALIDATE(pDescriptor);
  VALIDATE_PTR(phBuilder);

  /* hand out the builder object as opaque handle */
  ze_rtas_builder* builder = new ze_rtas_builder();
  *phBuilder = (ze_rtas_builder_exp_handle_t) builder;
  return ZE_RESULT_SUCCESS;
}
/*
  Destroys the builder object referenced by hBuilder. The handle is
  validated first; an invalid handle is reported and nothing is
  deleted.
*/
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder)
{
  VALIDATE(hBuilder);

  ze_rtas_builder* builder = (ze_rtas_builder*) hBuilder;
  delete builder;
  return ZE_RESULT_SUCCESS;
}
/*
  Checks whether two acceleration structure formats are compatible.
  After validation of the inputs, two formats are reported as
  compatible exactly if they are identical.
*/
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver,
                                                                                      const ze_rtas_format_exp_t accelFormat,
                                                                                      const ze_rtas_format_exp_t otherAccelFormat )
{
  /* check the inputs */
  VALIDATE(hDriver);
  VALIDATE(accelFormat);
  VALIDATE(otherAccelFormat);

  /* identical formats are compatible, everything else is reported as incompatible */
  const bool compatible = (accelFormat == otherAccelFormat);
  return compatible ? ZE_RESULT_SUCCESS
                    : ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE;
}
/*
  Returns the number of primitives of a geometry: the triangle, quad,
  or primitive count for triangle, quad, and procedural geometries,
  1 for an instance, and 0 for an unknown geometry type.
*/
uint32_t getNumPrimitives(const ze_rtas_builder_geometry_info_exp_t* geom)
{
  const auto type = geom->geometryType;

  if (type == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES)
    return ((ze_rtas_builder_triangles_geometry_info_exp_t*) geom)->triangleCount;

  if (type == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL)
    return ((ze_rtas_builder_procedural_geometry_info_exp_t*) geom)->primCount;

  if (type == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS)
    return ((ze_rtas_builder_quads_geometry_info_exp_t*) geom)->quadCount;

  /* an instance always consists of a single primitive */
  if (type == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE)
    return 1;

  return 0;
}
/*
  Queries the memory requirements of a build operation.

  After validation of the inputs the builder is asked to estimate the
  expected and worst case acceleration structure size and the scratch
  buffer size for the geometries described by args. The results are
  written to pProp.

  Returns the error code of the first failing input validation,
  ZE_RESULT_ERROR_UNKNOWN if the size estimation throws (for instance
  because of an invalid geometry type), and ZE_RESULT_SUCCESS
  otherwise.
*/
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
                                                                                const ze_rtas_builder_build_op_exp_desc_t* args,
                                                                                ze_rtas_builder_exp_properties_t* pProp)
{
  /* input validation */
  VALIDATE(hBuilder);
  VALIDATE(args);
  VALIDATE(pProp);

  const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries;
  const size_t numGeometries = args->numGeometries;

  /* number of primitives of a geometry, null geometries count as empty */
  auto getSize = [&](uint32_t geomID) -> size_t {
    const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
    if (geom == nullptr) return 0;
    return getNumPrimitives(geom);
  };

  /* builder primitive type of a geometry, throws for unknown geometry types */
  auto getType = [&](unsigned int geomID)
  {
    const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
    assert(geom);
    switch (geom->geometryType) {
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE;
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD;
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL;
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE;
    default: throw std::runtime_error("invalid geometry type");
    };
  };

  /* query memory requirements from builder */
  size_t expectedBytes = 0;
  size_t worstCaseBytes = 0;
  size_t scratchBytes = 0;

  /* this function is exported with C linkage, thus no exception is allowed to leave it */
  try {
    QBVH6BuilderSAH::estimateSize(numGeometries, getSize, getType, args->rtasFormat, args->buildQuality, args->buildFlags, expectedBytes, worstCaseBytes, scratchBytes);
  }
  catch (const std::exception&) {
    return ZE_RESULT_ERROR_UNKNOWN;
  }

  /* fill return struct */
  pProp->flags = 0;
  pProp->rtasBufferSizeBytesExpected = expectedBytes;
  pProp->rtasBufferSizeBytesMaxRequired = worstCaseBytes;
  pProp->scratchBufferSizeBytes = scratchBytes;
  return ZE_RESULT_SUCCESS;
}
/*
  Performs the actual acceleration structure build for the geometries
  described by args.

  The geometry descriptors are verified first, then a set of callbacks
  is handed to QBVH6BuilderSAH::build, which uses them to query the
  size and type of each geometry, to create the primitive references,
  and to fetch the individual triangles, quads, procedurals, and
  instances.

  Returns ZE_RESULT_SUCCESS if the builder reports success and
  ZE_RESULT_EXP_RTAS_BUILD_RETRY if it reports failure. The entire
  function body is a function-try-block: any std::exception thrown
  inside (e.g. for an invalid geometry type) is mapped to
  ZE_RESULT_ERROR_UNKNOWN.
*/
ze_result_t zeRTASBuilderBuildExpBody(const ze_rtas_builder_build_op_exp_desc_t* args,
void *pScratchBuffer, size_t scratchBufferSizeBytes,
void *pRtasBuffer, size_t rtasBufferSizeBytes,
void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) try
{
const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries;
const uint32_t numGeometries = args->numGeometries;
/* verify input descriptors */
parallel_for(numGeometries,[&](uint32_t geomID) {
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
/* null entries in the geometries array are skipped */
if (geom == nullptr) return;
switch (geom->geometryType) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : verifyGeometryDesc((ze_rtas_builder_triangles_geometry_info_exp_t*)geom); break;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : verifyGeometryDesc((ze_rtas_builder_quads_geometry_info_exp_t* )geom); break;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : verifyGeometryDesc((ze_rtas_builder_procedural_geometry_info_exp_t*)geom); break;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : verifyGeometryDesc((ze_rtas_builder_instance_geometry_info_exp_t* )geom); break;
default: throw std::runtime_error("invalid geometry type");
};
});
/* returns the number of primitives of a geometry, null geometries count as empty */
auto getSize = [&](uint32_t geomID) -> size_t {
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
if (geom == nullptr) return 0;
return getNumPrimitives(geom);
};
/* maps the geometry type to the primitive type of the builder, expects a non-null geometry */
auto getType = [&](unsigned int geomID)
{
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
assert(geom);
switch (geom->geometryType) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE;
default: throw std::runtime_error("invalid geometry type");
};
};
/* creates the primitive references for a range of primitives of one geometry, the time_range argument is not used */
auto createPrimRefArray = [&] (evector<PrimRef>& prims, BBox1f time_range, const range<size_t>& r, size_t k, unsigned int geomID) -> PrimInfo
{
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
assert(geom);
switch (geom->geometryType) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return createGeometryPrimRefArray((ze_rtas_builder_triangles_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return createGeometryPrimRefArray((ze_rtas_builder_quads_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return createGeometryPrimRefArray((ze_rtas_builder_procedural_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return createGeometryPrimRefArray((ze_rtas_builder_instance_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID);
default: throw std::runtime_error("invalid geometry type");
};
};
/* geometries are treated as opaque unless the NON_OPAQUE flag is set */
auto convertGeometryFlags = [&] (ze_rtas_builder_packed_geometry_exp_flags_t flags) -> GeometryFlags {
return (flags & ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE) ? GeometryFlags::NONE : GeometryFlags::OPAQUE;
};
/* returns a triangle of a triangle geometry, a default constructed triangle is returned for out of range indices or invalid vertices */
auto getTriangle = [&](unsigned int geomID, unsigned int primID)
{
const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID];
assert(geom);
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
if (unlikely(tri.v0 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
if (unlikely(tri.v1 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
if (unlikely(tri.v2 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
const Vec3f p0 = getVertex(geom,tri.v0);
const Vec3f p1 = getVertex(geom,tri.v1);
const Vec3f p2 = getVertex(geom,tri.v2);
if (unlikely(!isvalid(p0))) return QBVH6BuilderSAH::Triangle();
if (unlikely(!isvalid(p1))) return QBVH6BuilderSAH::Triangle();
if (unlikely(!isvalid(p2))) return QBVH6BuilderSAH::Triangle();
const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags);
return QBVH6BuilderSAH::Triangle(tri.v0,tri.v1,tri.v2,p0,p1,p2,gflags,geom->geometryMask);
};
/* returns the vertex indices of a triangle, no range checks are performed here */
auto getTriangleIndices = [&] (uint32_t geomID, uint32_t primID) {
const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID];
assert(geom);
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
return Vec3<uint32_t>(tri.v0,tri.v1,tri.v2);
};
/* returns a quad of a quad geometry, in contrast to getTriangle no index or vertex checks are performed here */
auto getQuad = [&](unsigned int geomID, unsigned int primID)
{
const ze_rtas_builder_quads_geometry_info_exp_t* geom = (const ze_rtas_builder_quads_geometry_info_exp_t*) geometries[geomID];
assert(geom);
const ze_rtas_quad_indices_uint32_exp_t quad = getPrimitive(geom,primID);
const Vec3f p0 = getVertex(geom,quad.v0);
const Vec3f p1 = getVertex(geom,quad.v1);
const Vec3f p2 = getVertex(geom,quad.v2);
const Vec3f p3 = getVertex(geom,quad.v3);
const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags);
return QBVH6BuilderSAH::Quad(p0,p1,p2,p3,gflags,geom->geometryMask);
};
/* returns a procedural primitive, only the geometry mask is passed on */
auto getProcedural = [&](unsigned int geomID, unsigned int primID) {
const ze_rtas_builder_procedural_geometry_info_exp_t* geom = (const ze_rtas_builder_procedural_geometry_info_exp_t*) geometries[geomID];
assert(geom);
return QBVH6BuilderSAH::Procedural(geom->geometryMask); // FIXME: pass gflags
};
/* returns an instance with its transformation, acceleration structure, mask, and user ID */
auto getInstance = [&](unsigned int geomID, unsigned int primID)
{
assert(geometries[geomID]);
assert(geometries[geomID]->geometryType == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE);
const ze_rtas_builder_instance_geometry_info_exp_t* geom = (const ze_rtas_builder_instance_geometry_info_exp_t*) geometries[geomID];
void* accel = geom->pAccelerationStructure;
const AffineSpace3fa local2world = getTransform(geom);
return QBVH6BuilderSAH::Instance(local2world,accel,geom->geometryMask,geom->instanceUserID); // FIXME: pass instance flags
};
/* dispatch globals ptr for debugging purposes */
void* dispatchGlobalsPtr = nullptr;
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
/* only the first extension structure of the pNext chain is inspected */
if (args->pNext) {
zet_base_desc_t_* next = (zet_base_desc_t_*) args->pNext;
if (next->stype == ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC) {
ze_rtas_builder_build_op_debug_exp_desc_t* debug_ext = (ze_rtas_builder_build_op_debug_exp_desc_t*) next;
dispatchGlobalsPtr = debug_ext->dispatchGlobalsPtr;
}
}
#endif
bool verbose = false;
bool success = QBVH6BuilderSAH::build(numGeometries, nullptr,
getSize, getType,
createPrimRefArray, getTriangle, getTriangleIndices, getQuad, getProcedural, getInstance,
(char*)pRtasBuffer, rtasBufferSizeBytes,
pScratchBuffer, scratchBufferSizeBytes,
(BBox3f*) pBounds, pRtasBufferSizeBytes,
args->rtasFormat, args->buildQuality, args->buildFlags, verbose, dispatchGlobalsPtr);
/* a failed build is reported as retry request to the caller */
if (!success) {
return ZE_RESULT_EXP_RTAS_BUILD_RETRY;
}
return ZE_RESULT_SUCCESS;
}
/* handler of the function-try-block, maps exceptions thrown during the build to an error code */
catch (std::exception& e) {
//std::cerr << "caught exception during BVH build: " << e.what() << std::endl;
return ZE_RESULT_ERROR_UNKNOWN;
}
/*
  Builds an acceleration structure.

  If a parallel operation handle is provided, the build is started as
  a task of the task group of that parallel operation and
  ZE_RESULT_EXP_RTAS_BUILD_DEFERRED is returned; the result of the
  build is stored in the parallel operation object. A parallel
  operation that is already attached to a build is rejected with
  ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE.

  Without a parallel operation handle the build is executed inside
  the task arena before this function returns, and the result of the
  build is returned directly.

  Invalid inputs are reported with the error code of the first
  failing validation.
*/
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
                                                                   const ze_rtas_builder_build_op_exp_desc_t* args,
                                                                   void *pScratchBuffer, size_t scratchBufferSizeBytes,
                                                                   void *pRtasBuffer, size_t rtasBufferSizeBytes,
                                                                   ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
                                                                   void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes)
{
  /* input validation */
  VALIDATE(hBuilder);
  VALIDATE(args);
  VALIDATE_PTR(pScratchBuffer);
  VALIDATE_PTR(pRtasBuffer);

  /* if parallel operation is provided then execute using thread arena inside task group ... */
  if (hParallelOperation)
  {
    VALIDATE(hParallelOperation);

    ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;

    /* claim the object with a single atomic exchange, such that two
       concurrent builds can never both pass the in-use check */
    if (op->object_in_use.exchange(true))
      return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;

    /* the task outlives this function, thus it captures its arguments by value */
    g_arena.execute([&](){ op->group.run([=](){
       op->errorCode = zeRTASBuilderBuildExpBody(args,
                                                 pScratchBuffer, scratchBufferSizeBytes,
                                                 pRtasBuffer, rtasBufferSizeBytes,
                                                 pBuildUserPtr, pBounds, pRtasBufferSizeBytes);
                                         });
                     });
    return ZE_RESULT_EXP_RTAS_BUILD_DEFERRED;
  }
  /* ... otherwise we just execute inside task arena to avoid spawning of TBB worker threads */
  else
  {
    ze_result_t errorCode = ZE_RESULT_SUCCESS;
    g_arena.execute([&](){ errorCode = zeRTASBuilderBuildExpBody(args,
                                                                 pScratchBuffer, scratchBufferSizeBytes,
                                                                 pRtasBuffer, rtasBufferSizeBytes,
                                                                 pBuildUserPtr, pBounds, pRtasBufferSizeBytes);
                     });
    return errorCode;
  }
}
/*
  Creates a new parallel operation object and returns its handle
  through phParallelOperation. Fails with the respective error code
  if the driver handle or the output pointer is invalid.
*/
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation)
{
  /* check the inputs */
  VALIDATE(hDriver);
  VALIDATE_PTR(phParallelOperation);

  /* hand out the parallel operation object as opaque handle */
  ze_rtas_parallel_operation_t* op = new ze_rtas_parallel_operation_t();
  *phParallelOperation = (ze_rtas_parallel_operation_exp_handle_t) op;
  return ZE_RESULT_SUCCESS;
}
/*
  Destroys the parallel operation object referenced by
  hParallelOperation. The handle is validated first; an invalid
  handle is reported and nothing is deleted.
*/
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation )
{
  /* check the input */
  VALIDATE(hParallelOperation);

  /* release the object behind the handle */
  ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;
  delete op;
  return ZE_RESULT_SUCCESS;
}
/*
  Returns the properties of a parallel operation. Properties can only
  be queried while the parallel operation is attached to a build,
  otherwise ZE_RESULT_ERROR_INVALID_ARGUMENT is returned. The reported
  maximal concurrency is the value of
  tbb::this_task_arena::max_concurrency().
*/
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties )
{
  /* check the inputs */
  VALIDATE(hParallelOperation);
  VALIDATE(pProperties);

  /* the parallel operation has to be attached to a build */
  ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;
  const bool attached = op->object_in_use.load();
  if (!attached)
    return ZE_RESULT_ERROR_INVALID_ARGUMENT;

  /* fill the properties */
  pProperties->flags = 0;
  pProperties->maxConcurrency = tbb::this_task_arena::max_concurrency();
  return ZE_RESULT_SUCCESS;
}
/*
  Waits for the build attached to the parallel operation to finish
  and returns the error code of that build. Afterwards the parallel
  operation is marked as no longer in use. An invalid handle is
  reported with the respective error code.
*/
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation)
{
  /* check for valid handle */
  VALIDATE(hParallelOperation);

  ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;

  /* wait for the build task to finish */
  g_arena.execute([&](){ op->group.wait(); });

  /* read the result before releasing the object, since a new build
     that overwrites errorCode may be attached as soon as the object
     is marked as unused */
  const ze_result_t errorCode = op->errorCode;
  op->object_in_use.store(false);
  return errorCode;
}
}

View file

@ -0,0 +1,66 @@
// Copyright 2009-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../level_zero/ze_api.h"
#if !defined(ZE_RTAS_BUILDER_EXP_NAME)
#include "../../level_zero/ze_rtas.h"
#endif
#include <stddef.h>
#include <stdint.h>
/* RTHWIF_API_EXTERN_C requests C linkage when compiled as C++ and is empty otherwise */
#if defined(__cplusplus)
# define RTHWIF_API_EXTERN_C extern "C"
#else
# define RTHWIF_API_EXTERN_C
#endif
/* RTHWIF_API_IMPORT and RTHWIF_API_EXPORT: on Windows the symbols are
   imported/exported through __declspec unless
   EMBREE_RTHWIF_STATIC_LIB is defined; on other platforms exported
   symbols get default visibility */
#if defined(_WIN32)
#if defined(EMBREE_RTHWIF_STATIC_LIB)
# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C
# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C
#else
# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C __declspec(dllimport)
# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __declspec(dllexport)
#endif
#else
# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C
# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __attribute__ ((visibility ("default")))
#endif
/* Internal acceleration structure format versions. VERSION_MAX has to
   equal the largest version listed here. */
typedef enum _ze_raytracing_accel_format_internal_t {
ZE_RTAS_DEVICE_FORMAT_EXP_INVALID = 0, // invalid acceleration structure format
ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1 = 1, // acceleration structure format version 1
ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_2 = 2, // acceleration structure format version 2
ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX = 2
} ze_raytracing_accel_format_internal_t;
/* Creates a builder object and returns its handle through phBuilder. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder);
/* Destroys a builder object. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder);
/* Checks whether two acceleration structure formats are compatible. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver,
const ze_rtas_format_exp_t accelFormat,
const ze_rtas_format_exp_t otherAccelFormat);
/* Queries the acceleration structure and scratch buffer sizes required for a build operation. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
const ze_rtas_builder_build_op_exp_desc_t* args,
ze_rtas_builder_exp_properties_t* pProp);
/* Builds an acceleration structure, either directly or deferred when a parallel operation handle is provided. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
const ze_rtas_builder_build_op_exp_desc_t* args,
void *pScratchBuffer, size_t scratchBufferSizeBytes,
void *pRtasBuffer, size_t rtasBufferSizeBytes,
ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes);
/* Creates a parallel operation object and returns its handle through phParallelOperation. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation);
/* Destroys a parallel operation object. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation );
/* Queries the properties of a parallel operation. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties );
/* Waits for the build attached to a parallel operation and returns its result. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation);

View file

@ -0,0 +1,155 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "statistics.h"
namespace embree
{
/*
  Scope guard that remembers the format flags and the precision of a
  stream on construction and writes both back to the stream on
  destruction.
*/
class RestoreStreamState
{
public:

  RestoreStreamState(std::ostream& iostream)
    : stream(iostream),
      savedFlags(iostream.flags()),
      savedPrecision(iostream.precision())
  {}

  ~RestoreStreamState()
  {
    stream.flags(savedFlags);
    stream.precision(savedPrecision);
  }

private:
  std::ostream& stream;             // stream whose state gets restored
  std::ios::fmtflags savedFlags;    // format flags at construction time
  std::streamsize savedPrecision;   // precision at construction time
};
/* Returns a/b, or 0 if the denominator b is zero. */
double ratio(double a, double b)
{
  return (b == 0.0) ? 0.0 : a/b;
}
double percent(double a, double b) {
return 100.0*ratio(a,b);
}
/* Ratio of two counters, calculated in double precision. */
double ratio(size_t a, size_t b)
{
  const double nominator   = double(a);
  const double denominator = double(b);
  return ratio(nominator, denominator);
}
/* Ratio of two counters in percent, calculated in double precision. */
double percent(size_t a, size_t b)
{
  const double nominator   = double(a);
  const double denominator = double(b);
  return percent(nominator, denominator);
}
/*
  Prints one table row with the statistics of the internal nodes:
  number of nodes, SAH, share of the total SAH, size in MB, used
  bytes in percent, share of the total bytes, bytes per node, per
  used child and per primitive, used children per node, and fill
  rate. The "used bytes" column relates numBytes to itself, as nodes
  do not distinguish used from total bytes. The stream state is
  restored when the function returns.
*/
void BVHStatistics::NodeStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const
{
  RestoreStreamState iostate(cout);

  cout << std::setw(7) << numNodes << " "
       << std::setw(7) << std::setprecision(3) << sah()
       << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% "
       << std::setw(8) << std::setprecision(2) << bytes()/1E6 << " MB "
       << std::setw(7) << std::setprecision(2) << percent(numBytes,numBytes) << "% "
       << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% "
       << std::setw(8) << std::setprecision(2) << ratio(bytes(),numNodes) << " "
       << std::setw(8) << std::setprecision(2) << ratio(bytes(),numChildrenUsed) << " "
       << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " "
       << std::setw(7) << std::setprecision(2) << ratio(numChildrenUsed,numNodes) << " "
       << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% "
       << std::endl;
}
/*
  Prints one table row with the statistics of a leaf type: number of
  leaves (or number of blocks if blocks is set), SAH, share of the
  total SAH, size in MB, used bytes in percent, share of the total
  bytes, bytes per leaf/block, per used primitive and per scene
  primitive, used primitives per leaf/block, and fill rate. The
  stream state is restored when the function returns.
*/
void BVHStatistics::LeafStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks) const
{
  RestoreStreamState iostate(cout);

  /* the row is either related to the leaves or to the blocks */
  size_t N = numLeaves;
  if (blocks) N = numBlocks;

  cout << std::setw(7) << N << " "
       << std::setw(7) << std::setprecision(3) << sah()
       << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% "
       << std::setw(8) << std::setprecision(2) << double(bytes())/1E6 << " MB "
       << std::setw(7) << std::setprecision(2) << percent(numBytesUsed,numBytesTotal) << "% "
       << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% "
       << std::setw(8) << std::setprecision(2) << ratio(bytes(),N) << " "
       << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimsUsed) << " "
       << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " "
       << std::setw(7) << std::setprecision(2) << ratio(numPrimsUsed,N) << " "
       << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% "
       << std::endl;
}
void BVHStatistics::print (std::ostream& cout) const
{
RestoreStreamState iostate(cout);
cout.setf(std::ios::fixed, std::ios::floatfield);
cout.fill(' ');
double totalSAH = internalNode.nodeSAH + quadLeaf.leafSAH + proceduralLeaf.leafSAH + instanceLeaf.leafSAH;
size_t totalBytes = internalNode.bytes() + quadLeaf.bytes() + proceduralLeaf.bytes() + instanceLeaf.bytes();
size_t totalNodes = internalNode.numNodes + quadLeaf.numLeaves + proceduralLeaf.numLeaves + instanceLeaf.numLeaves;
size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed;
cout << std::endl;
cout << "BVH statistics:" << std::endl;
cout << "---------------" << std::endl;
cout << " numScenePrimitives = " << numScenePrimitives << std::endl;
cout << " numBuildPrimitives = " << numBuildPrimitives << std::endl;
cout << " numBuildPrimitivesPostSplit = " << numBuildPrimitivesPostSplit << std::endl;
cout << " primRefSplits = " << std::setprecision(2) << percent(numBuildPrimitivesPostSplit,numBuildPrimitives) << "%" << std::endl;
cout << " numBVHPrimitives = " << totalPrimitives << std::endl;
cout << " spatialSplits = " << std::setprecision(2) << percent(totalPrimitives,numScenePrimitives) << "%" << std::endl;
cout << std::endl;
cout << " #nodes SAH total bytes used total b/node b/child b/prim #child fill" << std::endl;
cout << "----------------------------------------------------------------------------------------------------------------------" << std::endl;
cout << " total : ";
cout << std::setw(7) << totalNodes << " ";
cout << std::setw(7) << std::setprecision(3) << totalSAH;
cout << " 100.00% ";
cout << std::setw(8) << std::setprecision(2) << totalBytes/1E6 << " MB ";
cout << " 100.00% ";
cout << " 100.00% ";
cout << " ";
cout << " ";
cout << std::setw(8) << std::setprecision(2) << ratio(totalBytes,totalPrimitives) << std::endl;
LeafStat leaf = quadLeaf + proceduralLeaf + instanceLeaf;
cout << " internalNode : "; internalNode .print(cout,totalSAH,totalBytes,totalPrimitives);
cout << " leaves : "; leaf .print(cout,totalSAH,totalBytes,totalPrimitives);
cout << " quadLeaf : "; quadLeaf .print(cout,totalSAH,totalBytes,totalPrimitives);
cout << " proceduralLeaf : "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives);
cout << " proceduralBlock: "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives,true);
cout << " instanceLeaf : "; instanceLeaf .print(cout,totalSAH,totalBytes,totalPrimitives);
}
void BVHStatistics::print_raw(std::ostream& cout) const
{
RestoreStreamState iostate(cout);
size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed;
cout << "bvh_spatial_split_factor = " << percent(totalPrimitives,numBuildPrimitives) << std::endl;
cout << "bvh_internal_sah = " << internalNode.nodeSAH << std::endl;
cout << "bvh_internal_num = " << internalNode.numNodes << std::endl;
cout << "bvh_internal_num_children_used = " << internalNode.numChildrenUsed << std::endl;
cout << "bvh_internal_num_children_total = " << internalNode.numChildrenTotal << std::endl;
cout << "bvh_internal_num_bytes = " << internalNode.bytes() << std::endl;
cout << "bvh_quad_leaf_sah = " << quadLeaf.leafSAH << std::endl;
cout << "bvh_quad_leaf_num = " << quadLeaf.numLeaves << std::endl;
cout << "bvh_quad_leaf_num_prims_used = " << quadLeaf.numPrimsUsed << std::endl;
cout << "bvh_quad_leaf_num_prims_total = " << quadLeaf.numPrimsTotal << std::endl;
cout << "bvh_quad_leaf_num_bytes_used = " << quadLeaf.numBytesUsed << std::endl;
cout << "bvh_quad_leaf_num_bytes_total = " << quadLeaf.numBytesTotal << std::endl;
cout << "bvh_procedural_leaf_sah = " << proceduralLeaf.leafSAH << std::endl;
cout << "bvh_procedural_leaf_num = " << proceduralLeaf.numLeaves << std::endl;
cout << "bvh_procedural_leaf_num_prims_used = " << proceduralLeaf.numPrimsUsed << std::endl;
cout << "bvh_procedural_leaf_num_prims_total = " << proceduralLeaf.numPrimsTotal << std::endl;
cout << "bvh_procedural_leaf_num_bytes_used = " << proceduralLeaf.numBytesUsed << std::endl;
cout << "bvh_procedural_leaf_num_bytes_total = " << proceduralLeaf.numBytesTotal << std::endl;
cout << "bvh_instance_leaf_sah = " << instanceLeaf.leafSAH << std::endl;
cout << "bvh_instance_leaf_num = " << instanceLeaf.numLeaves << std::endl;
cout << "bvh_instance_leaf_num_prims_used = " << instanceLeaf.numPrimsUsed << std::endl;
cout << "bvh_instance_leaf_num_prims_total = " << instanceLeaf.numPrimsTotal << std::endl;
cout << "bvh_instance_leaf_num_bytes_used = " << instanceLeaf.numBytesUsed << std::endl;
cout << "bvh_instance_leaf_num_bytes_total = " << instanceLeaf.numBytesTotal << std::endl;
}
}

View file

@ -0,0 +1,118 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if defined(ZE_RAYTRACING)
#include "sys/platform.h"
#else
#include "../../../common/sys/platform.h"
#endif
namespace embree
{
/*
  Statistics about a BVH: primitive counts of the build plus
  accumulated statistics for the internal nodes and for each leaf
  type.
*/
struct BVHStatistics
{
  /* accumulated statistics of internal nodes */
  struct NodeStat
  {
    NodeStat ( double nodeSAH = 0,
               size_t numNodes = 0,
               size_t numChildrenUsed = 0,
               size_t numChildrenTotal = 0,
               size_t numBytes = 0)
      : nodeSAH(nodeSAH),
        numNodes(numNodes),
        numChildrenUsed(numChildrenUsed),
        numChildrenTotal(numChildrenTotal),
        numBytes(numBytes) {}

    double sah() const { return nodeSAH; }
    size_t bytes() const { return numBytes; }
    size_t size() const { return numNodes; }

    double fillRateNom () const { return double(numChildrenUsed); }
    double fillRateDen () const { return double(numChildrenTotal); }

    /* used children divided by total children, 0 if there are no children */
    double fillRate () const
    {
      const double den = fillRateDen();
      if (den == 0.0) return 0.0;
      return fillRateNom()/den;
    }

    /* member-wise sum of two statistics */
    friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b)
    {
      NodeStat sum = a;
      sum.nodeSAH += b.nodeSAH;
      sum.numNodes += b.numNodes;
      sum.numChildrenUsed += b.numChildrenUsed;
      sum.numChildrenTotal += b.numChildrenTotal;
      sum.numBytes += b.numBytes;
      return sum;
    }

    void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const;

  public:
    double nodeSAH;          //!< SAH of the nodes
    size_t numNodes;         //!< Number of nodes
    size_t numChildrenUsed;  //!< Number of used children
    size_t numChildrenTotal; //!< Number of used and unused children
    size_t numBytes;         //!< Number of bytes of the nodes
  };

  /* accumulated statistics of one leaf type */
  struct LeafStat
  {
    LeafStat(double leafSAH = 0.0f,
             size_t numLeaves = 0,
             size_t numBlocks = 0,
             size_t numPrimsUsed = 0,
             size_t numPrimsTotal = 0,
             size_t numBytesUsed = 0,
             size_t numBytesTotal = 0)
      : leafSAH(leafSAH),
        numLeaves(numLeaves),
        numBlocks(numBlocks),
        numPrimsUsed(numPrimsUsed),
        numPrimsTotal(numPrimsTotal),
        numBytesUsed(numBytesUsed),
        numBytesTotal(numBytesTotal) {}

    double sah() const { return leafSAH; }
    size_t bytes() const { return numBytesTotal; }
    size_t size() const { return numLeaves; }

    double fillRateNom () const { return double(numPrimsUsed); }
    double fillRateDen () const { return double(numPrimsTotal); }

    /* used primitives divided by total primitives, 0 if there are no primitives */
    double fillRate () const
    {
      const double den = fillRateDen();
      if (den == 0.0) return 0.0;
      return fillRateNom()/den;
    }

    /* member-wise sum of two statistics */
    friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
    {
      LeafStat sum = a;
      sum.leafSAH += b.leafSAH;
      sum.numLeaves += b.numLeaves;
      sum.numBlocks += b.numBlocks;
      sum.numPrimsUsed += b.numPrimsUsed;
      sum.numPrimsTotal += b.numPrimsTotal;
      sum.numBytesUsed += b.numBytesUsed;
      sum.numBytesTotal += b.numBytesTotal;
      return sum;
    }

    void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks = false) const;

  public:
    double leafSAH;       //!< SAH of the leaves only
    size_t numLeaves;     //!< Number of leaf nodes.
    size_t numBlocks;     //!< Number of blocks referenced
    size_t numPrimsUsed;  //!< Number of active primitives
    size_t numPrimsTotal; //!< Number of active and inactive primitives
    size_t numBytesUsed;  //!< Number of used bytes
    size_t numBytesTotal; //!< Number of total bytes of leaves.
  };

  BVHStatistics ()
    : numScenePrimitives(0), numBuildPrimitives(0), numBuildPrimitivesPostSplit(0) {}

  void print (std::ostream& cout) const;
  void print_raw(std::ostream& cout) const;

  size_t numScenePrimitives;
  size_t numBuildPrimitives;
  size_t numBuildPrimitivesPostSplit;
  NodeStat internalNode;
  LeafStat quadLeaf;
  LeafStat proceduralLeaf;
  LeafStat instanceLeaf;
};
}