Initial commit.
This commit is contained in:
commit
d3bb49b3f5
1073 changed files with 484757 additions and 0 deletions
629
Framework/external/embree/kernels/rthwif/rtbuild/leaf.h
vendored
Normal file
629
Framework/external/embree/kernels/rthwif/rtbuild/leaf.h
vendored
Normal file
|
|
@ -0,0 +1,629 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(ZE_RAYTRACING)
|
||||
#include "sys/sysinfo.h"
|
||||
#include "sys/vector.h"
|
||||
#include "math/vec2.h"
|
||||
#include "math/vec3.h"
|
||||
#include "math/bbox.h"
|
||||
#include "math/affinespace.h"
|
||||
#else
|
||||
#include "../../../common/sys/sysinfo.h"
|
||||
#include "../../../common/sys/vector.h"
|
||||
#include "../../../common/math/vec2.h"
|
||||
#include "../../../common/math/vec3.h"
|
||||
#include "../../../common/math/bbox.h"
|
||||
#include "../../../common/math/lbbox.h"
|
||||
#include "../../../common/math/affinespace.h"
|
||||
#endif
|
||||
|
||||
#include "node_type.h"
|
||||
|
||||
#include <map>
|
||||
#include <bitset>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*
|
||||
|
||||
Internal representation for GeometryFlags.
|
||||
|
||||
*/
|
||||
|
||||
#undef OPAQUE // Windows defines OPAQUE in gdi.h
|
||||
enum class GeometryFlags : uint32_t
{
  NONE   = 0,  // no flag set
  OPAQUE = 1   // bit 0: opaque flag
};
|
||||
|
||||
/* Returns true if the two flag values have at least one set bit in common. */
inline bool operator& (GeometryFlags a, GeometryFlags b)
{
  const int common = int(a) & int(b);
  return common != 0;
}
|
||||
|
||||
/* output operator for GeometryFlags */
|
||||
inline std::ostream& operator<<(std::ostream& cout, const GeometryFlags& gflags)
|
||||
{
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
if (gflags == GeometryFlags::NONE) return cout << "NONE";
|
||||
if (gflags & GeometryFlags::OPAQUE) cout << "OPAQUE ";
|
||||
#endif
|
||||
return cout;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
This structure is a header for each leaf type. Only the
|
||||
InstanceLeaf has a slightly different header.
|
||||
|
||||
All primitives inside a leaf are of the same geometry, thus have
|
||||
the same geometry index (geomIndex), the same shader index
|
||||
(shaderIndex), the same geometry mask (geomMask), and the same
|
||||
geometry flags (geomFlags).
|
||||
|
||||
The shaderIndex is used to calculate the shader record to
|
||||
invoke. This is an extension to DXR where the geomIndex is used
|
||||
for that purpose. For DXR we can always set the shaderIndex to be
|
||||
equal to the geomIndex.
|
||||
|
||||
*/
|
||||
|
||||
struct PrimLeafDesc
|
||||
{
|
||||
static const uint32_t MAX_GEOM_INDEX = 0x3FFFFFFF;
|
||||
static const uint32_t MAX_SHADER_INDEX = 0xFFFFFF;
|
||||
|
||||
enum Type : uint32_t
|
||||
{
|
||||
TYPE_NONE = 0,
|
||||
|
||||
/* For a node type of NODE_TYPE_PROCEDURAL we support enabling
|
||||
* and disabling the opaque/non_opaque culling. */
|
||||
|
||||
TYPE_OPACITY_CULLING_ENABLED = 0,
|
||||
TYPE_OPACITY_CULLING_DISABLED = 1
|
||||
};
|
||||
|
||||
PrimLeafDesc() {}
|
||||
|
||||
PrimLeafDesc(uint32_t shaderIndex, uint32_t geomIndex, GeometryFlags gflags, uint32_t geomMask, Type type = TYPE_NONE)
|
||||
: shaderIndex(shaderIndex), geomMask(geomMask), geomIndex(geomIndex), type(type), geomFlags((uint32_t)gflags)
|
||||
{
|
||||
if (shaderIndex > MAX_SHADER_INDEX)
|
||||
throw std::runtime_error("too large shader ID");
|
||||
|
||||
if (geomIndex > MAX_GEOM_INDEX)
|
||||
throw std::runtime_error("too large geometry ID");
|
||||
}
|
||||
|
||||
/* compares two PrimLeafDesc's for equality */
|
||||
friend bool operator ==(const PrimLeafDesc& a, const PrimLeafDesc& b)
|
||||
{
|
||||
if (a.geomIndex != b.geomIndex) return false;
|
||||
assert(a.shaderIndex == b.shaderIndex);
|
||||
assert(a.geomMask == b.geomMask);
|
||||
assert(a.type == b.type);
|
||||
assert(a.geomFlags == b.geomFlags);
|
||||
return true;
|
||||
}
|
||||
|
||||
friend bool operator !=(const PrimLeafDesc& a, const PrimLeafDesc& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
void print(std::ostream& cout, uint32_t depth) const
|
||||
{
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
cout << tab(depth) << "PrimLeafDesc {" << std::endl;
|
||||
cout << tab(depth) << " shaderIndex = " << shaderIndex << std::endl;
|
||||
cout << tab(depth) << " geomMask = " << std::bitset<8>(geomMask) << std::endl;
|
||||
cout << tab(depth) << " geomFlags = " << getGeomFlags() << std::endl;
|
||||
cout << tab(depth) << " geomIndex = " << geomIndex << std::endl;
|
||||
cout << tab(depth) << "}";
|
||||
#endif
|
||||
}
|
||||
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const PrimLeafDesc& desc) {
|
||||
desc.print(cout,0); return cout;
|
||||
}
|
||||
|
||||
/* Checks if opaque culling is enabled. */
|
||||
bool opaqueCullingEnabled() const {
|
||||
return type == TYPE_OPACITY_CULLING_ENABLED;
|
||||
}
|
||||
|
||||
/* procedural instances store some valid shader index */
|
||||
bool isProceduralInstance() const {
|
||||
return shaderIndex != 0xFFFFFF;
|
||||
}
|
||||
|
||||
/* returns geometry flags */
|
||||
GeometryFlags getGeomFlags() const {
|
||||
return (GeometryFlags) geomFlags;
|
||||
}
|
||||
|
||||
public:
|
||||
uint32_t shaderIndex : 24; // shader index used for shader record calculations
|
||||
uint32_t geomMask : 8; // geometry mask used for ray masking
|
||||
|
||||
uint32_t geomIndex : 29; // the geometry index specifies the n'th geometry of the scene
|
||||
/*Type*/ uint32_t type : 1; // enable/disable culling for procedurals and instances
|
||||
/*GeometryFlags*/ uint32_t geomFlags : 2; // geometry flags of this geometry
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
The QuadLeaf structure stores a single quad. A quad is a triangle
|
||||
pair with a shared edge. The first triangle has vertices v0,v1,v2,
|
||||
while the second triangle has vertices v[j0],v[j1],v[j2], thus the
|
||||
second triangle uses local triangle indices.
|
||||
|
||||
*/
|
||||
|
||||
struct QuadLeaf
|
||||
{
|
||||
QuadLeaf() {}
|
||||
|
||||
QuadLeaf (Vec3f v0, Vec3f v1, Vec3f v2, Vec3f v3,
|
||||
uint8_t j0, uint8_t j1, uint8_t j2,
|
||||
uint32_t shaderIndex, uint32_t geomIndex, uint32_t primIndex0, uint32_t primIndex1,
|
||||
GeometryFlags gflags, uint32_t geomMask, bool last)
|
||||
|
||||
: leafDesc(shaderIndex,geomIndex,gflags,geomMask),
|
||||
primIndex0(primIndex0),
|
||||
primIndex1Delta(primIndex1-primIndex0), pad1(0),
|
||||
j0(j0),j1(j1),j2(j2),last(last),pad(0),
|
||||
v0(v0), v1(v1), v2(v2), v3(v3)
|
||||
{
|
||||
/* There are some constraints on the primitive indices. The
|
||||
* second primitive index always has to be the largest and the
|
||||
* distance between them can be at most 0xFFFF as we use 16 bits
|
||||
* to encode that difference. */
|
||||
assert(primIndex0 <= primIndex1 && primIndex1 - primIndex0 < 0xFFFF);
|
||||
}
|
||||
|
||||
/* returns the i'th vertex */
|
||||
__forceinline Vec3f vertex(size_t i) const {
|
||||
assert(i < 4); return (&v0)[i];
|
||||
}
|
||||
|
||||
/* Checks if the specified triange is the last inside a leaf
|
||||
* list. */
|
||||
bool isLast(uint32_t i = 1) const
|
||||
{
|
||||
assert(i<2);
|
||||
if (i == 0) return false; // the first triangle is never the last
|
||||
else return last; // the last bit tags the second triangle to be last
|
||||
}
|
||||
|
||||
/* Checks if the second triangle exists. */
|
||||
bool valid2() const {
|
||||
return !(j0 == 0 && j1 == 0 && j2 == 0);
|
||||
}
|
||||
|
||||
/* Calculates the number of stored triangles. */
|
||||
size_t size() const {
|
||||
return 1 + valid2();
|
||||
}
|
||||
|
||||
/* Calculates the effectively used bytes. If we store only one
|
||||
* triangle we waste the storage of one vertex. */
|
||||
size_t usedBytes() const
|
||||
{
|
||||
if (valid2()) return sizeof(QuadLeaf);
|
||||
else return sizeof(QuadLeaf)-sizeof(Vec3f);
|
||||
}
|
||||
|
||||
/* Calculates to delta to add to primIndex0 to get the primitive
|
||||
* index of the i'th triangle. */
|
||||
uint32_t primIndexDelta(uint32_t i) const
|
||||
{
|
||||
assert(i<2);
|
||||
return i*primIndex1Delta;
|
||||
}
|
||||
|
||||
/* Calculates the primitive index of the i'th triangle. */
|
||||
uint32_t primIndex(uint32_t i) const
|
||||
{
|
||||
assert(i<2);
|
||||
return primIndex0 + primIndexDelta(i);
|
||||
}
|
||||
|
||||
/* Quad mode is a special mode where the uv's over the quad are
|
||||
* defined over the entire range [0,1]x[0,1]. */
|
||||
bool quadMode() const {
|
||||
return primIndex1Delta == 0;
|
||||
}
|
||||
|
||||
/* Calculates the bounding box of this leaf. */
|
||||
BBox3f bounds() const
|
||||
{
|
||||
BBox3f b = empty;
|
||||
b.extend(v0);
|
||||
b.extend(v1);
|
||||
b.extend(v2);
|
||||
if (valid2())
|
||||
b.extend(v3);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* output of quad leaf */
|
||||
void print(std::ostream& cout, uint32_t depth) const
|
||||
{
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
cout << tab(depth) << "QuadLeaf {" << std::endl;
|
||||
cout << tab(depth) << " addr = " << this << std::endl;
|
||||
cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl;
|
||||
cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl;
|
||||
cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl;
|
||||
cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl;
|
||||
cout << tab(depth) << " triangle0 = { " << std::endl;
|
||||
cout << tab(depth) << " primIndex = " << primIndex(0) << std::endl;
|
||||
cout << tab(depth) << " v0 = " << v0 << std::endl;
|
||||
cout << tab(depth) << " v1 = " << v1 << std::endl;
|
||||
cout << tab(depth) << " v2 = " << v2 << std::endl;
|
||||
cout << tab(depth) << " }" << std::endl;
|
||||
if (valid2()) {
|
||||
cout << tab(depth) << " triangle1 = { " << std::endl;
|
||||
cout << tab(depth) << " primIndex = " << primIndex(1) << std::endl;
|
||||
cout << tab(depth) << " v0 = " << vertex(j0) << std::endl;
|
||||
cout << tab(depth) << " v1 = " << vertex(j1) << std::endl;
|
||||
cout << tab(depth) << " v2 = " << vertex(j2) << std::endl;
|
||||
cout << tab(depth) << " }" << std::endl;
|
||||
}
|
||||
cout << tab(depth) << "}";
|
||||
#endif
|
||||
}
|
||||
|
||||
/* output operator for QuadLeaf */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const QuadLeaf& leaf) {
|
||||
leaf.print(cout,0); return cout;
|
||||
}
|
||||
|
||||
public:
|
||||
PrimLeafDesc leafDesc; // the leaf header
|
||||
|
||||
uint32_t primIndex0; // primitive index of first triangle
|
||||
struct {
|
||||
uint32_t primIndex1Delta : 5; // delta encoded primitive index of second triangle
|
||||
uint32_t pad1 : 11; // MBZ
|
||||
uint32_t j0 : 2; // specifies first vertex of second triangle
|
||||
uint32_t j1 : 2; // specified second vertex of second triangle
|
||||
uint32_t j2 : 2; // specified third vertex of second triangle
|
||||
uint32_t last : 1; // true if the second triangle is the last triangle in a leaf list
|
||||
uint32_t pad : 9; // unused bits
|
||||
};
|
||||
|
||||
Vec3f v0; // first vertex of first triangle
|
||||
Vec3f v1; // second vertex of first triangle
|
||||
Vec3f v2; // third vertex of first triangle
|
||||
Vec3f v3; // forth vertex used for second triangle
|
||||
};
|
||||
|
||||
static_assert(sizeof(QuadLeaf) == 64, "QuadLeaf must be 64 bytes large");
|
||||
|
||||
/*
|
||||
|
||||
Internal instance flags definition.
|
||||
|
||||
*/
|
||||
|
||||
struct InstanceFlags
{
  enum Flags : uint8_t
  {
    NONE                            = 0x0,
    TRIANGLE_CULL_DISABLE           = 0x1, // disables culling of front and back facing triangles through ray flags
    TRIANGLE_FRONT_COUNTERCLOCKWISE = 0x2, // for mirroring transformations the instance can switch front and backface of triangles
    FORCE_OPAQUE                    = 0x4, // forces all primitives inside this instance to be opaque
    FORCE_NON_OPAQUE                = 0x8  // forces all primitives inside this instance to be non-opaque
  };

  /* leaves the flags uninitialized */
  InstanceFlags() {}

  /* wraps an already typed flag value */
  InstanceFlags(Flags rflags)
  {
    flags = rflags;
  }

  /* wraps a raw 8 bit flag value */
  InstanceFlags(uint8_t rflags)
  {
    flags = static_cast<Flags>(rflags);
  }

  /* conversion back to the plain flag value */
  operator Flags () const {
    return flags;
  }

  /* Output operator for InstanceFlags. Prints "NONE" if no flag is
   * set, otherwise the name of each set flag followed by a space.
   * Prints nothing when compiled for a SYCL device. */
  friend inline std::ostream& operator<<(std::ostream& cout, const InstanceFlags& iflags)
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    if (iflags.flags == InstanceFlags::NONE)
    {
      cout << "NONE";
      return cout;
    }

    if (iflags.triangle_cull_disable)
      cout << "TRIANGLE_CULL_DISABLE ";

    if (iflags.triangle_front_counterclockwise)
      cout << "TRIANGLE_FRONT_COUNTERCLOCKWISE ";

    if (iflags.force_opaque)
      cout << "FORCE_OPAQUE ";

    if (iflags.force_non_opaque)
      cout << "FORCE_NON_OPAQUE ";
#endif
    return cout;
  }

public:

  /* the flag value and a per-flag bit field view share the same storage */
  union
  {
    Flags flags;
    struct
    {
      bool triangle_cull_disable : 1;
      bool triangle_front_counterclockwise : 1;
      bool force_opaque : 1;
      bool force_non_opaque : 1;
    };
  };
};
|
||||
|
||||
/* Combines two instance flag values by a bitwise or. */
inline InstanceFlags::Flags operator| (InstanceFlags::Flags a,InstanceFlags::Flags b)
{
  const int merged = int(a) | int(b);
  return static_cast<InstanceFlags::Flags>(merged);
}
|
||||
|
||||
/*
|
||||
|
||||
The instance leaf represents an instance. It essentially stores
|
||||
transformation matrices (local to world as well as world to
|
||||
local) of the instance as well as a pointer to the start node
|
||||
of some BVH.
|
||||
|
||||
The instance leaf consists of two parts, part0 (first 64 bytes)
|
||||
and part1 (second 64 bytes). Part0 will only get accessed by
|
||||
hardware and stores the world to local transformation as well as
|
||||
the BVH node to start traversal. Part1 stores additional data
|
||||
that is only read by the shader, e.g. it stores the local to
|
||||
world transformation of the instance.
|
||||
|
||||
The layout of the first part of the InstanceLeaf is compatible
|
||||
with a ProceduralLeaf, thus we can use the same layout for
|
||||
software instancing if we want.
|
||||
|
||||
*/
|
||||
|
||||
struct InstanceLeaf
|
||||
{
|
||||
InstanceLeaf() {}
|
||||
|
||||
InstanceLeaf (AffineSpace3f obj2world, uint64_t startNodePtr, uint32_t instID, uint32_t instUserID, uint8_t instMask)
|
||||
{
|
||||
part0.shaderIndex = 0; //InstShaderRecordID;
|
||||
part0.geomMask = instMask;
|
||||
|
||||
part0.instanceContributionToHitGroupIndex = 0; //desc.InstanceContributionToHitGroupIndex;
|
||||
part0.pad0 = 0;
|
||||
part0.type = PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED;
|
||||
part0.geomFlags = (uint32_t) GeometryFlags::NONE;
|
||||
|
||||
part0.startNodePtr = startNodePtr;
|
||||
assert((startNodePtr >> 48) == 0);
|
||||
part0.instFlags = (InstanceFlags) 0;
|
||||
part0.pad1 = 0;
|
||||
|
||||
part1.instanceID = instUserID;
|
||||
part1.instanceIndex = instID;
|
||||
part1.bvhPtr = (uint64_t) 0;
|
||||
part1.pad = 0;
|
||||
|
||||
part1.obj2world_vx = obj2world.l.vx;
|
||||
part1.obj2world_vy = obj2world.l.vy;
|
||||
part1.obj2world_vz = obj2world.l.vz;
|
||||
part0.obj2world_p = obj2world.p;
|
||||
|
||||
const AffineSpace3f world2obj = rcp(obj2world);
|
||||
part0.world2obj_vx = world2obj.l.vx;
|
||||
part0.world2obj_vy = world2obj.l.vy;
|
||||
part0.world2obj_vz = world2obj.l.vz;
|
||||
part1.world2obj_p = world2obj.p;
|
||||
}
|
||||
|
||||
/* Returns the address of the start node pointer. We need this
|
||||
* address to calculate relocation tables when dumping the BVH to
|
||||
* disk. */
|
||||
const uint64_t startNodePtrAddr() const {
|
||||
return (uint64_t)((char*)&part0 + 8);
|
||||
}
|
||||
|
||||
/* Returns the address of the BVH that contains the start node. */
|
||||
const uint64_t bvhPtrAddr() const {
|
||||
return (uint64_t)&part1;
|
||||
}
|
||||
|
||||
/* returns the world to object space transformation matrix. */
|
||||
const AffineSpace3f World2Obj() const {
|
||||
return AffineSpace3f(part0.world2obj_vx,part0.world2obj_vy,part0.world2obj_vz,part1.world2obj_p);
|
||||
}
|
||||
|
||||
/* returns the object to world space transformation matrix. */
|
||||
const AffineSpace3f Obj2World() const {
|
||||
return AffineSpace3f(part1.obj2world_vx,part1.obj2world_vy,part1.obj2world_vz,part0.obj2world_p);
|
||||
}
|
||||
|
||||
/* output operator for instance leaf */
|
||||
void print (std::ostream& cout, uint32_t depth) const
|
||||
{
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
if (!part0.type) cout << tab(depth) << "InstanceLeaf {" << std::endl;
|
||||
else cout << tab(depth) << "ProceduralInstanceLeaf {" << std::endl;
|
||||
|
||||
cout << tab(depth) << " addr = " << this << std::endl;
|
||||
cout << tab(depth) << " shaderIndex = " << part0.shaderIndex << std::endl;
|
||||
cout << tab(depth) << " geomMask = " << std::bitset<8>(part0.geomMask) << std::endl;
|
||||
cout << tab(depth) << " geomIndex = " << part1.instanceIndex << std::endl;
|
||||
cout << tab(depth) << " instanceID = " << part1.instanceID << std::endl;
|
||||
cout << tab(depth) << " instFlags = " << InstanceFlags(part0.instFlags) << std::endl;
|
||||
cout << tab(depth) << " startNodePtr = " << (void*)(size_t)part0.startNodePtr << std::endl;
|
||||
cout << tab(depth) << " obj2world.vx = " << part1.obj2world_vx << std::endl;
|
||||
cout << tab(depth) << " obj2world.vy = " << part1.obj2world_vy << std::endl;
|
||||
cout << tab(depth) << " obj2world.vz = " << part1.obj2world_vz << std::endl;
|
||||
cout << tab(depth) << " obj2world.p = " << part0.obj2world_p << std::endl;
|
||||
cout << tab(depth) << " world2obj.vx = " << part0.world2obj_vx << std::endl;
|
||||
cout << tab(depth) << " world2obj.vy = " << part0.world2obj_vy << std::endl;
|
||||
cout << tab(depth) << " world2obj.vz = " << part0.world2obj_vz << std::endl;
|
||||
cout << tab(depth) << " world2obj.p = " << part1.world2obj_p << std::endl;
|
||||
cout << tab(depth) << " instanceContributionToHitGroupIndex = " << part0.instanceContributionToHitGroupIndex << std::endl;
|
||||
cout << tab(depth) << "}";
|
||||
#endif
|
||||
}
|
||||
|
||||
/* output operator for InstanceLeaf */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const InstanceLeaf& leaf) {
|
||||
leaf.print(cout,0); return cout;
|
||||
}
|
||||
|
||||
/* first 64 bytes accessed during traversal by hardware */
|
||||
struct Part0
|
||||
{
|
||||
/* Checks if opaque culling is enabled. */
|
||||
bool opaqueCullingEnabled() const {
|
||||
return type == PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED;
|
||||
}
|
||||
|
||||
public:
|
||||
uint32_t shaderIndex : 24; // shader index used to calculate instancing shader in case of software instancing
|
||||
uint32_t geomMask : 8; // geometry mask used for ray masking
|
||||
|
||||
uint32_t instanceContributionToHitGroupIndex : 24;
|
||||
uint32_t pad0 : 5;
|
||||
|
||||
/* the following two entries are only used for procedural instances */
|
||||
/*PrimLeafDesc::Type*/ uint32_t type : 1; // enables/disables opaque culling
|
||||
/*GeometryFlags*/ uint32_t geomFlags : 2; // unused for instances
|
||||
|
||||
uint64_t startNodePtr : 48; // start node where to continue traversal of the instanced object
|
||||
uint64_t instFlags : 8; // flags for the instance (see InstanceFlags)
|
||||
uint64_t pad1 : 8; // unused bits
|
||||
|
||||
Vec3f world2obj_vx; // 1st column of Worl2Obj transform
|
||||
Vec3f world2obj_vy; // 2nd column of Worl2Obj transform
|
||||
Vec3f world2obj_vz; // 3rd column of Worl2Obj transform
|
||||
Vec3f obj2world_p; // translation of Obj2World transform (on purpose in first 64 bytes)
|
||||
} part0;
|
||||
|
||||
/* second 64 bytes accessed during shading */
|
||||
struct Part1
|
||||
{
|
||||
uint64_t bvhPtr : 48; // pointer to BVH where start node belongs too
|
||||
uint64_t pad : 16; // unused bits
|
||||
|
||||
uint32_t instanceID; // user defined value per DXR spec
|
||||
uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene)
|
||||
|
||||
Vec3f obj2world_vx; // 1st column of Obj2World transform
|
||||
Vec3f obj2world_vy; // 2nd column of Obj2World transform
|
||||
Vec3f obj2world_vz; // 3rd column of Obj2World transform
|
||||
Vec3f world2obj_p; // translation of World2Obj transform
|
||||
} part1;
|
||||
};
|
||||
|
||||
static_assert(sizeof(InstanceLeaf) == 128, "InstanceLeaf must be 128 bytes large");
|
||||
|
||||
|
||||
/*
|
||||
Leaf type for procedural geometry. This leaf only contains the
|
||||
leaf header (which identifies the geometry) and a list of
|
||||
primitive indices.
|
||||
|
||||
The BVH will typically reference only some of the primitives
|
||||
stored inside this leaf. The range is specified by a start
|
||||
primitive and the last primitive is tagged with a bit.
|
||||
|
||||
*/
|
||||
|
||||
struct ProceduralLeaf
|
||||
{
|
||||
static const uint32_t N = 13;
|
||||
|
||||
/* Creates an empty procedural leaf. */
|
||||
ProceduralLeaf ()
|
||||
: leafDesc(PrimLeafDesc::MAX_SHADER_INDEX,PrimLeafDesc::MAX_GEOM_INDEX,GeometryFlags::NONE,0), numPrimitives(0), pad(0), last(0)
|
||||
{
|
||||
for (auto& id : _primIndex) id = 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
/* Creates a procedural leaf with one primitive. More primitives
|
||||
* of the same geometry can get added later using the add
|
||||
* function. */
|
||||
|
||||
ProceduralLeaf (PrimLeafDesc leafDesc, uint32_t primIndex, bool last)
|
||||
: leafDesc(leafDesc), numPrimitives(1), pad(0), last(last ? 0xFFFFFFFF : 0xFFFFFFFE)
|
||||
{
|
||||
for (auto& id : _primIndex) id = 0xFFFFFFFF;
|
||||
_primIndex[0] = primIndex;
|
||||
}
|
||||
|
||||
/* returns the number of primitives stored inside this leaf */
|
||||
uint32_t size() const {
|
||||
return numPrimitives;
|
||||
}
|
||||
|
||||
/* Calculates the effectively used bytes. */
|
||||
size_t usedBytes() const
|
||||
{
|
||||
/*if (leafDesc.isProceduralInstance())
|
||||
return sizeof(InstanceLeaf);
|
||||
else*/
|
||||
return sizeof(PrimLeafDesc)+4+4*numPrimitives;
|
||||
}
|
||||
|
||||
/* if possible adds a new primitive to this leaf */
|
||||
bool add(PrimLeafDesc leafDesc_in, uint32_t primIndex_in, bool last_in)
|
||||
{
|
||||
assert(primIndex_in != 0xFFFFFFFF);
|
||||
if (numPrimitives >= N) return false;
|
||||
if (!numPrimitives) leafDesc = leafDesc_in;
|
||||
if (leafDesc != leafDesc_in) return false;
|
||||
_primIndex[numPrimitives] = primIndex_in;
|
||||
if (last_in) last |= 1 << numPrimitives;
|
||||
else last &= ~(1 << numPrimitives);
|
||||
numPrimitives++;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* returns the primitive index of the i'th primitive */
|
||||
uint32_t primIndex(uint32_t i) const
|
||||
{
|
||||
assert(i < N);
|
||||
return _primIndex[i];
|
||||
}
|
||||
|
||||
/* checks if the i'th primitive is the last in a leaf list */
|
||||
bool isLast(uint32_t i) const {
|
||||
if (i >= N) return true; // just to make some verify tests happy
|
||||
else return (last >> i) & 1;
|
||||
}
|
||||
|
||||
/* output operator for procedural leaf */
|
||||
void print (std::ostream& cout, uint32_t i, uint32_t depth) const
|
||||
{
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
cout << tab(depth) << "ProceduralLeaf {" << std::endl;
|
||||
cout << tab(depth) << " addr = " << this << std::endl;
|
||||
cout << tab(depth) << " slot = " << i << std::endl;
|
||||
if (i < N) {
|
||||
cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl;
|
||||
cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl;
|
||||
cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl;
|
||||
cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl;
|
||||
cout << tab(depth) << " primIndex = " << primIndex(i) << std::endl;
|
||||
} else {
|
||||
cout << tab(depth) << " INVALID" << std::endl;
|
||||
}
|
||||
cout << tab(depth) << "}";
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
PrimLeafDesc leafDesc; // leaf header identifying the geometry
|
||||
uint32_t numPrimitives : 4; // number of stored primitives
|
||||
uint32_t pad : 32-4-N;
|
||||
uint32_t last : N; // bit vector with a last bit per primitive
|
||||
uint32_t _primIndex[N]; // primitive indices of all primitives stored inside the leaf
|
||||
};
|
||||
|
||||
static_assert(sizeof(ProceduralLeaf) == 64, "ProceduralLeaf must be 64 bytes large");
|
||||
}
|
||||
56
Framework/external/embree/kernels/rthwif/rtbuild/node_type.h
vendored
Normal file
56
Framework/external/embree/kernels/rthwif/rtbuild/node_type.h
vendored
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* The type of a node. NODE_TYPE_MIXED and NODE_TYPE_INTERNAL share
 * the value 0. */
enum NodeType : uint8_t
{
  NODE_TYPE_MIXED      = 0,  // identifies a mixed internal node where each child can have a different type
  NODE_TYPE_INTERNAL   = 0,  // internal BVH node with 6 children
  NODE_TYPE_INSTANCE   = 1,  // instance leaf
  NODE_TYPE_PROCEDURAL = 3,  // procedural leaf
  NODE_TYPE_QUAD       = 4,  // quad leaf
  NODE_TYPE_INVALID    = 7   // indicates invalid node
};
|
||||
|
||||
/* output operator for NodeType */
|
||||
inline std::ostream& operator<<(std::ostream& _cout, const NodeType& _type)
|
||||
{
|
||||
#if !defined(__RTRT_GSIM)
|
||||
switch (_type)
|
||||
{
|
||||
case NODE_TYPE_INTERNAL: _cout << "INTERNAL"; break;
|
||||
case NODE_TYPE_INSTANCE: _cout << "INSTANCE"; break;
|
||||
case NODE_TYPE_PROCEDURAL: _cout << "PROCEDURAL"; break;
|
||||
case NODE_TYPE_QUAD: _cout << "QUAD"; break;
|
||||
case NODE_TYPE_INVALID: _cout << "INVALID"; break;
|
||||
default: _cout << "INVALID NODE TYPE"; break;
|
||||
}
|
||||
#endif
|
||||
return _cout;
|
||||
};
|
||||
|
||||
/*
|
||||
Sub-type definition for each NodeType
|
||||
*/
|
||||
|
||||
enum SubType : uint8_t
{
  SUB_TYPE_NONE = 0x00,

  /* sub-type for NODE_TYPE_INTERNAL */
  SUB_TYPE_INTERNAL6 = 0, // Xe+: internal node with 6 children

  /* sub-type for NODE_TYPE_QUAD */
  SUB_TYPE_QUAD = 0x00, // Xe+: standard quad leaf (64 bytes)

  /* sub-type for NODE_TYPE_PROCEDURAL */
  SUB_TYPE_PROCEDURAL = 0x00 // Xe+: standard procedural leaf
};
|
||||
}
|
||||
265
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6.cpp
vendored
Normal file
265
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6.cpp
vendored
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "qbvh6.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename InternalNode>
|
||||
void computeInternalNodeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area)
|
||||
{
|
||||
InternalNode* inner = node.innerNode<InternalNode>();
|
||||
|
||||
size_t size = 0;
|
||||
for (uint32_t i = 0; i < InternalNode::NUM_CHILDREN; i++)
|
||||
{
|
||||
if (inner->valid(i))
|
||||
{
|
||||
size++;
|
||||
computeStatistics(stats, inner->child(i), time_range, area(inner->bounds(i)), root_bounds_area, InternalNode::NUM_CHILDREN);
|
||||
}
|
||||
}
|
||||
|
||||
/* update BVH statistics */
|
||||
stats.internalNode.numNodes++;
|
||||
stats.internalNode.numChildrenUsed += size;
|
||||
stats.internalNode.numChildrenTotal += InternalNode::NUM_CHILDREN;
|
||||
stats.internalNode.nodeSAH += time_range.size() * node_bounds_area / root_bounds_area;
|
||||
stats.internalNode.numBytes += sizeof(InternalNode);
|
||||
}
|
||||
|
||||
/* Recursively gathers statistics for the subtree starting at node
 * and accumulates them into stats. Leaf lists (quad and procedural)
 * are walked until an entry tagged as last is found. The numChildren
 * argument is not used. */
void computeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area, uint32_t numChildren)
{
  switch (node.type)
  {
  case NODE_TYPE_INSTANCE:
  {
    auto& instStats = stats.instanceLeaf;
    instStats.numLeaves++;
    instStats.numPrimsUsed++;
    instStats.numPrimsTotal++;
    instStats.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;
    instStats.numBytesUsed += sizeof(InstanceLeaf);
    instStats.numBytesTotal += sizeof(InstanceLeaf);
    break;
  }

  case NODE_TYPE_QUAD:
  {
    auto& quadStats = stats.quadLeaf;
    quadStats.numLeaves++;

    /* walk the list of consecutive quad leaves, runs at least once */
    for (bool done = false; !done; )
    {
      QuadLeaf* quad = node.leafNodeQuad();
      node.node += sizeof(QuadLeaf);
      done = quad->isLast();

      quadStats.numPrimsUsed += quad->size();
      quadStats.numPrimsTotal += 2;
      quadStats.numBytesUsed += quad->usedBytes();
      quadStats.numBytesTotal += sizeof(QuadLeaf);
      quadStats.leafSAH += quad->size() * time_range.size() * node_bounds_area / root_bounds_area;
    }
    break;
  }

  case NODE_TYPE_PROCEDURAL:
  {
    /*if (node.leafNodeProcedural()->leafDesc.isProceduralInstance()) // FIXME: for some reason we always to into this case!?
    {
      stats.proceduralLeaf.numLeaves++;
      stats.proceduralLeaf.numPrimsUsed += 1;
      stats.proceduralLeaf.numPrimsTotal += 1;
      stats.proceduralLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;
      stats.proceduralLeaf.numBytesUsed += sizeof(InstanceLeaf);
      stats.proceduralLeaf.numBytesTotal += sizeof(InstanceLeaf);
    }
    else*/
    {
      auto& procStats = stats.proceduralLeaf;
      procStats.numLeaves++;

      /* walk the primitives of the list, starting at the slot the
       * node references, runs at least once */
      uint32_t slot = node.cur_prim;
      for (bool done = false; !done; )
      {
        ProceduralLeaf* leaf = node.leafNodeProcedural();
        done = leaf->isLast(slot);

        /* a block gets accounted when its first slot is visited */
        if (slot == 0)
        {
          procStats.numBlocks++;
          procStats.numBytesUsed += leaf->usedBytes();
          procStats.numBytesTotal += sizeof(ProceduralLeaf);
        }

        const uint32_t primsInBlock = leaf->size();

        procStats.numPrimsUsed++;
        procStats.numPrimsTotal++;
        procStats.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;

        /* continue with first slot of next block once this block is consumed */
        slot++;
        if (slot >= primsInBlock)
        {
          slot = 0;
          node.node += sizeof(ProceduralLeaf);
        }
      }
    }
    break;
  }

  case NODE_TYPE_INTERNAL:
  {
    computeInternalNodeStatistics<QBVH6::InternalNode6>(stats, node, time_range, node_bounds_area, root_bounds_area);
    break;
  }

  default:
    assert(false);
  }
}
|
||||
|
||||
/* Calculates the statistics of the entire BVH. For an empty BVH the
 * default constructed statistics are returned. Otherwise traversal
 * starts at the root with time range [0,1] and the area of the BVH
 * bounds as both node and root area. */
BVHStatistics QBVH6::computeStatistics() const
{
  BVHStatistics stats;

  if (!empty())
  {
    const BBox1f fullTimeRange(0,1);
    embree::computeStatistics(stats, root(), fullTimeRange, area(bounds), area(bounds), 6);
  }

  return stats;
}
|
||||
|
||||
template<typename QInternalNode>
|
||||
void QBVH6::printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren)
|
||||
{
|
||||
QInternalNode* inner = node.innerNode<QInternalNode>();
|
||||
inner->print(cout, depth, false);
|
||||
std::cout << std::endl;
|
||||
|
||||
for (uint32_t i = 0; i < QInternalNode::NUM_CHILDREN; i++)
|
||||
{
|
||||
if (inner->valid(i))
|
||||
print(cout, inner->child(i), depth + 1, QInternalNode::NUM_CHILDREN);
|
||||
}
|
||||
|
||||
cout << tab(depth) << "}" << std::endl;
|
||||
}
|
||||
|
||||
void QBVH6::print( std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren)
|
||||
{
|
||||
switch (node.type)
|
||||
{
|
||||
case NODE_TYPE_INSTANCE: {
|
||||
node.leafNodeInstance()->print(cout,depth);
|
||||
cout << std::endl;
|
||||
break;
|
||||
}
|
||||
case NODE_TYPE_QUAD:
|
||||
{
|
||||
std::cout << tab(depth) << "List {" << std::endl;
|
||||
|
||||
bool last = false;
|
||||
|
||||
do
|
||||
{
|
||||
QuadLeaf* quad = node.leafNodeQuad();
|
||||
node.node += sizeof(QuadLeaf);
|
||||
last = quad->isLast();
|
||||
|
||||
quad->print(cout,depth+1);
|
||||
std::cout << std::endl;
|
||||
|
||||
} while (!last);
|
||||
|
||||
std::cout << tab(depth) << "}" << std::endl;
|
||||
break;
|
||||
}
|
||||
case NODE_TYPE_PROCEDURAL:
|
||||
{
|
||||
/*if (!node.leafNodeProcedural()->leafDesc.opaqueCullingEnabled())
|
||||
{
|
||||
InstanceLeaf* leaf = (InstanceLeaf*) node.node;
|
||||
leaf->print(cout,depth+1);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
else*/
|
||||
{
|
||||
std::cout << tab(depth) << "List {" << std::endl;
|
||||
|
||||
bool last = false;
|
||||
uint32_t currPrim = node.cur_prim;
|
||||
|
||||
do
|
||||
{
|
||||
ProceduralLeaf* leaf = node.leafNodeProcedural();
|
||||
last = leaf->isLast(currPrim);
|
||||
|
||||
uint32_t primsInBlock = leaf->size();
|
||||
|
||||
leaf->print(cout,currPrim,depth+1);
|
||||
std::cout << std::endl;
|
||||
|
||||
if (++currPrim >= primsInBlock) {
|
||||
currPrim = 0;
|
||||
node.node += sizeof(ProceduralLeaf);
|
||||
}
|
||||
|
||||
} while (!last);
|
||||
|
||||
std::cout << tab(depth) << "}" << std::endl;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case NODE_TYPE_INTERNAL:
|
||||
{
|
||||
printInternalNodeStatistics<QBVH6::InternalNode6>(cout, node, depth, numChildren);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "{ INVALID_NODE }" << std::endl;
|
||||
//assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned* getBackPointersData(const QBVH6* base) { // FIXME: should be member function
|
||||
return (unsigned*)(((const char*)base) + 64 * base->backPointerDataStart);
|
||||
}
|
||||
|
||||
unsigned getNumBackpointers(const QBVH6* base) { // FIXME: should be member function
|
||||
return ((base->backPointerDataEnd - base->backPointerDataStart) * 64) / sizeof(unsigned);
|
||||
}
|
||||
|
||||
uint64_t getBackpointerChildOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function
|
||||
return 64 * uint64_t(base->nodeDataStart + idx);
|
||||
}
|
||||
|
||||
uint64_t getParentFromBackpointerOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function
|
||||
return 64 * uint64_t(base->nodeDataStart + (getBackPointersData(base)[idx] >> 6));
|
||||
}
|
||||
|
||||
void QBVH6::print ( std::ostream& cout ) const
|
||||
{
|
||||
|
||||
cout << "QBVH @ "<< this <<" header: {\n";
|
||||
cout << " rootNodeOffset = " << rootNodeOffset << std::endl;
|
||||
cout << " bounds = " << bounds << std::endl;
|
||||
cout << " nodeDataStart = " << nodeDataStart << std::endl;
|
||||
cout << " nodeDataCur = " << nodeDataCur << std::endl;
|
||||
cout << " leafDataStart = " << leafDataCur << std::endl;
|
||||
cout << " leafDataCur = " << leafDataCur << std::endl;
|
||||
cout << " proceduralDataStart = " << proceduralDataStart << std::endl;
|
||||
cout << " proceduralDataCur = " << proceduralDataCur << std::endl;
|
||||
cout << " backPointerDataStart = " << backPointerDataStart << std::endl;
|
||||
cout << " backPointerDataEnd = " << backPointerDataEnd << std::endl;
|
||||
cout << " numPrims = " << numPrims << std::endl;
|
||||
cout << "}" << std::endl;
|
||||
|
||||
if (empty()) return;
|
||||
|
||||
print(cout,root(),0,6);
|
||||
|
||||
if (hasBackPointers())
|
||||
{
|
||||
cout << "backpointers: {\n";
|
||||
for (unsigned bp = 0; bp < getNumBackpointers(this); ++bp) {
|
||||
cout << " node @ offset " << (void*)getBackpointerChildOffset(this, bp) << " parent = " << (void*)getParentFromBackpointerOffset(this, bp) << ", num children = " << ((getBackPointersData(this)[bp] >> 3) & 0x7) << "\n";
|
||||
}
|
||||
cout << "}\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
230
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6.h
vendored
Normal file
230
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6.h
vendored
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "qnode.h"
|
||||
#include "statistics.h"
|
||||
#include "rtbuild.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*
|
||||
|
||||
The QBVH6 structure defines the bounding volume hierarchy (BVH)
|
||||
that is used by the hardware. It is a BVH with 6-wide branching
|
||||
factor, and quantized bounding boxes. At the leaf level quads
|
||||
(QuadLeaf type), procedural geometries (ProceduralLeaf
|
||||
type), and instances (InstanceLeaf type) can get referenced.
|
||||
|
||||
*/
|
||||
|
||||
/* Rounds a byte offset up to the next multiple of 128 bytes and returns
 * the result as a count of 64-byte blocks (hence always an even number). */
inline constexpr size_t roundOffsetTo128(size_t offset) {
  return ((offset + 127) / 128) * 2;
}
|
||||
|
||||
struct QBVH6
|
||||
{
|
||||
typedef NodeRef Node;
|
||||
typedef InternalNode<InternalNode6Data> InternalNode6;
|
||||
|
||||
static constexpr uint64_t rootNodeOffset = 128;
|
||||
|
||||
static_assert(sizeof(InternalNode6) == 64, "InternalNode6 must be 64 bytes large");
|
||||
|
||||
/* structure used to initialize the memory allocator inside the BVH */
|
||||
struct SizeEstimate
|
||||
{
|
||||
SizeEstimate ()
|
||||
: nodeBytes(0), leafBytes(0), proceduralBytes(0) {}
|
||||
|
||||
SizeEstimate (size_t nodeBytes, size_t leafBytes, size_t proceduralBytes)
|
||||
: nodeBytes(nodeBytes), leafBytes(leafBytes), proceduralBytes(proceduralBytes) {}
|
||||
|
||||
size_t bytes() const {
|
||||
return sizeof(QBVH6) + nodeBytes + leafBytes + proceduralBytes;
|
||||
}
|
||||
|
||||
friend bool operator<= (SizeEstimate a, SizeEstimate b)
|
||||
{
|
||||
if (a.nodeBytes > b.nodeBytes) return false;
|
||||
if (a.leafBytes > b.leafBytes) return false;
|
||||
if (a.proceduralBytes > b.proceduralBytes) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
friend SizeEstimate operator+ (const SizeEstimate& a, const SizeEstimate& b)
|
||||
{
|
||||
return SizeEstimate(a.nodeBytes + b.nodeBytes,
|
||||
a.leafBytes + b.leafBytes,
|
||||
a.proceduralBytes + b.proceduralBytes);
|
||||
}
|
||||
|
||||
/* output operator */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const SizeEstimate& estimate)
|
||||
{
|
||||
cout << "SizeEstimate {" << std::endl;
|
||||
cout << " nodeBytes = " << estimate.nodeBytes << ", " << std::endl;
|
||||
cout << " leafBytes = " << estimate.leafBytes << ", " << std::endl;
|
||||
cout << " proceduralBytes = " << estimate.proceduralBytes << ", " << std::endl;
|
||||
return cout << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
size_t nodeBytes; // bytes required to store internal nodes
|
||||
size_t leafBytes; // bytes required to store leaf nodes
|
||||
size_t proceduralBytes; // bytes required to store procedural leaf nodes
|
||||
};
|
||||
|
||||
/* Initializes a QBVH6 node with its provided size. The memory for
|
||||
* the QBVH6 structure is overallocated and the allocation size is
|
||||
* provided to the constructor, such that the allocator of the BVH
|
||||
* can get initialized properly. */
|
||||
|
||||
QBVH6(SizeEstimate size)
|
||||
: nodeDataStart((uint32_t)roundOffsetTo128(sizeof(QBVH6))), nodeDataCur(nodeDataStart),
|
||||
leafDataStart(nodeDataCur + (uint32_t)(size.nodeBytes / 64)), leafDataCur(leafDataStart),
|
||||
proceduralDataStart(leafDataCur + (uint32_t)(size.leafBytes / 64)), proceduralDataCur(proceduralDataStart),
|
||||
backPointerDataStart(proceduralDataCur + (uint32_t)(size.proceduralBytes/64)), backPointerDataEnd(backPointerDataStart)
|
||||
{
|
||||
assert(size.nodeBytes % 64 == 0);
|
||||
assert(size.leafBytes % 64 == 0);
|
||||
assert(size.proceduralBytes % 64 == 0);
|
||||
assert(size.bytes() <= (64LL << 32));
|
||||
|
||||
bounds = embree::empty;
|
||||
}
|
||||
|
||||
/* Returns the root node of the BVH */
|
||||
Node root() const {
|
||||
return Node(rootNodeOffset,(uint64_t)this);
|
||||
}
|
||||
|
||||
/* sets root not offset to point to this specified node */
|
||||
void setRootNodeOffset(Node node) {
|
||||
assert(node.cur_prim == 0);
|
||||
uint64_t MAYBE_UNUSED rootNodeOffset1 = (uint64_t)node - (uint64_t)this;
|
||||
assert(rootNodeOffset == rootNodeOffset1);
|
||||
}
|
||||
|
||||
/* check if BVH is empty */
|
||||
bool empty() const {
|
||||
return root().type == NODE_TYPE_INVALID;
|
||||
}
|
||||
|
||||
/* pretty printing */
|
||||
template<typename QInternalNode>
|
||||
static void printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren = 6);
|
||||
static void print(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren=6);
|
||||
void print(std::ostream& cout = std::cout) const;
|
||||
|
||||
/* output operator */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const QBVH6& qbvh) {
|
||||
qbvh.print(cout); return cout;
|
||||
}
|
||||
|
||||
/* calculates BVH statistics */
|
||||
BVHStatistics computeStatistics() const;
|
||||
|
||||
/*
|
||||
This section implements a simple allocator for BVH data. The
|
||||
BVH data is separated into two section, a section where nodes
|
||||
and leaves in mixed mode are allocated, and a section where
|
||||
only leaves are allocate in fat-leaf mode.
|
||||
|
||||
*/
|
||||
public:
|
||||
|
||||
/* allocate data in the node memory section */
|
||||
char* allocNode(size_t bytes)
|
||||
{
|
||||
assert(bytes % 64 == 0);
|
||||
uint32_t blocks = (uint32_t)bytes / 64;
|
||||
assert(nodeDataCur + blocks <= leafDataStart);
|
||||
char* ptr = (char*)this + 64 * (size_t)nodeDataCur;
|
||||
nodeDataCur += blocks;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* allocate memory in the leaf memory section */
|
||||
char* allocLeaf(size_t bytes)
|
||||
{
|
||||
assert(bytes % 64 == 0);
|
||||
uint32_t blocks = (uint32_t)bytes / 64;
|
||||
assert(leafDataCur + blocks <= proceduralDataStart);
|
||||
char* ptr = (char*)this + 64 * (size_t)leafDataCur;
|
||||
leafDataCur += blocks;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* allocate memory in procedural leaf memory section */
|
||||
char* allocProceduralLeaf(size_t bytes)
|
||||
{
|
||||
assert(bytes % 64 == 0);
|
||||
uint32_t blocks = (uint32_t)bytes / 64;
|
||||
assert(proceduralDataCur + blocks <= backPointerDataStart);
|
||||
char* ptr = (char*)this + 64 * (size_t)proceduralDataCur;
|
||||
proceduralDataCur += blocks;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* returns pointer to node address */
|
||||
char* nodePtr(size_t ofs) {
|
||||
return (char*)this + 64 * size_t(nodeDataStart) + ofs;
|
||||
}
|
||||
/* returns pointer to address for next leaf allocation */
|
||||
char* leafPtr() {
|
||||
return (char*)this + 64 * (size_t)leafDataCur;
|
||||
}
|
||||
|
||||
/* returns the total number of bytes of the BVH */
|
||||
size_t getTotalBytes() const {
|
||||
return 64 * (size_t)backPointerDataEnd;
|
||||
}
|
||||
|
||||
/* returns number of bytes available for node allocations */
|
||||
size_t getFreeNodeBytes() const {
|
||||
return 64 * (size_t)(leafDataStart - nodeDataCur);
|
||||
}
|
||||
|
||||
/* returns number of bytes available for leaf allocations */
|
||||
size_t getFreeLeafBytes() const {
|
||||
return 64 * (size_t)(proceduralDataStart - leafDataCur);
|
||||
}
|
||||
|
||||
/* returns number of bytes available for procedural leaf allocations */
|
||||
size_t getFreeProceduralLeafBytes() const {
|
||||
return 64 * (size_t)(backPointerDataStart - proceduralDataCur);
|
||||
}
|
||||
|
||||
/* returns the bytes used by allocations */
|
||||
size_t getUsedBytes() const {
|
||||
return getTotalBytes() - getFreeNodeBytes() - getFreeLeafBytes() - getFreeProceduralLeafBytes();
|
||||
}
|
||||
|
||||
bool hasBackPointers() const {
|
||||
return backPointerDataStart < backPointerDataEnd;
|
||||
}
|
||||
|
||||
public:
|
||||
ze_raytracing_accel_format_internal_t rtas_format = ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1;
|
||||
uint32_t reserved1;
|
||||
BBox3f bounds; // bounding box of the BVH
|
||||
|
||||
uint32_t nodeDataStart; // first 64 byte block of node data
|
||||
uint32_t nodeDataCur; // next free 64 byte block for node allocations
|
||||
uint32_t leafDataStart; // first 64 byte block of leaf data
|
||||
uint32_t leafDataCur; // next free 64 byte block for leaf allocations
|
||||
uint32_t proceduralDataStart; // first 64 byte block for procedural leaf data
|
||||
uint32_t proceduralDataCur; // next free 64 byte block for procedural leaf allocations
|
||||
uint32_t backPointerDataStart; // first 64 byte block for back pointers
|
||||
uint32_t backPointerDataEnd; // end of back pointer array
|
||||
uint32_t numTimeSegments = 1;
|
||||
uint32_t numPrims = 0; // number of primitives in this BVH
|
||||
uint32_t reserved[12];
|
||||
uint64_t dispatchGlobalsPtr;
|
||||
};
|
||||
|
||||
static_assert(sizeof(QBVH6) == 128, "QBVH6 must be 128 bytes large");
|
||||
}
|
||||
|
||||
1340
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6_builder_sah.h
vendored
Normal file
1340
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6_builder_sah.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
508
Framework/external/embree/kernels/rthwif/rtbuild/qnode.h
vendored
Normal file
508
Framework/external/embree/kernels/rthwif/rtbuild/qnode.h
vendored
Normal file
|
|
@ -0,0 +1,508 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
|
||||
#include "leaf.h"
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32)
|
||||
/* Replacement for frexp(): splits 'value' into a mantissa with magnitude
 * in [0.5, 1) (returned) and a power-of-two exponent (stored in *exp) such
 * that value == mantissa * 2^(*exp). Zero yields mantissa 0 and exponent 0.
 * Infinity and NaN are returned unchanged with *exp set to 0. */
inline float embree_frexp(float value, int* exp)
{
  // Using the Intel(R) oneAPI DPC++/C++ Compiler with -no-intel-libs results
  // in an unresolved external symbol "__imp_frexp" error, and therefore we
  // provide the manual implementation referenced here
  // https://en.cppreference.com/w/c/numeric/math/frexp in this case.
  static_assert(FLT_RADIX == 2, "custom implementation of frexp only works for base 2 floating point representations");

  // value - value is 0 for every finite input and NaN for +/-infinity and
  // NaN. Those inputs have no finite exponent, and converting logb()'s
  // result for them to int would be undefined, so pass them through.
  if (!(value - value == 0.0f)) {
    *exp = 0;
    return value;
  }

  *exp = (value == 0) ? 0 : (int)(1 + logb(value));
  return scalbn(value, -(*exp));
}
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* The NodeRef structure references a node of the BVH. It stores the
|
||||
* pointer to that node as well as the node's type. If a leaf node
|
||||
* is referenced the current primitive to intersect is also
|
||||
* stored. */
|
||||
|
||||
struct NodeRef
|
||||
{
|
||||
NodeRef ()
|
||||
: node(nullptr), type(NODE_TYPE_INVALID), cur_prim(0) {}
|
||||
|
||||
NodeRef (void* node, NodeType type, uint8_t cur_prim)
|
||||
: node((char*)node), type(type), cur_prim(cur_prim)
|
||||
{
|
||||
assert(cur_prim < 16);
|
||||
}
|
||||
|
||||
/* decode from 64 bit encoding used in MemRay and Instances */
|
||||
NodeRef (uint64_t nodePtr, uint64_t offset = 0)
|
||||
{
|
||||
node = (char*) (nodePtr & ~(uint64_t)0xF) + offset;
|
||||
//type = NODE_TYPE_INTERNAL; // we can only reference internal nodes inside ray and instances
|
||||
type = (NodeType) (nodePtr & 0xF);
|
||||
cur_prim = 0;
|
||||
}
|
||||
|
||||
/* 64 bit encoding used in MemRay and Instances */
|
||||
operator uint64_t() const
|
||||
{
|
||||
//assert(type == NODE_TYPE_INTERNAL);
|
||||
assert(((uint64_t)node & 0xF) == 0);
|
||||
assert(cur_prim == 0);
|
||||
return (uint64_t)node + (uint64_t) type;
|
||||
}
|
||||
|
||||
/* returns the internal node that is referenced */
|
||||
template<typename InternalNode>
|
||||
InternalNode* innerNode() const {
|
||||
assert(type == NODE_TYPE_INTERNAL);
|
||||
return (InternalNode*)node;
|
||||
}
|
||||
|
||||
/* returns the instance leaf node that is referenced */
|
||||
InstanceLeaf* leafNodeInstance() const {
|
||||
assert(type == NODE_TYPE_INSTANCE);
|
||||
return (InstanceLeaf*)node;
|
||||
}
|
||||
|
||||
/* returns the quad leaf node that is referenced */
|
||||
QuadLeaf* leafNodeQuad() const {
|
||||
assert(type == NODE_TYPE_QUAD);
|
||||
return (QuadLeaf*)node;
|
||||
}
|
||||
|
||||
/* returns the procedural leaf node that is referenced */
|
||||
ProceduralLeaf* leafNodeProcedural() const {
|
||||
assert(type == NODE_TYPE_PROCEDURAL);
|
||||
return (ProceduralLeaf*)node;
|
||||
}
|
||||
|
||||
friend bool operator ==(const NodeRef& a, const NodeRef& b) {
|
||||
return (a.node == b.node) && (a.type == b.type) && (a.cur_prim == b.cur_prim);
|
||||
}
|
||||
|
||||
friend bool operator !=(const NodeRef& a, const NodeRef& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
#if !defined(__RTRT_GSIM)
|
||||
friend inline std::ostream& operator<<(std::ostream& _cout, const NodeRef& node) {
|
||||
return _cout << "NodeRef { " << (void*)node.node << ", " << node.type << ", " << (int)node.cur_prim << " }";
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
char* node; // pointer to the referenced node
|
||||
NodeType type; // type of the node referenced
|
||||
uint8_t cur_prim : 4; // current primitive referenced in the leaf
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
The internal nodes of the BVH store references to 6 children and
|
||||
quantized bounds for each of these children.
|
||||
|
||||
All children are stored consecutively in memory at a location
|
||||
referred to by the childOffset. To calculate the relative
|
||||
location of the i'th child the size (as encoded in blockIncr) of
|
||||
all the children with index smaller than i has to get added to
|
||||
that childOffset. The calculated offset specifies the signed
|
||||
number of 64 bytes blocks relative to the node address to reach
|
||||
the child.
|
||||
|
||||
If the nodeType is INTERNAL we are in mixed mode and the type of
|
||||
each child is encoded inside the startPrim member. Otherwise we
|
||||
are in fat leaf mode and each child has the same type 'nodeType'
|
||||
and startPrim identifies the primitive where the leaf
|
||||
starts. The leaf spans all primitives from this start primitive
|
||||
to the end primitive which is marked as 'last'.
|
||||
|
||||
The bounding boxes of the children are quantized into a regular
|
||||
3D grid. The world space position of the origin of that grid is
|
||||
stored at full precision in the lower member, while the step
|
||||
size is encoded in the exp_x, exp_y, and exp_z members as power
|
||||
of 2. Thus grid coordinates together with their exponent
|
||||
(xi,exp_x), (yi,exp_y), (zi,exp_z) correspond to the mantissa
|
||||
and exponent of a floating point number representation without
|
||||
leading zero. Thus the world space position of the bounding
|
||||
planes can get calculated as follows:
|
||||
|
||||
x = lower.x + pow(2,exp_x) * 0.xi
|
||||
y = lower.y + pow(2,exp_y) * 0.yi
|
||||
z = lower.z + pow(2,exp_z) * 0.zi
|
||||
|
||||
As the stored grid coordinates for child bounds are only
|
||||
unsigned 8-bit values, ray/box intersections can get performed
|
||||
with reduced precision.
|
||||
|
||||
The node also stores a mask used for ray filtering. Only rays
|
||||
with (node.nodeMask & ray.rayMask) != 0 are traversed, all
|
||||
others are culled.
|
||||
|
||||
*/
|
||||
|
||||
/* Plain data layout of an internal node with 6 children. A static_assert
 * following this struct requires the layout to be exactly 64 bytes, so
 * any change to the members has to keep the total size at 64 bytes. */
struct InternalNode6Data
|
||||
{
|
||||
static constexpr uint32_t NUM_CHILDREN = 6;
|
||||
|
||||
Vec3f lower; // world space origin of quantization grid
|
||||
int32_t childOffset; // offset to all children in 64B multiples
|
||||
|
||||
NodeType nodeType; // the type of the node
|
||||
uint8_t pad; // unused byte
|
||||
|
||||
int8_t exp_x; // 2^exp_x is the size of the grid in x dimension
|
||||
int8_t exp_y; // 2^exp_y is the size of the grid in y dimension
|
||||
int8_t exp_z; // 2^exp_z is the size of the grid in z dimension
|
||||
uint8_t nodeMask; // mask used for ray filtering
|
||||
|
||||
// per-child data packed into one byte: 2 + 4 + 2 bits
struct ChildData
|
||||
{
|
||||
uint8_t blockIncr : 2; // size of child in 64 byte blocks
|
||||
uint8_t startPrim : 4; // start primitive in fat leaf mode or child type in mixed mode
|
||||
uint8_t pad : 2; // unused bits
|
||||
} childData[NUM_CHILDREN];
|
||||
|
||||
// quantized child bounds, stored per axis with one entry per child
uint8_t lower_x[NUM_CHILDREN]; // the quantized lower bounds in x-dimension
|
||||
uint8_t upper_x[NUM_CHILDREN]; // the quantized upper bounds in x-dimension
|
||||
uint8_t lower_y[NUM_CHILDREN]; // the quantized lower bounds in y-dimension
|
||||
uint8_t upper_y[NUM_CHILDREN]; // the quantized upper bounds in y-dimension
|
||||
uint8_t lower_z[NUM_CHILDREN]; // the quantized lower bounds in z-dimension
|
||||
uint8_t upper_z[NUM_CHILDREN]; // the quantized upper bounds in z-dimension
|
||||
};
|
||||
|
||||
static_assert(sizeof(InternalNode6Data) == 64, "InternalNode6Data must be 64 bytes large");
|
||||
|
||||
template<typename InternalNodeData>
|
||||
struct InternalNodeCommon : public InternalNodeData
|
||||
{
|
||||
using InternalNodeData::NUM_CHILDREN;
|
||||
|
||||
InternalNodeCommon() {
|
||||
}
|
||||
|
||||
InternalNodeCommon(NodeType type)
|
||||
{
|
||||
this->nodeType = type;
|
||||
this->childOffset = 0;
|
||||
this->nodeMask = 0xFF;
|
||||
|
||||
for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++)
|
||||
this->childData[i] = { 0, 0, 0 };
|
||||
|
||||
this->lower = Vec3f(0.0f);
|
||||
this->exp_x = 0;
|
||||
this->exp_y = 0;
|
||||
this->exp_z = 0;
|
||||
|
||||
/* set all child bounds to invalid */
|
||||
for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++) {
|
||||
this->lower_x[i] = this->lower_y[i] = this->lower_z[i] = 0x80;
|
||||
this->upper_x[i] = this->upper_y[i] = this->upper_z[i] = 0x00;
|
||||
}
|
||||
}
|
||||
|
||||
/* this function slightly enlarges bounds in order to make traversal watertight */
|
||||
static const BBox3f conservativeBox(const BBox3f box, float ulps = 1.0f) {
|
||||
const float err = ulps*std::numeric_limits<float>::epsilon() * std::max(reduce_max(abs(box.lower)), reduce_max(abs(box.upper)));
|
||||
return enlarge(box, Vec3f(err));
|
||||
}
|
||||
|
||||
/* this function quantizes the provided bounds */
|
||||
const BBox3f quantize_bounds(BBox3f fbounds, Vec3f base) const
|
||||
{
|
||||
const Vec3f lower = fbounds.lower-base;
|
||||
const Vec3f upper = fbounds.upper-base;
|
||||
float qlower_x = ldexpf(lower.x, -this->exp_x + 8);
|
||||
float qlower_y = ldexpf(lower.y, -this->exp_y + 8);
|
||||
float qlower_z = ldexpf(lower.z, -this->exp_z + 8);
|
||||
float qupper_x = ldexpf(upper.x, -this->exp_x + 8);
|
||||
float qupper_y = ldexpf(upper.y, -this->exp_y + 8);
|
||||
float qupper_z = ldexpf(upper.z, -this->exp_z + 8);
|
||||
assert(qlower_x >= 0.0f && qlower_x <= 255.0f);
|
||||
assert(qlower_y >= 0.0f && qlower_y <= 255.0f);
|
||||
assert(qlower_z >= 0.0f && qlower_z <= 255.0f);
|
||||
assert(qupper_x >= 0.0f && qupper_x <= 255.0f);
|
||||
assert(qupper_y >= 0.0f && qupper_y <= 255.0f);
|
||||
assert(qupper_z >= 0.0f && qupper_z <= 255.0f);
|
||||
qlower_x = min(max(floorf(qlower_x),0.0f),255.0f);
|
||||
qlower_y = min(max(floorf(qlower_y),0.0f),255.0f);
|
||||
qlower_z = min(max(floorf(qlower_z),0.0f),255.0f);
|
||||
qupper_x = min(max(ceilf(qupper_x),0.0f),255.0f);
|
||||
qupper_y = min(max(ceilf(qupper_y),0.0f),255.0f);
|
||||
qupper_z = min(max(ceilf(qupper_z),0.0f),255.0f);
|
||||
BBox3f qbounds(Vec3f(qlower_x, qlower_y, qlower_z), Vec3f(qupper_x, qupper_y, qupper_z));
|
||||
|
||||
/* verify that quantized bounds are conservative */
|
||||
BBox3f dbounds = dequantize_bounds(qbounds, base);
|
||||
dbounds.lower.x -= 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8));
|
||||
dbounds.lower.y -= 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8));
|
||||
dbounds.lower.z -= 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8));
|
||||
dbounds.upper.x += 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8));
|
||||
dbounds.upper.y += 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8));
|
||||
dbounds.upper.z += 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8));
|
||||
assert(subset(fbounds, dbounds));
|
||||
|
||||
return qbounds;
|
||||
}
|
||||
|
||||
/* this function de-quantizes the provided bounds */
|
||||
const BBox3f dequantize_bounds(const BBox3f& qbounds, Vec3f base) const
|
||||
{
|
||||
const float dlower_x = base.x + ldexpf(qbounds.lower.x, this->exp_x - 8);
|
||||
const float dlower_y = base.y + ldexpf(qbounds.lower.y, this->exp_y - 8);
|
||||
const float dlower_z = base.z + ldexpf(qbounds.lower.z, this->exp_z - 8);
|
||||
const float dupper_x = base.x + ldexpf(qbounds.upper.x, this->exp_x - 8);
|
||||
const float dupper_y = base.y + ldexpf(qbounds.upper.y, this->exp_y - 8);
|
||||
const float dupper_z = base.z + ldexpf(qbounds.upper.z, this->exp_z - 8);
|
||||
return BBox3f(Vec3f(dlower_x, dlower_y, dlower_z), Vec3f(dupper_x, dupper_y, dupper_z));
|
||||
}
|
||||
|
||||
/* Determines if a child is valid. We have only to look at the
|
||||
* topmost bit of lower_x and upper_x to determine if child is
|
||||
* valid */
|
||||
bool valid(int i) const {
|
||||
return !(this->lower_x[i] & 0x80) || (this->upper_x[i] & 0x80);
|
||||
}
|
||||
|
||||
/* Determines if the node is in fat leaf mode. */
|
||||
bool isFatLeaf() const {
|
||||
return this->nodeType != NODE_TYPE_MIXED;
|
||||
}
|
||||
|
||||
/* Sets the offset to the child memory. */
|
||||
void setChildOffset(void* childDataPtr)
|
||||
{
|
||||
int64_t childDataOffset = childDataPtr ? (char*)childDataPtr - (char*)this : 0;
|
||||
assert(childDataOffset % 64 == 0);
|
||||
assert((int64_t)(int32_t)(childDataOffset / 64) == (childDataOffset / 64));
|
||||
this->childOffset = (int32_t)(childDataOffset / 64);
|
||||
}
|
||||
|
||||
/* Sets the type, size, and current primitive of a child */
|
||||
void setChildType(uint32_t child, NodeType childType, uint32_t block_delta, uint32_t cur_prim)
|
||||
{
|
||||
// there is no need to store block_delta for last child
|
||||
if (child == NUM_CHILDREN-1) block_delta = 0;
|
||||
|
||||
assert(block_delta < 4);
|
||||
assert(cur_prim < 16);
|
||||
|
||||
if (isFatLeaf())
|
||||
{
|
||||
assert(this->nodeType == childType);
|
||||
this->childData[child].startPrim = cur_prim;
|
||||
this->childData[child].blockIncr = block_delta;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(cur_prim == 0);
|
||||
this->childData[child].startPrim = childType;
|
||||
this->childData[child].blockIncr = block_delta;
|
||||
}
|
||||
}
|
||||
|
||||
void invalidateChild(uint32_t childID)
|
||||
{
|
||||
/* set child bounds to invalid */
|
||||
this->lower_x[childID] = this->lower_y[childID] = this->lower_z[childID] = 0x80;
|
||||
this->upper_x[childID] = this->upper_y[childID] = this->upper_z[childID] = 0x00;
|
||||
}
|
||||
|
||||
/* Sets child bounds */
|
||||
void setChildBounds(uint32_t childID, const BBox3f& fbounds)
|
||||
{
|
||||
assert(fbounds.lower.x <= fbounds.upper.x);
|
||||
assert(fbounds.lower.y <= fbounds.upper.y);
|
||||
assert(fbounds.lower.z <= fbounds.upper.z);
|
||||
const BBox3f qbounds = quantize_bounds(conservativeBox(fbounds), this->lower);
|
||||
this->lower_x[childID] = (uint8_t)qbounds.lower.x;
|
||||
this->lower_y[childID] = (uint8_t)qbounds.lower.y;
|
||||
this->lower_z[childID] = (uint8_t)qbounds.lower.z;
|
||||
this->upper_x[childID] = (uint8_t)qbounds.upper.x;
|
||||
this->upper_y[childID] = (uint8_t)qbounds.upper.y;
|
||||
this->upper_z[childID] = (uint8_t)qbounds.upper.z;
|
||||
assert(valid(childID));
|
||||
}
|
||||
|
||||
/* Sets an entire child, including bounds, type, size, and referenced primitive. */
|
||||
void setChild(uint32_t childID, const BBox3f& fbounds, NodeType type, uint32_t block_delta, uint32_t cur_prim = 0)
|
||||
{
|
||||
setChildType(childID, type, block_delta, cur_prim);
|
||||
setChildBounds(childID, fbounds);
|
||||
}
|
||||
|
||||
/* Calculates the byte offset to the child. The offset is
|
||||
* relative to the address this node. */
|
||||
int64_t getChildOffset(uint32_t childID) const
|
||||
{
|
||||
int64_t ofs = this->childOffset;
|
||||
for (uint32_t j = 0; j < childID; j++)
|
||||
ofs += this->childData[j].blockIncr;
|
||||
return 64 * ofs;
|
||||
}
|
||||
|
||||
/* Returns the type of the child. In fat leaf mode the type is
|
||||
* shared between all children, otherwise a per-child type is
|
||||
* encoded inside the startPrim member for each child. */
|
||||
NodeType getChildType(uint32_t childID) const
|
||||
{
|
||||
if (isFatLeaf())
|
||||
return this->nodeType;
|
||||
|
||||
else
|
||||
return (NodeType)(this->childData[childID].startPrim);
|
||||
}
|
||||
|
||||
/* Returns the start primitive of a child. In case of children
|
||||
* in fat-leaf mode, all children are leaves, and the start
|
||||
* primitive specifies the primitive in a leaf block where the
|
||||
* leaf start. */
|
||||
uint32_t getChildStartPrim(uint32_t childID) const
|
||||
{
|
||||
if (isFatLeaf())
|
||||
return this->childData[childID].startPrim;
|
||||
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Returns a node reference for the given child. This reference
|
||||
* includes the node pointer, type, and start primitive. */
|
||||
NodeRef child(void* This, int childID) const {
|
||||
return NodeRef((char*)This + getChildOffset(childID), getChildType(childID), getChildStartPrim(childID));
|
||||
}
|
||||
|
||||
NodeRef child(int i) const {
|
||||
return child((void*)this, i);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename QInternalNode>
|
||||
struct InternalNode : public InternalNodeCommon<QInternalNode>
|
||||
{
|
||||
using InternalNodeCommon<QInternalNode>::valid;
|
||||
using InternalNodeCommon<QInternalNode>::getChildType;
|
||||
using InternalNodeCommon<QInternalNode>::getChildOffset;
|
||||
using InternalNodeCommon<QInternalNode>::getChildStartPrim;
|
||||
using InternalNodeCommon<QInternalNode>::conservativeBox;
|
||||
using InternalNodeCommon<QInternalNode>::dequantize_bounds;
|
||||
using InternalNodeCommon<QInternalNode>::NUM_CHILDREN;
|
||||
|
||||
InternalNode() {
|
||||
}
|
||||
|
||||
InternalNode (NodeType type)
|
||||
: InternalNodeCommon<QInternalNode>(type) {}
|
||||
|
||||
/* Constructs an internal node. The quantization grid gets
|
||||
* initialized from the provided parent bounds. */
|
||||
InternalNode (BBox3f box, NodeType type = NODE_TYPE_MIXED)
|
||||
: InternalNode(type)
|
||||
{
|
||||
setNodeBounds(box);
|
||||
}
|
||||
|
||||
void setNodeBounds(BBox3f box)
|
||||
{
|
||||
/* initialize quantization grid */
|
||||
box = conservativeBox(box);
|
||||
const float _ulp = std::numeric_limits<float>::epsilon();
|
||||
const float up = 1.0f + float(_ulp);
|
||||
Vec3f len = box.size() * up;
|
||||
this->lower = box.lower;
|
||||
#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32)
|
||||
int _exp_x; float mant_x = embree_frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f);
|
||||
int _exp_y; float mant_y = embree_frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f);
|
||||
int _exp_z; float mant_z = embree_frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f);
|
||||
#else
|
||||
int _exp_x; float mant_x = frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f);
|
||||
int _exp_y; float mant_y = frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f);
|
||||
int _exp_z; float mant_z = frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f);
|
||||
#endif
|
||||
_exp_x = max(-128,_exp_x); // enlarge too tight bounds
|
||||
_exp_y = max(-128,_exp_y);
|
||||
_exp_z = max(-128,_exp_z);
|
||||
this->exp_x = _exp_x; assert(_exp_x >= -128 && _exp_x <= 127);
|
||||
this->exp_y = _exp_y; assert(_exp_y >= -128 && _exp_y <= 127);
|
||||
this->exp_z = _exp_z; assert(_exp_z >= -128 && _exp_z <= 127);
|
||||
}
|
||||
|
||||
/* dequantizes the bounds of the specified child */
|
||||
const BBox3f bounds(uint32_t childID) const
|
||||
{
|
||||
return dequantize_bounds(BBox3f(Vec3f(this->lower_x[childID], this->lower_y[childID], this->lower_z[childID]),
|
||||
Vec3f(this->upper_x[childID], this->upper_y[childID], this->upper_z[childID])),
|
||||
this->lower);
|
||||
}
|
||||
|
||||
const BBox3f bounds() const
|
||||
{
|
||||
BBox3f b = empty;
|
||||
for (size_t i=0; i<NUM_CHILDREN; i++) {
|
||||
if (!valid(i)) continue;
|
||||
b.extend(bounds(i));
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Copies this node into `dst` and then re-targets the child offset of the
   copy at the address of this node's first child. */
void copy_to( InternalNode* dst ) const
{
  *dst = *this;

  /* address of the first child, computed relative to the source node */
  char* const firstChild = (char*)this + getChildOffset(0);
  dst->setChildOffset(firstChild);
}
|
||||
|
||||
#if !defined(__RTRT_GSIM)
|
||||
|
||||
/* output of internal node */
|
||||
void print(std::ostream& cout, uint32_t depth, bool close) const
|
||||
{
|
||||
cout << tab(depth) << "InternalNode" << NUM_CHILDREN << " {" << std::endl;
|
||||
cout << tab(depth) << " addr = " << this << std::endl;
|
||||
cout << tab(depth) << " childOffset = " << 64 * int64_t(this->childOffset) << std::endl;
|
||||
cout << tab(depth) << " nodeType = " << NodeType(this->nodeType) << std::endl;
|
||||
cout << tab(depth) << " nodeMask = " << std::bitset<8>(this->nodeMask) << std::endl;
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CHILDREN; i++)
|
||||
{
|
||||
cout << tab(depth) << " child" << i << " = { ";
|
||||
if (valid(i))
|
||||
{
|
||||
cout << "type = " << getChildType(i);
|
||||
cout << ", offset = " << getChildOffset(i);
|
||||
cout << ", prim = " << getChildStartPrim(i);
|
||||
cout << ", bounds = " << bounds(i);
|
||||
}
|
||||
else {
|
||||
cout << "INVALID";
|
||||
}
|
||||
cout << " }" << std::endl;
|
||||
}
|
||||
|
||||
if (close)
|
||||
cout << tab(depth) << "}";
|
||||
}
|
||||
|
||||
/* output operator for internal node */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const InternalNode& node) {
|
||||
node.print(cout, 0, true); return cout;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
inline size_t GetInternalNodeSize(uint32_t numChildren)
|
||||
{
|
||||
if (numChildren <= 6)
|
||||
return sizeof(InternalNode6Data);
|
||||
else
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef InternalNode<InternalNode6Data> InternalNode6;
|
||||
}
|
||||
151
Framework/external/embree/kernels/rthwif/rtbuild/quadifier.h
vendored
Normal file
151
Framework/external/embree/kernels/rthwif/rtbuild/quadifier.h
vendored
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(ZE_RAYTRACING)
|
||||
#include "sys/sysinfo.h"
|
||||
#include "sys/vector.h"
|
||||
#include "math/vec2.h"
|
||||
#include "math/vec3.h"
|
||||
#include "math/bbox.h"
|
||||
#include "math/affinespace.h"
|
||||
#else
|
||||
#include "../../common/default.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* Per-triangle pairing state produced by the quadifier. */
enum QuadifierType : uint16_t
{
  QUADIFIER_TRIANGLE     = 0,      // indicates that this triangle cannot get paired
  QUADIFIER_QUAD         = 1,      // all values > 0 and != 0xFFFF indicate offset to paired triangle
  QUADIFIER_MAX_DISTANCE = 31,
  QUADIFIER_PAIRED       = 0xFFFF, // indicates that triangle is paired with a previous triangle
};
|
||||
|
||||
template<typename Ty, size_t N>
|
||||
struct static_deque
|
||||
{
|
||||
__forceinline Ty pop_front() {
|
||||
assert(size());
|
||||
return operator[](begin++);
|
||||
}
|
||||
|
||||
__forceinline void push_back(const Ty& v) {
|
||||
assert(size() < N);
|
||||
operator[](end++) = v;
|
||||
}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
assert(end >= begin);
|
||||
return end-begin;
|
||||
}
|
||||
|
||||
__forceinline bool full() const {
|
||||
return size() == N;
|
||||
}
|
||||
|
||||
__forceinline void erase( size_t j )
|
||||
{
|
||||
assert(j >= begin && j < end);
|
||||
|
||||
/* fast path as we mostly just merge with the subsequent triangle */
|
||||
if (likely(j == begin))
|
||||
begin++;
|
||||
|
||||
/* fastest when left side is small */
|
||||
else if (j-begin < end-j-1) {
|
||||
for (size_t i=j; i>=begin+1; i--) operator[](i) = operator[](i-1);
|
||||
begin++;
|
||||
}
|
||||
|
||||
/* fastest if right side is small */
|
||||
else {
|
||||
for (size_t i=j+1; i<end; i++) operator[](i-1) = operator[](i);
|
||||
end--;
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline Ty& operator[] ( const size_t i ) { return array[i%N]; }
|
||||
__forceinline const Ty& operator[] ( const size_t i ) const { return array[i%N]; }
|
||||
|
||||
Ty array[N];
|
||||
size_t begin = 0;
|
||||
size_t end = 0;
|
||||
};
|
||||
|
||||
/* Matches each vertex index of triangle `b` against the three indices of
   triangle `a`. lb0/lb1/lb2 receive the position (0..2) in `a` of the first
   matching index, or 3 if none matches (lane 3 of the mask is always set).
   Returns true if at most one vertex of `b` is unmatched. */
__forceinline bool pair_triangles(Vec3<uint32_t> a, Vec3<uint32_t> b, uint8_t& lb0, uint8_t& lb1, uint8_t& lb2)
{
  const vuint<4> va(a.x,a.y,a.z,0);

  /* first lane in which `index` equals an index of `a`; lane 3 acts as sentinel */
  auto firstMatch = [&va](uint32_t index) -> uint8_t {
    const vboolf<4> mask = vboolf<4>(0x8) | (vuint<4>(index) == va);
    return bsf(movemask(mask));
  };

  lb0 = firstMatch(b.x);
  lb1 = firstMatch(b.y);
  lb2 = firstMatch(b.z);

  const int numUnmatched = (lb0 == 3) + (lb1 == 3) + (lb2 == 3);
  return numUnmatched <= 1;
}
|
||||
|
||||
template<typename GetTriangleFunc>
|
||||
__forceinline void merge_triangle_window( uint32_t geomID, static_deque<uint32_t,32>& triangleWindow, QuadifierType* quads_o, const GetTriangleFunc& getTriangle )
|
||||
{
|
||||
uint32_t primID0 = triangleWindow.pop_front();
|
||||
|
||||
/* load first triangle */
|
||||
Vec3<uint32_t> tri0 = getTriangle(geomID, primID0);
|
||||
|
||||
/* find a second triangle in triangle window to pair with */
|
||||
for ( size_t slot = triangleWindow.begin; slot != triangleWindow.end; ++slot )
|
||||
{
|
||||
/* load second triangle */
|
||||
uint32_t primID1 = triangleWindow[slot];
|
||||
Vec3<uint32_t> tri1 = getTriangle(geomID, primID1);
|
||||
|
||||
/* try to pair triangles */
|
||||
uint8_t lb0,lb1,lb2;
|
||||
bool pair = pair_triangles(tri0,tri1,lb0,lb1,lb2);
|
||||
|
||||
/* the offset between the triangles cannot be too large as hardware limits bits for offset encode */
|
||||
uint32_t prim_offset = primID1 - primID0;
|
||||
pair &= prim_offset <= QUADIFIER_MAX_DISTANCE;
|
||||
|
||||
/* store pairing if successful */
|
||||
if (pair)
|
||||
{
|
||||
assert(prim_offset > 0 && prim_offset < QUADIFIER_PAIRED);
|
||||
quads_o[primID0] = (QuadifierType) prim_offset;
|
||||
quads_o[primID1] = QUADIFIER_PAIRED;
|
||||
triangleWindow.erase(slot);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* make a triangle if we fail to find a candiate to pair with */
|
||||
quads_o[primID0] = QUADIFIER_TRIANGLE;
|
||||
}
|
||||
|
||||
template<typename GetTriangleFunc>
|
||||
inline size_t pair_triangles( uint32_t geomID, QuadifierType* quads_o, uint32_t primID0, uint32_t primID1, const GetTriangleFunc& getTriangle )
|
||||
{
|
||||
static_deque<uint32_t, 32> triangleWindow;
|
||||
|
||||
size_t numTrianglePairs = 0;
|
||||
for (uint32_t primID=primID0; primID<primID1; primID++)
|
||||
{
|
||||
triangleWindow.push_back(primID);
|
||||
|
||||
if (triangleWindow.full()) {
|
||||
merge_triangle_window(geomID, triangleWindow,quads_o,getTriangle);
|
||||
numTrianglePairs++;
|
||||
}
|
||||
}
|
||||
|
||||
while (triangleWindow.size()) {
|
||||
merge_triangle_window(geomID, triangleWindow,quads_o,getTriangle);
|
||||
numTrianglePairs++;
|
||||
}
|
||||
|
||||
return numTrianglePairs;
|
||||
}
|
||||
}
|
||||
762
Framework/external/embree/kernels/rthwif/rtbuild/rtbuild.cpp
vendored
Normal file
762
Framework/external/embree/kernels/rthwif/rtbuild/rtbuild.cpp
vendored
Normal file
|
|
@ -0,0 +1,762 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#define RTHWIF_EXPORT_API
|
||||
|
||||
#include "rtbuild.h"
|
||||
#include "qbvh6_builder_sah.h"
|
||||
|
||||
// get definition of debug extension
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
#include "../../level_zero/ze_wrapper.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
using namespace embree::isa;
|
||||
|
||||
static tbb::task_arena g_arena(tbb::this_task_arena::max_concurrency(),tbb::this_task_arena::max_concurrency());
|
||||
|
||||
/* Loads the index triple of triangle `primID` from the strided triangle buffer. */
inline ze_rtas_triangle_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID)
{
  assert(primID < geom->triangleCount);
  const uint64_t byteOffset = uint64_t(primID)*geom->triangleStride;
  char* const address = (char*)geom->pTriangleBuffer + byteOffset;
  return *(ze_rtas_triangle_indices_uint32_exp_t*)address;
}
|
||||
|
||||
/* Loads vertex `vertexID` from the strided vertex buffer of a triangle geometry. */
inline Vec3f getVertex(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t vertexID)
{
  assert(vertexID < geom->vertexCount);
  const uint64_t byteOffset = uint64_t(vertexID)*geom->vertexStride;
  char* const address = (char*)geom->pVertexBuffer + byteOffset;
  return *(Vec3f*)address;
}
|
||||
|
||||
/* Loads the four indices of quad `primID` from the strided quad buffer. */
inline ze_rtas_quad_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID)
{
  assert(primID < geom->quadCount);
  const uint64_t byteOffset = uint64_t(primID)*geom->quadStride;
  char* const address = (char*)geom->pQuadBuffer + byteOffset;
  return *(ze_rtas_quad_indices_uint32_exp_t*)address;
}
|
||||
|
||||
/* Loads vertex `vertexID` from the strided vertex buffer of a quad geometry. */
inline Vec3f getVertex(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t vertexID)
{
  assert(vertexID < geom->vertexCount);
  const uint64_t byteOffset = uint64_t(vertexID)*geom->vertexStride;
  char* const address = (char*)geom->pVertexBuffer + byteOffset;
  return *(Vec3f*)address;
}
|
||||
|
||||
/* Reads the instance transformation of `geom` in whichever of the three
   supported layouts `transformFormat` selects and returns it as an
   AffineSpace3fa. Throws std::runtime_error for any other format. */
inline AffineSpace3fa getTransform(const ze_rtas_builder_instance_geometry_info_exp_t* geom)
{
  /* all three layouts expose the same member names, so one generic
     converter serves every format */
  auto toAffineSpace = [](const auto* xfm) -> AffineSpace3fa {
    return {
      { xfm->vx_x, xfm->vx_y, xfm->vx_z },
      { xfm->vy_x, xfm->vy_y, xfm->vy_z },
      { xfm->vz_x, xfm->vz_y, xfm->vz_z },
      { xfm-> p_x, xfm-> p_y, xfm-> p_z }
    };
  };

  switch (geom->transformFormat)
  {
  case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_COLUMN_MAJOR:
    return toAffineSpace((const ze_rtas_transform_float3x4_column_major_exp_t*) geom->pTransform);

  case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR:
    return toAffineSpace((const ze_rtas_transform_float3x4_aligned_column_major_exp_t*) geom->pTransform);

  case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ROW_MAJOR:
    return toAffineSpace((const ze_rtas_transform_float3x4_row_major_exp_t*) geom->pTransform);

  default:
    throw std::runtime_error("invalid transform format");
  }
}
|
||||
|
||||
inline void verifyGeometryDesc(const ze_rtas_builder_triangles_geometry_info_exp_t* geom)
|
||||
{
|
||||
if (geom->triangleFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32)
|
||||
throw std::runtime_error("triangle format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32");
|
||||
|
||||
if (geom->vertexFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3)
|
||||
throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3");
|
||||
|
||||
if (geom->triangleCount && geom->pTriangleBuffer == nullptr) throw std::runtime_error("no triangle buffer specified");
|
||||
if (geom->vertexCount && geom->pVertexBuffer == nullptr) throw std::runtime_error("no vertex buffer specified");
|
||||
}
|
||||
|
||||
inline void verifyGeometryDesc(const ze_rtas_builder_quads_geometry_info_exp_t* geom)
|
||||
{
|
||||
if (geom->quadFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32)
|
||||
throw std::runtime_error("quad format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32");
|
||||
|
||||
if (geom->vertexFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3)
|
||||
throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3");
|
||||
|
||||
if (geom->quadCount && geom->pQuadBuffer == nullptr) throw std::runtime_error("no quad buffer specified");
|
||||
if (geom->vertexCount && geom->pVertexBuffer == nullptr) throw std::runtime_error("no vertex buffer specified");
|
||||
}
|
||||
|
||||
inline void verifyGeometryDesc(const ze_rtas_builder_procedural_geometry_info_exp_t* geom)
|
||||
{
|
||||
if (geom->primCount && geom->pfnGetBoundsCb == nullptr) throw std::runtime_error("no bounds function specified");
|
||||
if (geom->reserved != 0) throw std::runtime_error("reserved value must be zero");
|
||||
}
|
||||
|
||||
inline void verifyGeometryDesc(const ze_rtas_builder_instance_geometry_info_exp_t* geom)
|
||||
{
|
||||
if (geom->pTransform == nullptr) throw std::runtime_error("no instance transformation specified");
|
||||
if (geom->pBounds == nullptr) throw std::runtime_error("no acceleration structure bounds specified");
|
||||
if (geom->pAccelerationStructure == nullptr) throw std::runtime_error("no acceleration structure to instanciate specified");
|
||||
}
|
||||
|
||||
inline bool buildBounds(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
|
||||
{
|
||||
if (primID >= geom->triangleCount) return false;
|
||||
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
|
||||
if (unlikely(tri.v0 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v1 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v2 >= geom->vertexCount)) return false;
|
||||
|
||||
const Vec3f p0 = getVertex(geom,tri.v0);
|
||||
const Vec3f p1 = getVertex(geom,tri.v1);
|
||||
const Vec3f p2 = getVertex(geom,tri.v2);
|
||||
if (unlikely(!isvalid(p0))) return false;
|
||||
if (unlikely(!isvalid(p1))) return false;
|
||||
if (unlikely(!isvalid(p2))) return false;
|
||||
|
||||
bbox = BBox3fa(min(p0,p1,p2),max(p0,p1,p2));
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool buildBounds(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
|
||||
{
|
||||
if (primID >= geom->quadCount) return false;
|
||||
const ze_rtas_quad_indices_uint32_exp_t tri = getPrimitive(geom,primID);
|
||||
if (unlikely(tri.v0 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v1 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v2 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v3 >= geom->vertexCount)) return false;
|
||||
|
||||
const Vec3f p0 = getVertex(geom,tri.v0);
|
||||
const Vec3f p1 = getVertex(geom,tri.v1);
|
||||
const Vec3f p2 = getVertex(geom,tri.v2);
|
||||
const Vec3f p3 = getVertex(geom,tri.v3);
|
||||
if (unlikely(!isvalid(p0))) return false;
|
||||
if (unlikely(!isvalid(p1))) return false;
|
||||
if (unlikely(!isvalid(p2))) return false;
|
||||
if (unlikely(!isvalid(p3))) return false;
|
||||
|
||||
bbox = BBox3fa(min(p0,p1,p2,p3),max(p0,p1,p2,p3));
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool buildBounds(const ze_rtas_builder_procedural_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
|
||||
{
|
||||
if (primID >= geom->primCount) return false;
|
||||
if (geom->pfnGetBoundsCb == nullptr) return false;
|
||||
|
||||
BBox3f bounds;
|
||||
ze_rtas_geometry_aabbs_exp_cb_params_t params = { ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS };
|
||||
params.primID = primID;
|
||||
params.primIDCount = 1;
|
||||
params.pGeomUserPtr = geom->pGeomUserPtr;
|
||||
params.pBuildUserPtr = buildUserPtr;
|
||||
params.pBoundsOut = (ze_rtas_aabb_exp_t*) &bounds;
|
||||
(geom->pfnGetBoundsCb)(¶ms);
|
||||
|
||||
if (unlikely(!isvalid(bounds.lower))) return false;
|
||||
if (unlikely(!isvalid(bounds.upper))) return false;
|
||||
if (unlikely(bounds.empty())) return false;
|
||||
|
||||
bbox = (BBox3f&) bounds;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool buildBounds(const ze_rtas_builder_instance_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
|
||||
{
|
||||
if (primID >= 1) return false;
|
||||
if (geom->pAccelerationStructure == nullptr) return false;
|
||||
if (geom->pTransform == nullptr) return false;
|
||||
|
||||
const AffineSpace3fa local2world = getTransform(geom);
|
||||
const Vec3fa lower(geom->pBounds->lower.x,geom->pBounds->lower.y,geom->pBounds->lower.z);
|
||||
const Vec3fa upper(geom->pBounds->upper.x,geom->pBounds->upper.y,geom->pBounds->upper.z);
|
||||
const BBox3fa bounds = xfmBounds(local2world,BBox3fa(lower,upper));
|
||||
|
||||
if (unlikely(!isvalid(bounds.lower))) return false;
|
||||
if (unlikely(!isvalid(bounds.upper))) return false;
|
||||
if (unlikely(bounds.empty())) return false;
|
||||
|
||||
bbox = bounds;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename GeometryType>
|
||||
PrimInfo createGeometryPrimRefArray(const GeometryType* geom, void* buildUserPtr, evector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID)
|
||||
{
|
||||
PrimInfo pinfo(empty);
|
||||
for (uint32_t primID=r.begin(); primID<r.end(); primID++)
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
if (!buildBounds(geom,primID,bounds,buildUserPtr)) continue;
|
||||
const PrimRef prim(bounds,geomID,primID);
|
||||
pinfo.add_center2(prim);
|
||||
prims[k++] = prim;
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
typedef struct _zet_base_desc_t
|
||||
{
|
||||
/** [in] type of this structure */
|
||||
ze_structure_type_t stype;
|
||||
|
||||
/** [in,out][optional] must be null or a pointer to an extension-specific structure */
|
||||
const void* pNext;
|
||||
|
||||
} zet_base_desc_t_;
|
||||
|
||||
/* Runs validate(arg) and returns its result from the enclosing function
   unless it is ZE_RESULT_SUCCESS. */
#define VALIDATE(arg) { ze_result_t result = validate(arg); if (result != ZE_RESULT_SUCCESS) return result; }
|
||||
|
||||
/* Returns ZE_RESULT_ERROR_INVALID_NULL_POINTER from the enclosing function
   if `arg` is a null pointer. */
#define VALIDATE_PTR(arg) { if ((arg) == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; }
|
||||
|
||||
/* A driver handle is valid if it is non-null. */
ze_result_t validate(ze_driver_handle_t hDriver)
{
  return (hDriver == nullptr) ? ZE_RESULT_ERROR_INVALID_NULL_HANDLE
                              : ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* A device handle is valid if it is non-null. */
ze_result_t validate(ze_device_handle_t hDevice)
{
  return (hDevice == nullptr) ? ZE_RESULT_ERROR_INVALID_NULL_HANDLE
                              : ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Walks the pNext chain starting at `desc` and returns true if it ends
   with a null pNext within 1024 links. The limit also makes the walk
   terminate on cyclic chains, for which false is returned. */
bool checkDescChain(zet_base_desc_t_* desc)
{
  /* supporting maximal 1024 to also detect cycles */
  for (size_t remaining=1024; remaining>0; remaining--)
  {
    const void* const next = desc->pNext;
    if (next == nullptr)
      return true;
    desc = (zet_base_desc_t_*) next;
  }
  return false;
}
|
||||
|
||||
/* Builder object behind a ze_rtas_builder_exp_handle_t. It carries a magic
   number that is set while the object is alive and cleared on destruction,
   so that verify() can recognize handles that do not point to a live
   builder (best effort). */
struct ze_rtas_builder
{
  ze_rtas_builder () {
  }

  ~ze_rtas_builder() {
    /* Clear the magic through a volatile lvalue: a plain store to a member
       inside the destructor is a dead store that optimizers are allowed to
       remove, which would leave the magic intact in freed memory. */
    *const_cast<volatile uint32_t*>(&magick) = 0x0;
  }

  /* true if the magic number is intact */
  bool verify() const {
    return magick == MAGICK;
  }

  enum { MAGICK = 0x45FE67E1 };
  uint32_t magick = MAGICK;
};
|
||||
|
||||
/* A builder handle is valid if it is non-null and the object it points to
   passes its magic number check. */
ze_result_t validate(ze_rtas_builder_exp_handle_t hBuilder)
{
  if (hBuilder == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;

  const ze_rtas_builder* const builder = (ze_rtas_builder*)hBuilder;
  return builder->verify() ? ZE_RESULT_SUCCESS
                           : ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
|
||||
|
||||
struct ze_rtas_parallel_operation_t
|
||||
{
|
||||
ze_rtas_parallel_operation_t() {
|
||||
}
|
||||
|
||||
~ze_rtas_parallel_operation_t() {
|
||||
magick = 0x0;
|
||||
}
|
||||
|
||||
ze_result_t verify() const
|
||||
{
|
||||
if (magick != MAGICK)
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
enum { MAGICK = 0xE84567E1 };
|
||||
uint32_t magick = MAGICK;
|
||||
std::atomic<bool> object_in_use = false;
|
||||
ze_result_t errorCode = ZE_RESULT_SUCCESS;
|
||||
tbb::task_group group;
|
||||
};
|
||||
|
||||
/* A parallel operation handle is valid if it is non-null and the object it
   points to passes its own verify() check. */
ze_result_t validate(ze_rtas_parallel_operation_exp_handle_t hParallelOperation)
{
  if (hParallelOperation == nullptr) {
    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
  }

  const ze_rtas_parallel_operation_t* const op = (ze_rtas_parallel_operation_t*)hParallelOperation;
  return op->verify();
}
|
||||
|
||||
/* Validates a builder descriptor: non-null, expected structure type,
   terminating pNext chain, and a builder version not newer than the
   current one. */
ze_result_t validate(const ze_rtas_builder_exp_desc_t* pDescriptor)
{
  if (pDescriptor == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* structure type and pNext chain */
  if (pDescriptor->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC ||
      !checkDescChain((zet_base_desc_t_*)pDescriptor))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  /* requested builder version must not exceed the current version */
  const bool versionTooNew = uint32_t(ZE_RTAS_BUILDER_EXP_VERSION_CURRENT) < uint32_t(pDescriptor->builderVersion);
  return versionTooNew ? ZE_RESULT_ERROR_INVALID_ENUMERATION
                       : ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Validates a device properties struct: non-null, expected structure type
   and terminating pNext chain. */
ze_result_t validate(ze_rtas_device_exp_properties_t* pProperties)
{
  if (pProperties == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* structure type and pNext chain */
  if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES ||
      !checkDescChain((zet_base_desc_t_*)pProperties))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* An acceleration structure format is valid if it is not the INVALID
   format and does not exceed ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX. */
ze_result_t validate(ze_rtas_format_exp_t rtasFormat)
{
  const bool rejected =
    rtasFormat == ZE_RTAS_FORMAT_EXP_INVALID ||
    uint32_t(rtasFormat) > uint32_t(ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX);

  return rejected ? ZE_RESULT_ERROR_INVALID_ENUMERATION
                  : ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Validates a build operation descriptor. Checks, in this order: non-null
   pointer, structure type, pNext chain, acceleration structure format,
   geometry array pointer, geometry count, build quality and build flags. */
ze_result_t validate(const ze_rtas_builder_build_op_exp_desc_t* args)
{
  /* check for valid pointers */
  if (args == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* check if input descriptor has proper type and a valid pNext chain */
  if (args->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC ||
      !checkDescChain((zet_base_desc_t_*)args))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  /* check if acceleration structure format is supported */
  VALIDATE(args->rtasFormat);

  /* check for valid geometries array */
  if (args->ppGeometries == nullptr && args->numGeometries > 0)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* number of geometries, build quality and build flags must be in range */
  const bool tooManyGeometries = args->numGeometries > 0x00FFFFFF;
  const bool invalidQuality = args->buildQuality < 0 || ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH < args->buildQuality;
  const bool invalidFlags = args->buildFlags >= (ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION<<1);
  if (tooManyGeometries || invalidQuality || invalidFlags)
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Validates a builder properties struct: non-null, expected structure
   type and terminating pNext chain. */
ze_result_t validate(ze_rtas_builder_exp_properties_t* pProp)
{
  /* check for valid pointers */
  if (pProp == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* check if return property has proper type and a valid pNext chain */
  if (pProp->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES ||
      !checkDescChain((zet_base_desc_t_*)pProp))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Validates a parallel operation properties struct: non-null, expected
   structure type and terminating pNext chain. */
ze_result_t validate(ze_rtas_parallel_operation_exp_properties_t* pProperties)
{
  /* check for valid pointer */
  if (pProperties == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* check for proper property type and a valid pNext chain */
  if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES ||
      !checkDescChain((zet_base_desc_t_*)pProperties))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Creates a new builder object and returns its handle through phBuilder.
   Returns the validation error if any argument is invalid. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder)
{
  /* input validation */
  VALIDATE(hDriver);
  VALIDATE(pDescriptor);
  VALIDATE_PTR(phBuilder);

  /* allocate the builder and hand it out as an opaque handle */
  ze_rtas_builder* const builder = new ze_rtas_builder();
  *phBuilder = (ze_rtas_builder_exp_handle_t) builder;
  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Destroys the builder object behind hBuilder after validating the handle. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder)
{
  VALIDATE(hBuilder);

  ze_rtas_builder* const builder = (ze_rtas_builder*) hBuilder;
  delete builder;
  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Reports whether two acceleration structure formats are compatible.
   Formats are compatible only if they are equal; otherwise
   ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE is returned. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver,
                                                                                      const ze_rtas_format_exp_t accelFormat,
                                                                                      const ze_rtas_format_exp_t otherAccelFormat )
{
  /* input validation */
  VALIDATE(hDriver);
  VALIDATE(accelFormat);
  VALIDATE(otherAccelFormat);

  /* identical formats are compatible, anything else is reported as incompatible */
  const bool compatible = (accelFormat == otherAccelFormat);
  return compatible ? ZE_RESULT_SUCCESS
                    : ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE;
}
|
||||
|
||||
uint32_t getNumPrimitives(const ze_rtas_builder_geometry_info_exp_t* geom)
|
||||
{
|
||||
switch (geom->geometryType) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return ((ze_rtas_builder_triangles_geometry_info_exp_t*) geom)->triangleCount;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : return ((ze_rtas_builder_procedural_geometry_info_exp_t*) geom)->primCount;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return ((ze_rtas_builder_quads_geometry_info_exp_t*) geom)->quadCount;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : return 1;
|
||||
default : return 0;
|
||||
};
|
||||
}
|
||||
|
||||
/* Queries the memory requirements of a build operation from
   QBVH6BuilderSAH::estimateSize and stores them in pProp (expected and
   worst case acceleration structure size, scratch buffer size). */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
                                                                                const ze_rtas_builder_build_op_exp_desc_t* args,
                                                                                ze_rtas_builder_exp_properties_t* pProp)
{
  /* input validation */
  VALIDATE(hBuilder);
  VALIDATE(args);
  VALIDATE(pProp);

  const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries;
  const size_t numGeometries = args->numGeometries;

  /* number of primitives of a geometry, 0 for null entries */
  auto getSize = [&](uint32_t geomID) -> size_t {
    const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
    return (geom == nullptr) ? 0 : getNumPrimitives(geom);
  };

  /* builder primitive type of a geometry; throws for unknown geometry types */
  auto getType = [&](unsigned int geomID)
  {
    const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
    assert(geom);
    switch (geom->geometryType) {
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE;
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD;
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL;
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE;
    default: throw std::runtime_error("invalid geometry type");
    };
  };

  /* query memory requirements from builder */
  size_t expectedBytes = 0;
  size_t worstCaseBytes = 0;
  size_t scratchBytes = 0;
  QBVH6BuilderSAH::estimateSize(numGeometries, getSize, getType,
                                args->rtasFormat, args->buildQuality, args->buildFlags,
                                expectedBytes, worstCaseBytes, scratchBytes);

  /* fill return struct */
  pProp->flags = 0;
  pProp->rtasBufferSizeBytesExpected = expectedBytes;
  pProp->rtasBufferSizeBytesMaxRequired = worstCaseBytes;
  pProp->scratchBufferSizeBytes = scratchBytes;
  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
ze_result_t zeRTASBuilderBuildExpBody(const ze_rtas_builder_build_op_exp_desc_t* args,
|
||||
void *pScratchBuffer, size_t scratchBufferSizeBytes,
|
||||
void *pRtasBuffer, size_t rtasBufferSizeBytes,
|
||||
void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) try
|
||||
{
|
||||
const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries;
|
||||
const uint32_t numGeometries = args->numGeometries;
|
||||
|
||||
/* verify input descriptors */
|
||||
parallel_for(numGeometries,[&](uint32_t geomID) {
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
if (geom == nullptr) return;
|
||||
|
||||
switch (geom->geometryType) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : verifyGeometryDesc((ze_rtas_builder_triangles_geometry_info_exp_t*)geom); break;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : verifyGeometryDesc((ze_rtas_builder_quads_geometry_info_exp_t* )geom); break;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : verifyGeometryDesc((ze_rtas_builder_procedural_geometry_info_exp_t*)geom); break;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : verifyGeometryDesc((ze_rtas_builder_instance_geometry_info_exp_t* )geom); break;
|
||||
default: throw std::runtime_error("invalid geometry type");
|
||||
};
|
||||
});
|
||||
|
||||
auto getSize = [&](uint32_t geomID) -> size_t {
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
if (geom == nullptr) return 0;
|
||||
return getNumPrimitives(geom);
|
||||
};
|
||||
|
||||
auto getType = [&](unsigned int geomID)
|
||||
{
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
assert(geom);
|
||||
switch (geom->geometryType) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE;
|
||||
default: throw std::runtime_error("invalid geometry type");
|
||||
};
|
||||
};
|
||||
|
||||
auto createPrimRefArray = [&] (evector<PrimRef>& prims, BBox1f time_range, const range<size_t>& r, size_t k, unsigned int geomID) -> PrimInfo
|
||||
{
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
assert(geom);
|
||||
|
||||
switch (geom->geometryType) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return createGeometryPrimRefArray((ze_rtas_builder_triangles_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return createGeometryPrimRefArray((ze_rtas_builder_quads_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return createGeometryPrimRefArray((ze_rtas_builder_procedural_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return createGeometryPrimRefArray((ze_rtas_builder_instance_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID);
|
||||
default: throw std::runtime_error("invalid geometry type");
|
||||
};
|
||||
};
|
||||
|
||||
auto convertGeometryFlags = [&] (ze_rtas_builder_packed_geometry_exp_flags_t flags) -> GeometryFlags {
|
||||
return (flags & ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE) ? GeometryFlags::NONE : GeometryFlags::OPAQUE;
|
||||
};
|
||||
|
||||
auto getTriangle = [&](unsigned int geomID, unsigned int primID)
|
||||
{
|
||||
const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID];
|
||||
assert(geom);
|
||||
|
||||
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
|
||||
if (unlikely(tri.v0 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
|
||||
if (unlikely(tri.v1 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
|
||||
if (unlikely(tri.v2 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
|
||||
|
||||
const Vec3f p0 = getVertex(geom,tri.v0);
|
||||
const Vec3f p1 = getVertex(geom,tri.v1);
|
||||
const Vec3f p2 = getVertex(geom,tri.v2);
|
||||
if (unlikely(!isvalid(p0))) return QBVH6BuilderSAH::Triangle();
|
||||
if (unlikely(!isvalid(p1))) return QBVH6BuilderSAH::Triangle();
|
||||
if (unlikely(!isvalid(p2))) return QBVH6BuilderSAH::Triangle();
|
||||
|
||||
const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags);
|
||||
return QBVH6BuilderSAH::Triangle(tri.v0,tri.v1,tri.v2,p0,p1,p2,gflags,geom->geometryMask);
|
||||
};
|
||||
|
||||
auto getTriangleIndices = [&] (uint32_t geomID, uint32_t primID) {
|
||||
const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID];
|
||||
assert(geom);
|
||||
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
|
||||
return Vec3<uint32_t>(tri.v0,tri.v1,tri.v2);
|
||||
};
|
||||
|
||||
auto getQuad = [&](unsigned int geomID, unsigned int primID)
|
||||
{
|
||||
const ze_rtas_builder_quads_geometry_info_exp_t* geom = (const ze_rtas_builder_quads_geometry_info_exp_t*) geometries[geomID];
|
||||
assert(geom);
|
||||
|
||||
const ze_rtas_quad_indices_uint32_exp_t quad = getPrimitive(geom,primID);
|
||||
const Vec3f p0 = getVertex(geom,quad.v0);
|
||||
const Vec3f p1 = getVertex(geom,quad.v1);
|
||||
const Vec3f p2 = getVertex(geom,quad.v2);
|
||||
const Vec3f p3 = getVertex(geom,quad.v3);
|
||||
|
||||
const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags);
|
||||
return QBVH6BuilderSAH::Quad(p0,p1,p2,p3,gflags,geom->geometryMask);
|
||||
};
|
||||
|
||||
auto getProcedural = [&](unsigned int geomID, unsigned int primID) {
|
||||
const ze_rtas_builder_procedural_geometry_info_exp_t* geom = (const ze_rtas_builder_procedural_geometry_info_exp_t*) geometries[geomID];
|
||||
assert(geom);
|
||||
return QBVH6BuilderSAH::Procedural(geom->geometryMask); // FIXME: pass gflags
|
||||
};
|
||||
|
||||
auto getInstance = [&](unsigned int geomID, unsigned int primID)
|
||||
{
|
||||
assert(geometries[geomID]);
|
||||
assert(geometries[geomID]->geometryType == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE);
|
||||
const ze_rtas_builder_instance_geometry_info_exp_t* geom = (const ze_rtas_builder_instance_geometry_info_exp_t*) geometries[geomID];
|
||||
void* accel = geom->pAccelerationStructure;
|
||||
const AffineSpace3fa local2world = getTransform(geom);
|
||||
return QBVH6BuilderSAH::Instance(local2world,accel,geom->geometryMask,geom->instanceUserID); // FIXME: pass instance flags
|
||||
};
|
||||
|
||||
/* dispatch globals ptr for debugging purposes */
|
||||
void* dispatchGlobalsPtr = nullptr;
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
if (args->pNext) {
|
||||
zet_base_desc_t_* next = (zet_base_desc_t_*) args->pNext;
|
||||
if (next->stype == ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC) {
|
||||
ze_rtas_builder_build_op_debug_exp_desc_t* debug_ext = (ze_rtas_builder_build_op_debug_exp_desc_t*) next;
|
||||
dispatchGlobalsPtr = debug_ext->dispatchGlobalsPtr;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
bool verbose = false;
|
||||
bool success = QBVH6BuilderSAH::build(numGeometries, nullptr,
|
||||
getSize, getType,
|
||||
createPrimRefArray, getTriangle, getTriangleIndices, getQuad, getProcedural, getInstance,
|
||||
(char*)pRtasBuffer, rtasBufferSizeBytes,
|
||||
pScratchBuffer, scratchBufferSizeBytes,
|
||||
(BBox3f*) pBounds, pRtasBufferSizeBytes,
|
||||
args->rtasFormat, args->buildQuality, args->buildFlags, verbose, dispatchGlobalsPtr);
|
||||
if (!success) {
|
||||
return ZE_RESULT_EXP_RTAS_BUILD_RETRY;
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
//std::cerr << "caught exception during BVH build: " << e.what() << std::endl;
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
/*
 * Entry point for building a ray tracing acceleration structure.
 *
 * hBuilder, args, pScratchBuffer and pRtasBuffer are checked with the
 * VALIDATE / VALIDATE_PTR macros before anything else happens. All
 * remaining arguments are forwarded unchanged to zeRTASBuilderBuildExpBody.
 *
 * With a parallel operation handle the build is spawned into the task
 * group of that operation (inside g_arena) and the function returns
 * ZE_RESULT_EXP_RTAS_BUILD_DEFERRED; the result of the build is stored in
 * the operation's errorCode. If the operation is already marked as in use,
 * ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE is returned and no build is started.
 *
 * Without a parallel operation handle the build runs synchronously inside
 * g_arena and its result code is returned directly.
 */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
                                                                   const ze_rtas_builder_build_op_exp_desc_t* args,
                                                                   void *pScratchBuffer, size_t scratchBufferSizeBytes,
                                                                   void *pRtasBuffer, size_t rtasBufferSizeBytes,
                                                                   ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
                                                                   void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes)
{
  /* input validation */
  VALIDATE(hBuilder);
  VALIDATE(args);
  VALIDATE_PTR(pScratchBuffer);
  VALIDATE_PTR(pRtasBuffer);

  /* if parallel operation is provided then execute using thread arena inside task group ... */
  if (hParallelOperation)
  {
    VALIDATE(hParallelOperation);

    ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;

    /* claim the operation with a single atomic read-modify-write; a separate
       load() followed by store(true) would allow two concurrent callers to
       both observe "not in use" and attach two builds to the same operation */
    if (op->object_in_use.exchange(true))
      return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;

    /* the inner lambda captures by value as it runs after this function returned */
    g_arena.execute([&](){
      op->group.run([=](){
        op->errorCode = zeRTASBuilderBuildExpBody(args,
                                                  pScratchBuffer, scratchBufferSizeBytes,
                                                  pRtasBuffer, rtasBufferSizeBytes,
                                                  pBuildUserPtr, pBounds, pRtasBufferSizeBytes);
      });
    });
    return ZE_RESULT_EXP_RTAS_BUILD_DEFERRED;
  }
  /* ... otherwise we just execute inside task arena to avoid spawning of TBB worker threads */
  else
  {
    ze_result_t errorCode = ZE_RESULT_SUCCESS;
    g_arena.execute([&](){
      errorCode = zeRTASBuilderBuildExpBody(args,
                                            pScratchBuffer, scratchBufferSizeBytes,
                                            pRtasBuffer, rtasBufferSizeBytes,
                                            pBuildUserPtr, pBounds, pRtasBufferSizeBytes);
    });
    return errorCode;
  }
}
|
||||
|
||||
/*
 * Creates a parallel operation object and returns it to the caller as an
 * opaque handle written to *phParallelOperation.
 */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation)
{
  /* check the driver handle and the output pointer */
  VALIDATE(hDriver);
  VALIDATE_PTR(phParallelOperation);

  /* allocate the object first, then publish it through the output handle */
  ze_rtas_parallel_operation_t* const operation = new ze_rtas_parallel_operation_t();
  *phParallelOperation = (ze_rtas_parallel_operation_exp_handle_t) operation;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/*
 * Destroys the parallel operation object referenced by the handle.
 */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation )
{
  /* check the handle before touching the object */
  VALIDATE(hParallelOperation);

  /* the handle is the address of the object allocated at creation time */
  ze_rtas_parallel_operation_t* const operation = (ze_rtas_parallel_operation_t*) hParallelOperation;
  delete operation;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/*
 * Fills pProperties for a parallel operation that currently has a build
 * attached. Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when the operation is
 * not marked as in use.
 */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties )
{
  /* check handle and output structure */
  VALIDATE(hParallelOperation);
  VALIDATE(pProperties);

  /* properties can only be queried while a build is bound to the operation */
  ze_rtas_parallel_operation_t* const operation = (ze_rtas_parallel_operation_t*) hParallelOperation;
  const bool buildAttached = operation->object_in_use.load();
  if (!buildAttached) {
    return ZE_RESULT_ERROR_INVALID_ARGUMENT;
  }

  /* no flags are reported; concurrency is taken from the current TBB task arena */
  pProperties->flags = 0;
  pProperties->maxConcurrency = tbb::this_task_arena::max_concurrency();

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/*
 * Waits for the build attached to the parallel operation to finish and
 * returns the result code the build stored in the operation. After the
 * wait the operation is marked as no longer in use, so it can be passed to
 * another build.
 */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation)
{
  /* check for valid handle */
  VALIDATE(hParallelOperation);

  ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;

  /* wait inside the arena the build tasks were spawned into */
  g_arena.execute([&](){ op->group.wait(); });

  /* snapshot the build result BEFORE releasing the operation: once the
     in-use flag is cleared another thread may attach a new build to this
     operation and overwrite errorCode */
  const ze_result_t result = op->errorCode;
  op->object_in_use.store(false);

  return result;
}
|
||||
}
|
||||
66
Framework/external/embree/kernels/rthwif/rtbuild/rtbuild.h
vendored
Normal file
66
Framework/external/embree/kernels/rthwif/rtbuild/rtbuild.h
vendored
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright 2009-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../level_zero/ze_api.h"
|
||||
|
||||
#if !defined(ZE_RTAS_BUILDER_EXP_NAME)
|
||||
#include "../../level_zero/ze_rtas.h"
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
# define RTHWIF_API_EXTERN_C extern "C"
|
||||
#else
|
||||
# define RTHWIF_API_EXTERN_C
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
#if defined(EMBREE_RTHWIF_STATIC_LIB)
|
||||
# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C
|
||||
# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C
|
||||
#else
|
||||
# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C __declspec(dllimport)
|
||||
# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __declspec(dllexport)
|
||||
#endif
|
||||
#else
|
||||
# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C
|
||||
# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __attribute__ ((visibility ("default")))
|
||||
#endif
|
||||
|
||||
// Internal acceleration structure format identifiers.
typedef enum _ze_raytracing_accel_format_internal_t {
  // invalid acceleration structure format
  ZE_RTAS_DEVICE_FORMAT_EXP_INVALID = 0,
  // acceleration structure format version 1
  ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1 = 1,
  // acceleration structure format version 2
  ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_2 = 2,
  // highest format version listed in this enumeration
  ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX = ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_2
} ze_raytracing_accel_format_internal_t;
|
||||
|
||||
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder);
|
||||
|
||||
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder);
|
||||
|
||||
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver,
|
||||
const ze_rtas_format_exp_t accelFormat,
|
||||
const ze_rtas_format_exp_t otherAccelFormat);
|
||||
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
|
||||
const ze_rtas_builder_build_op_exp_desc_t* args,
|
||||
ze_rtas_builder_exp_properties_t* pProp);
|
||||
|
||||
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
|
||||
const ze_rtas_builder_build_op_exp_desc_t* args,
|
||||
void *pScratchBuffer, size_t scratchBufferSizeBytes,
|
||||
void *pRtasBuffer, size_t rtasBufferSizeBytes,
|
||||
ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
|
||||
void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes);
|
||||
|
||||
// Creates a parallel operation object and writes its handle to *phParallelOperation.
// hDriver and phParallelOperation are checked by the implementation before the
// object is allocated; ZE_RESULT_SUCCESS is returned once the handle is stored.
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation);
|
||||
|
||||
// Deletes the parallel operation object behind hParallelOperation and returns
// ZE_RESULT_SUCCESS. The handle is checked by the implementation first.
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation );
|
||||
|
||||
// Fills *pProperties (flags and maxConcurrency) for a parallel operation.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when the operation is not currently
// marked as in use by a build, ZE_RESULT_SUCCESS otherwise.
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties );
|
||||
|
||||
// Waits for the tasks of the parallel operation to finish, marks the operation
// as no longer in use and returns the result code stored in the operation.
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation);
|
||||
|
||||
155
Framework/external/embree/kernels/rthwif/rtbuild/statistics.cpp
vendored
Normal file
155
Framework/external/embree/kernels/rthwif/rtbuild/statistics.cpp
vendored
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "statistics.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* Scope guard for stream formatting: remembers the format flags and the
   precision of a stream at construction and writes both back on destruction. */
class RestoreStreamState
{
public:
  /* take a snapshot of the current formatting state of the stream */
  RestoreStreamState(std::ostream& iostream)
    : stream(iostream)
  {
    savedFlags = stream.flags();
    savedPrecision = stream.precision();
  }

  /* put the snapshot back, undoing any changes made in between */
  ~RestoreStreamState()
  {
    stream.precision(savedPrecision);
    stream.flags(savedFlags);
  }

private:
  std::ostream& stream;            // stream being guarded
  std::ios::fmtflags savedFlags;   // flags at construction time
  std::streamsize savedPrecision;  // precision at construction time
};
|
||||
|
||||
/* Quotient a/b, with a zero denominator mapped to 0 */
double ratio(double a, double b)
{
  return (b == 0.0) ? 0.0 : a/b;
}
|
||||
|
||||
double percent(double a, double b) {
|
||||
return 100.0*ratio(a,b);
|
||||
}
|
||||
|
||||
/* Integer overload: converts both counters to double and forwards to the floating point ratio */
double ratio(size_t a, size_t b)
{
  const double numerator = double(a);
  const double denominator = double(b);
  return ratio(numerator, denominator);
}
|
||||
/* Integer overload: converts both counters to double and forwards to the floating point percent */
double percent(size_t a, size_t b)
{
  const double part = double(a);
  const double whole = double(b);
  return percent(part, whole);
}
|
||||
|
||||
void BVHStatistics::NodeStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const
|
||||
{
|
||||
RestoreStreamState iostate(cout);
|
||||
cout << std::setw(7) << numNodes << " ";
|
||||
cout << std::setw(7) << std::setprecision(3) << sah();
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << bytes()/1E6 << " MB ";
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(numBytes,numBytes) << "% ";
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numNodes) << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numChildrenUsed) << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " ";
|
||||
cout << std::setw(7) << std::setprecision(2) << ratio(numChildrenUsed,numNodes) << " ";
|
||||
cout << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% ";
|
||||
cout << std::endl;
|
||||
}
|
||||
|
||||
void BVHStatistics::LeafStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks) const
|
||||
{
|
||||
RestoreStreamState iostate(cout);
|
||||
size_t N = blocks ? numBlocks : numLeaves;
|
||||
cout << std::setw(7) << N << " ";
|
||||
cout << std::setw(7) << std::setprecision(3) << sah();
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << double(bytes())/1E6 << " MB ";
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(numBytesUsed,numBytesTotal) << "% ";
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),N) << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimsUsed) << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " ";
|
||||
cout << std::setw(7) << std::setprecision(2) << ratio(numPrimsUsed,N) << " ";
|
||||
cout << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% ";
|
||||
cout << std::endl;
|
||||
}
|
||||
|
||||
void BVHStatistics::print (std::ostream& cout) const
|
||||
{
|
||||
RestoreStreamState iostate(cout);
|
||||
cout.setf(std::ios::fixed, std::ios::floatfield);
|
||||
cout.fill(' ');
|
||||
|
||||
double totalSAH = internalNode.nodeSAH + quadLeaf.leafSAH + proceduralLeaf.leafSAH + instanceLeaf.leafSAH;
|
||||
size_t totalBytes = internalNode.bytes() + quadLeaf.bytes() + proceduralLeaf.bytes() + instanceLeaf.bytes();
|
||||
size_t totalNodes = internalNode.numNodes + quadLeaf.numLeaves + proceduralLeaf.numLeaves + instanceLeaf.numLeaves;
|
||||
size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed;
|
||||
|
||||
cout << std::endl;
|
||||
cout << "BVH statistics:" << std::endl;
|
||||
cout << "---------------" << std::endl;
|
||||
cout << " numScenePrimitives = " << numScenePrimitives << std::endl;
|
||||
cout << " numBuildPrimitives = " << numBuildPrimitives << std::endl;
|
||||
cout << " numBuildPrimitivesPostSplit = " << numBuildPrimitivesPostSplit << std::endl;
|
||||
cout << " primRefSplits = " << std::setprecision(2) << percent(numBuildPrimitivesPostSplit,numBuildPrimitives) << "%" << std::endl;
|
||||
cout << " numBVHPrimitives = " << totalPrimitives << std::endl;
|
||||
cout << " spatialSplits = " << std::setprecision(2) << percent(totalPrimitives,numScenePrimitives) << "%" << std::endl;
|
||||
cout << std::endl;
|
||||
|
||||
cout << " #nodes SAH total bytes used total b/node b/child b/prim #child fill" << std::endl;
|
||||
cout << "----------------------------------------------------------------------------------------------------------------------" << std::endl;
|
||||
cout << " total : ";
|
||||
cout << std::setw(7) << totalNodes << " ";
|
||||
cout << std::setw(7) << std::setprecision(3) << totalSAH;
|
||||
cout << " 100.00% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << totalBytes/1E6 << " MB ";
|
||||
cout << " 100.00% ";
|
||||
cout << " 100.00% ";
|
||||
cout << " ";
|
||||
cout << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(totalBytes,totalPrimitives) << std::endl;
|
||||
|
||||
LeafStat leaf = quadLeaf + proceduralLeaf + instanceLeaf;
|
||||
cout << " internalNode : "; internalNode .print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
cout << " leaves : "; leaf .print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
cout << " quadLeaf : "; quadLeaf .print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
cout << " proceduralLeaf : "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
cout << " proceduralBlock: "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives,true);
|
||||
cout << " instanceLeaf : "; instanceLeaf .print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
}
|
||||
|
||||
void BVHStatistics::print_raw(std::ostream& cout) const
|
||||
{
|
||||
RestoreStreamState iostate(cout);
|
||||
size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed;
|
||||
cout << "bvh_spatial_split_factor = " << percent(totalPrimitives,numBuildPrimitives) << std::endl;
|
||||
|
||||
cout << "bvh_internal_sah = " << internalNode.nodeSAH << std::endl;
|
||||
cout << "bvh_internal_num = " << internalNode.numNodes << std::endl;
|
||||
cout << "bvh_internal_num_children_used = " << internalNode.numChildrenUsed << std::endl;
|
||||
cout << "bvh_internal_num_children_total = " << internalNode.numChildrenTotal << std::endl;
|
||||
cout << "bvh_internal_num_bytes = " << internalNode.bytes() << std::endl;
|
||||
|
||||
cout << "bvh_quad_leaf_sah = " << quadLeaf.leafSAH << std::endl;
|
||||
cout << "bvh_quad_leaf_num = " << quadLeaf.numLeaves << std::endl;
|
||||
cout << "bvh_quad_leaf_num_prims_used = " << quadLeaf.numPrimsUsed << std::endl;
|
||||
cout << "bvh_quad_leaf_num_prims_total = " << quadLeaf.numPrimsTotal << std::endl;
|
||||
cout << "bvh_quad_leaf_num_bytes_used = " << quadLeaf.numBytesUsed << std::endl;
|
||||
cout << "bvh_quad_leaf_num_bytes_total = " << quadLeaf.numBytesTotal << std::endl;
|
||||
|
||||
cout << "bvh_procedural_leaf_sah = " << proceduralLeaf.leafSAH << std::endl;
|
||||
cout << "bvh_procedural_leaf_num = " << proceduralLeaf.numLeaves << std::endl;
|
||||
cout << "bvh_procedural_leaf_num_prims_used = " << proceduralLeaf.numPrimsUsed << std::endl;
|
||||
cout << "bvh_procedural_leaf_num_prims_total = " << proceduralLeaf.numPrimsTotal << std::endl;
|
||||
cout << "bvh_procedural_leaf_num_bytes_used = " << proceduralLeaf.numBytesUsed << std::endl;
|
||||
cout << "bvh_procedural_leaf_num_bytes_total = " << proceduralLeaf.numBytesTotal << std::endl;
|
||||
|
||||
cout << "bvh_instance_leaf_sah = " << instanceLeaf.leafSAH << std::endl;
|
||||
cout << "bvh_instance_leaf_num = " << instanceLeaf.numLeaves << std::endl;
|
||||
cout << "bvh_instance_leaf_num_prims_used = " << instanceLeaf.numPrimsUsed << std::endl;
|
||||
cout << "bvh_instance_leaf_num_prims_total = " << instanceLeaf.numPrimsTotal << std::endl;
|
||||
cout << "bvh_instance_leaf_num_bytes_used = " << instanceLeaf.numBytesUsed << std::endl;
|
||||
cout << "bvh_instance_leaf_num_bytes_total = " << instanceLeaf.numBytesTotal << std::endl;
|
||||
}
|
||||
}
|
||||
118
Framework/external/embree/kernels/rthwif/rtbuild/statistics.h
vendored
Normal file
118
Framework/external/embree/kernels/rthwif/rtbuild/statistics.h
vendored
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(ZE_RAYTRACING)
|
||||
#include "sys/platform.h"
|
||||
#else
|
||||
#include "../../../common/sys/platform.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* Statistics collected for a BVH: primitive counters plus one record for
   the internal nodes and one record per leaf category. */
struct BVHStatistics
{
  /* Counters for internal nodes. */
  struct NodeStat
  {
    NodeStat ( double nodeSAH = 0,
               size_t numNodes = 0,
               size_t numChildrenUsed = 0,
               size_t numChildrenTotal = 0,
               size_t numBytes = 0)
      : nodeSAH(nodeSAH)
      , numNodes(numNodes)
      , numChildrenUsed(numChildrenUsed)
      , numChildrenTotal(numChildrenTotal)
      , numBytes(numBytes)
    {}

    double sah() const { return nodeSAH; }
    size_t bytes() const { return numBytes; }
    size_t size() const { return numNodes; }

    /* fill rate = used children / total children, 0 when there are no children */
    double fillRateNom () const { return double(numChildrenUsed); }
    double fillRateDen () const { return double(numChildrenTotal); }
    double fillRate () const
    {
      const double den = fillRateDen();
      if (den != 0.0) return fillRateNom()/den;
      return 0.0;
    }

    /* member-wise sum of two records */
    friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b)
    {
      NodeStat sum(a);
      sum.nodeSAH          += b.nodeSAH;
      sum.numNodes         += b.numNodes;
      sum.numChildrenUsed  += b.numChildrenUsed;
      sum.numChildrenTotal += b.numChildrenTotal;
      sum.numBytes         += b.numBytes;
      return sum;
    }

    void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const;

    double nodeSAH;           //!< SAH of the nodes
    size_t numNodes;          //!< Number of nodes
    size_t numChildrenUsed;   //!< Number of used children (fill rate numerator)
    size_t numChildrenTotal;  //!< Number of children in total (fill rate denominator)
    size_t numBytes;          //!< Number of bytes of the nodes
  };

  /* Counters for one category of leaves. */
  struct LeafStat
  {
    LeafStat(double leafSAH = 0.0f,
             size_t numLeaves = 0,
             size_t numBlocks = 0,
             size_t numPrimsUsed = 0,
             size_t numPrimsTotal = 0,
             size_t numBytesUsed = 0,
             size_t numBytesTotal = 0)
      : leafSAH(leafSAH)
      , numLeaves(numLeaves)
      , numBlocks(numBlocks)
      , numPrimsUsed(numPrimsUsed)
      , numPrimsTotal(numPrimsTotal)
      , numBytesUsed(numBytesUsed)
      , numBytesTotal(numBytesTotal)
    {}

    double sah() const { return leafSAH; }
    size_t bytes() const { return numBytesTotal; }
    size_t size() const { return numLeaves; }

    /* fill rate = used primitives / total primitives, 0 when there are none */
    double fillRateNom () const { return double(numPrimsUsed); }
    double fillRateDen () const { return double(numPrimsTotal); }
    double fillRate () const
    {
      const double den = fillRateDen();
      if (den != 0.0) return fillRateNom()/den;
      return 0.0;
    }

    /* member-wise sum of two records */
    friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
    {
      LeafStat sum(a);
      sum.leafSAH       += b.leafSAH;
      sum.numLeaves     += b.numLeaves;
      sum.numBlocks     += b.numBlocks;
      sum.numPrimsUsed  += b.numPrimsUsed;
      sum.numPrimsTotal += b.numPrimsTotal;
      sum.numBytesUsed  += b.numBytesUsed;
      sum.numBytesTotal += b.numBytesTotal;
      return sum;
    }

    void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks = false) const;

    double leafSAH;        //!< SAH of the leaves only
    size_t numLeaves;      //!< Number of leaf nodes.
    size_t numBlocks;      //!< Number of blocks referenced
    size_t numPrimsUsed;   //!< Number of active primitives
    size_t numPrimsTotal;  //!< Number of active and inactive primitives
    size_t numBytesUsed;   //!< Number of used bytes
    size_t numBytesTotal;  //!< Number of total bytes of leaves.
  };

  /* all counters start at zero */
  BVHStatistics ()
    : numScenePrimitives(0)
    , numBuildPrimitives(0)
    , numBuildPrimitivesPostSplit(0)
  {}

  void print (std::ostream& cout) const;
  void print_raw(std::ostream& cout) const;

  size_t numScenePrimitives;
  size_t numBuildPrimitives;
  size_t numBuildPrimitivesPostSplit;
  NodeStat internalNode;
  LeafStat quadLeaf;
  LeafStat proceduralLeaf;
  LeafStat instanceLeaf;
};
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue