Initial commit.

This commit is contained in:
hal8174 2024-04-23 10:14:24 +02:00
commit d3bb49b3f5
1073 changed files with 484757 additions and 0 deletions

View file

@ -0,0 +1,182 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Project preamble: minimum CMake version, project name, testing support,
# library version and the C++ standard used for all targets.
# NOTE(review): CMAKE_CXX_STANDARD 17 and the FetchContent module used further
# down need a newer CMake than 3.1 -- confirm whether the minimum should be raised.
cmake_minimum_required(VERSION 3.1.0)
project(ze_raytracing)

# Pulls in the CTest module.
include(CTest)

# Library version, as individual components and as a dotted string.
set(RTHWIF_VERSION_MAJOR 4)
set(RTHWIF_VERSION_MINOR 1)
set(RTHWIF_VERSION_PATCH 0)
set(RTHWIF_VERSION "${RTHWIF_VERSION_MAJOR}.${RTHWIF_VERSION_MINOR}.${RTHWIF_VERSION_PATCH}")

set(CMAKE_CXX_STANDARD 17)
# Two configurations are supported:
#  * EMBREE_VERSION_MAJOR not defined: this directory is configured on its own
#    and has to set up output directories, config headers and helper libraries.
#  * EMBREE_VERSION_MAJOR defined: only the options specific to this library
#    are declared here.
if(NOT DEFINED EMBREE_VERSION_MAJOR)

  set(RTHWIF_STANDALONE ON)
  set(RTHWIF_NAME ze_raytracing)
  add_definitions("-DRTHWIF_STANDALONE")

  set(EMBREE_CMAKEEXPORT_DIR "cmake")

  option(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON)
  set(EMBREE_RTHWIF_STATIC_LIB OFF)
  set(EMBREE_BUILDER_TBB_STATIC ON)

  # Place executables, archives and shared libraries directly in the binary dir.
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")

  # Lets include() find the .cmake modules stored next to this file.
  set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" ${CMAKE_MODULE_PATH})

  # Generate the configuration headers from their templates.
  configure_file(
    "${PROJECT_SOURCE_DIR}/../../kernels/config.h.in"
    "${PROJECT_SOURCE_DIR}/../../kernels/config.h"
  )

  set(EMBREE_MAX_INSTANCE_LEVEL_COUNT 1)

  configure_file(
    "${PROJECT_SOURCE_DIR}/../../kernels/rtcore_config.h.in"
    "${PROJECT_SOURCE_DIR}/../../include/embree4/rtcore_config.h"
  )

  if(NOT WIN32)
    # generate position independent code suitable for shared libraries
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
  endif()

  add_subdirectory(../../common/sys sys)
  add_subdirectory(../../common/simd simd)

  # Add the include directories next to the compiler binary as system include
  # paths: disables warnings from the SYCL headers (FIXME: why required?)
  get_filename_component(SYCL_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem \"${SYCL_COMPILER_DIR}/../include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../include/\"")

  find_package(TBB 2020)

else()

  set(RTHWIF_NAME embree_rthwif)

  option(EMBREE_RTHWIF_STATIC_LIB "Build RTHWIF as a static library." ON)
  option(EMBREE_BUILDER_TBB_STATIC "Use a staticaly compiled TBB version for the Embree builder for GPU." OFF)

endif()
# The validation API without implicit dispatch globals needs the
# EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS define.
if(EMBREE_SYCL_RT_VALIDATION_API AND NOT EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
  add_definitions("-DEMBREE_SYCL_ALLOC_DISPATCH_GLOBALS")
endif()

# disables "use of bitwise '&' with boolean operands" warning, for both the
# regular and the SYCL flag sets
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-bitwise-instead-of-logical")
set(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} -Wno-bitwise-instead-of-logical")

# Library type handed to add_library(): SHARED unless a static build is requested.
set(RTHWIF_LIB_TYPE SHARED)
if(EMBREE_RTHWIF_STATIC_LIB)
  set(RTHWIF_LIB_TYPE STATIC)
endif()
# By default the library links against the `tasking` target, which carries all
# TBB related information needed when TASKING_TBB is used.
set(TBB_TARGET tasking)

if(EMBREE_BUILDER_TBB_STATIC OR NOT TASKING_TBB)

  ####################################################################
  # fetch TBB and build static version of it

  set(TBB_TARGET tbb)
  add_definitions("-D_CRT_SECURE_NO_WARNINGS")

  # Options read by the fetched TBB build.
  option(TBB_STRICT "Treat compiler warnings as errors" OFF)
  option(TBB_TEST "Enable testing" OFF)
  option(TBBMALLOC_BUILD "Enable tbbmalloc build" OFF)
  set(TBB_DIR OFF)
  set(BUILD_SHARED_LIBS OFF)

  include(FetchContent)
  set(FETCHCONTENT_QUIET OFF)

  # allow setting the repository externally
  if(NOT EMBREE_RTHWIF_TBB_GIT_REPOSITORY)
    set(EMBREE_RTHWIF_TBB_GIT_REPOSITORY "https://github.com/oneapi-src/oneTBB.git")
  endif()

  FetchContent_Declare(
    tbb_static
    GIT_REPOSITORY ${EMBREE_RTHWIF_TBB_GIT_REPOSITORY}
    GIT_TAG v2021.6.0
  )

  FetchContent_GetProperties(tbb_static)
  if(NOT tbb_static_POPULATED)
    FetchContent_Populate(tbb_static)
    # We want to build tbb_static to link it into embree_rthwif, but don't want to
    # install it as part of the Embree install targets.
    add_subdirectory(${tbb_static_SOURCE_DIR} ${tbb_static_BINARY_DIR} EXCLUDE_FROM_ALL)
  endif()

  # Hide the FetchContent and TBB cache entries from the default cache view.
  mark_as_advanced(
    FETCHCONTENT_BASE_DIR
    FETCHCONTENT_FULLY_DISCONNECTED
    FETCHCONTENT_QUIET
    FETCHCONTENT_SOURCE_DIR_TBB_STATIC
    FETCHCONTENT_UPDATES_DISCONNECTED
    FETCHCONTENT_UPDATES_DISCONNECTED_TBB_STATIC
    TBB4PY_BUILD
    TBBMALLOC_BUILD
    TBB_BUILD
    TBB_CPF
    TBB_DISABLE_HWLOC_AUTOMATIC_SEARCH
    TBB_ENABLE_IPO
    TBB_EXAMPLES
    TBB_FIND_PACKAGE
    TBB_INSTALL_VARS
    TBB_NO_APPCONTAINER
    TBB_SANITIZE
    TBB_STRICT
    TBB_TEST
    TBB_TEST_SPEC
    TBB_VALGRIND_MEMCHECK
    TBB_WINDOWS_DRIVER
  )

  add_definitions(-DTASKING_TBB)

  ####################################################################

endif()
# Packaging is only set up when RTHWIF_STANDALONE is set: the package
# description module is included first, then CPack.
if(RTHWIF_STANDALONE)
  include(package_ze_raytracing)
  include(CPack)
endif()
# Static helper library containing the ray tracing validation code; only built
# when the validation API is enabled.
if(EMBREE_SYCL_RT_VALIDATION_API)
  add_library(embree_rthwif_sycl STATIC rttrace/rttrace_validation.cpp)

  # COMPILE_FLAGS is a single string property. It is set exactly once so that
  # the SYCL flags and the EMBREE_SYCL_SUPPORT define both reach the compiler;
  # setting the property a second time would replace the first value.
  set_target_properties(embree_rthwif_sycl PROPERTIES
    COMPILE_FLAGS "${CMAKE_CXX_FLAGS_SYCL} -DEMBREE_SYCL_SUPPORT")

  install(TARGETS embree_rthwif_sycl EXPORT embree_rthwif_sycl-targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
  install(EXPORT embree_rthwif_sycl-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel)

  # Name of the helper target, consumed by the link line of embree_rthwif.
  set(EMBREE_RTHWIF_SYCL embree_rthwif_sycl)
endif()
# The builder library itself; skipped when EMBREE_SYCL_L0_RTAS_BUILDER is set.
if(NOT EMBREE_SYCL_L0_RTAS_BUILDER)

  add_library(embree_rthwif ${RTHWIF_LIB_TYPE}
    rtbuild/rtbuild.cpp
    rtbuild/qbvh6.cpp
    rtbuild/statistics.cpp)

  target_link_libraries(embree_rthwif
    PUBLIC ${EMBREE_RTHWIF_SYCL}
    PRIVATE ${TBB_TARGET} simd sys)

  # The file name of the produced library follows RTHWIF_NAME.
  set_target_properties(embree_rthwif PROPERTIES OUTPUT_NAME ${RTHWIF_NAME})

  if(EMBREE_RTHWIF_STATIC_LIB)
    target_compile_definitions(embree_rthwif PUBLIC EMBREE_RTHWIF_STATIC_LIB)
  endif()

  target_compile_definitions(embree_rthwif PUBLIC EMBREE_SYCL_SUPPORT)

  # Install rules are emitted for a shared build of this library, or when
  # EMBREE_STATIC_LIB is set.
  if(EMBREE_STATIC_LIB OR NOT EMBREE_RTHWIF_STATIC_LIB)
    install(TARGETS embree_rthwif EXPORT ${RTHWIF_NAME}-targets
      LIBRARY NAMELINK_SKIP DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib
      RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples
      ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel)
    install(EXPORT ${RTHWIF_NAME}-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel)
  endif()

endif()

add_subdirectory(testing)

View file

@ -0,0 +1,60 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
include(GNUInstallDirs)

##############################################################
# Install Documentation
##############################################################

# Each listed file lives two directories above the project source dir and is
# installed into the "doc" destination as part of the "lib" component.
foreach(ze_rt_doc_file
    LICENSE.txt
    CHANGELOG.md
    third-party-programs.txt
    third-party-programs-TBB.txt
    third-party-programs-OIDN.txt
    third-party-programs-DPCPP.txt
    third-party-programs-oneAPI-DPCPP.txt)
  install(FILES "${PROJECT_SOURCE_DIR}/../../${ze_rt_doc_file}" DESTINATION doc COMPONENT lib)
endforeach()
##############################################################
# CPack specific stuff
##############################################################

set(CPACK_PACKAGE_NAME "L0 Ray Tracing Build API")
set(CPACK_PACKAGE_FILE_NAME "ze_raytracing-${RTHWIF_VERSION}")
set(CPACK_STRIP_FILES TRUE)

# The package version follows the RTHWIF_VERSION* variables, the same source
# the package file name above is built from, so that both always agree.
set(CPACK_PACKAGE_VERSION_MAJOR ${RTHWIF_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${RTHWIF_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${RTHWIF_VERSION_PATCH})
set(CPACK_PACKAGE_VERSION ${RTHWIF_VERSION})

set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Implements acceleration structure build for L0 ray tracing extension.")
set(CPACK_PACKAGE_VENDOR "Intel Corporation")
set(CPACK_PACKAGE_CONTACT embree_support@intel.com)

# All components go into one package.
set(CPACK_MONOLITHIC_INSTALL 1)

# Display names and descriptions of the install components.
set(CPACK_COMPONENT_LIB_DISPLAY_NAME "Library")
set(CPACK_COMPONENT_LIB_DESCRIPTION "Library")
set(CPACK_COMPONENT_DEVEL_DISPLAY_NAME "Development")
set(CPACK_COMPONENT_DEVEL_DESCRIPTION "Development")
set(CPACK_COMPONENT_EXAMPLES_DISPLAY_NAME "Examples")
set(CPACK_COMPONENT_EXAMPLES_DESCRIPTION "Examples")
# Per-platform archive format and file name suffix of the package.
if(WIN32)
  # Windows specific settings
  set(CPACK_GENERATOR ZIP)
  set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x64.windows")
elseif(APPLE)
  # MacOSX specific settings
  set(CPACK_GENERATOR ZIP)
  set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.macosx")
else()
  # Linux specific settings
  set(CPACK_GENERATOR TGZ)
  set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.linux")
endif()

View file

@ -0,0 +1,629 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if defined(ZE_RAYTRACING)
#include "sys/sysinfo.h"
#include "sys/vector.h"
#include "math/vec2.h"
#include "math/vec3.h"
#include "math/bbox.h"
#include "math/affinespace.h"
#else
#include "../../../common/sys/sysinfo.h"
#include "../../../common/sys/vector.h"
#include "../../../common/math/vec2.h"
#include "../../../common/math/vec3.h"
#include "../../../common/math/bbox.h"
#include "../../../common/math/lbbox.h"
#include "../../../common/math/affinespace.h"
#endif
#include "node_type.h"
#include <map>
#include <bitset>
namespace embree
{
/*
  Internal representation of the geometry flags: a bit set stored in a
  32-bit scoped enum.
*/

#undef OPAQUE // Windows defines OPAQUE in gdi.h

enum class GeometryFlags : uint32_t
{
  NONE   = 0x0,
  OPAQUE = 0x1
};

/* tests whether the two flag sets have at least one bit in common */
inline bool operator& (GeometryFlags a, GeometryFlags b) {
  return (int(a) & int(b)) != 0;
}

/* Output operator for GeometryFlags: writes "NONE" for an empty flag set,
 * otherwise the name of each set flag followed by a space. Writes nothing
 * when compiled for the SYCL device. */
inline std::ostream& operator<<(std::ostream& cout, const GeometryFlags& gflags)
{
#if !defined(__SYCL_DEVICE_ONLY__)
  if (gflags == GeometryFlags::NONE) {
    cout << "NONE";
    return cout;
  }
  if (gflags & GeometryFlags::OPAQUE)
    cout << "OPAQUE ";
#endif
  return cout;
}
/*
  Header placed at the start of each leaf type. Only the InstanceLeaf
  uses a slightly different header.

  All primitives inside a leaf belong to the same geometry and therefore
  share the geometry index (geomIndex), the shader index (shaderIndex),
  the geometry mask (geomMask) and the geometry flags (geomFlags).

  The shaderIndex is used to calculate the shader record to invoke.
  This is an extension to DXR, where the geomIndex is used for that
  purpose. For DXR the shaderIndex can always be set equal to the
  geomIndex.
*/
struct PrimLeafDesc
{
  /* NOTE(review): MAX_GEOM_INDEX is a 30-bit value while the geomIndex
   * field below is 29 bits wide -- confirm which limit is intended. */
  static const uint32_t MAX_GEOM_INDEX = 0x3FFFFFFF;
  static const uint32_t MAX_SHADER_INDEX = 0xFFFFFF;

  enum Type : uint32_t
  {
    TYPE_NONE = 0,

    /* For a node type of NODE_TYPE_PROCEDURAL we support enabling
     * and disabling the opaque/non_opaque culling. */
    TYPE_OPACITY_CULLING_ENABLED = 0,
    TYPE_OPACITY_CULLING_DISABLED = 1
  };

  /* leaves all fields uninitialized */
  PrimLeafDesc() {}

  /* Fills the header. Throws std::runtime_error when shaderIndex exceeds
   * MAX_SHADER_INDEX or geomIndex exceeds MAX_GEOM_INDEX. */
  PrimLeafDesc(uint32_t shaderIndex, uint32_t geomIndex, GeometryFlags gflags, uint32_t geomMask, Type type = TYPE_NONE)
    : shaderIndex(shaderIndex),
      geomMask(geomMask),
      geomIndex(geomIndex),
      type(type),
      geomFlags((uint32_t)gflags)
  {
    if (shaderIndex > MAX_SHADER_INDEX)
      throw std::runtime_error("too large shader ID");

    if (geomIndex > MAX_GEOM_INDEX)
      throw std::runtime_error("too large geometry ID");
  }

  /* Two descriptors are equal when their geometry index matches; the
   * remaining fields are only cross-checked with asserts. */
  friend bool operator ==(const PrimLeafDesc& a, const PrimLeafDesc& b)
  {
    const bool sameGeometry = (a.geomIndex == b.geomIndex);
    if (sameGeometry)
    {
      assert(a.shaderIndex == b.shaderIndex);
      assert(a.geomMask == b.geomMask);
      assert(a.type == b.type);
      assert(a.geomFlags == b.geomFlags);
    }
    return sameGeometry;
  }

  friend bool operator !=(const PrimLeafDesc& a, const PrimLeafDesc& b) {
    return !(a == b);
  }

  /* prints the header, each line prefixed with tab(depth); does nothing
   * when compiled for the SYCL device */
  void print(std::ostream& cout, uint32_t depth) const
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    cout << tab(depth) << "PrimLeafDesc {" << std::endl;
    cout << tab(depth) << " shaderIndex = " << shaderIndex << std::endl;
    cout << tab(depth) << " geomMask = " << std::bitset<8>(geomMask) << std::endl;
    cout << tab(depth) << " geomFlags = " << getGeomFlags() << std::endl;
    cout << tab(depth) << " geomIndex = " << geomIndex << std::endl;
    cout << tab(depth) << "}";
#endif
  }

  /* output operator, prints without indentation */
  friend inline std::ostream& operator<<(std::ostream& cout, const PrimLeafDesc& desc)
  {
    desc.print(cout,0);
    return cout;
  }

  /* Checks if opaque culling is enabled. */
  bool opaqueCullingEnabled() const {
    return type == TYPE_OPACITY_CULLING_ENABLED;
  }

  /* procedural instances store some valid shader index, i.e. one that
   * differs from the all-ones value MAX_SHADER_INDEX */
  bool isProceduralInstance() const {
    return shaderIndex != MAX_SHADER_INDEX;
  }

  /* returns the geometry flags as the enum type */
  GeometryFlags getGeomFlags() const {
    return (GeometryFlags) geomFlags;
  }

public:
  uint32_t shaderIndex : 24;                // shader index used for shader record calculations
  uint32_t geomMask : 8;                    // geometry mask used for ray masking

  uint32_t geomIndex : 29;                  // the geometry index specifies the n'th geometry of the scene
  /*Type*/ uint32_t type : 1;               // enable/disable culling for procedurals and instances
  /*GeometryFlags*/ uint32_t geomFlags : 2; // geometry flags of this geometry
};
/*
  The QuadLeaf structure stores a single quad. A quad is a pair of
  triangles with a shared edge. The first triangle has the vertices
  v0,v1,v2, while the second triangle has the vertices
  v[j0],v[j1],v[j2], i.e. the second triangle is expressed through
  local vertex indices.
*/
struct QuadLeaf
{
  /* leaves all fields uninitialized */
  QuadLeaf() {}

  QuadLeaf (Vec3f v0, Vec3f v1, Vec3f v2, Vec3f v3,
            uint8_t j0, uint8_t j1, uint8_t j2,
            uint32_t shaderIndex, uint32_t geomIndex, uint32_t primIndex0, uint32_t primIndex1,
            GeometryFlags gflags, uint32_t geomMask, bool last)

    : leafDesc(shaderIndex,geomIndex,gflags,geomMask),
      primIndex0(primIndex0),
      primIndex1Delta(primIndex1-primIndex0), pad1(0),
      j0(j0),j1(j1),j2(j2),last(last),pad(0),
      v0(v0), v1(v1), v2(v2), v3(v3)
  {
    /* There are some constraints on the primitive indices. The
     * second primitive index always has to be the largest, and the
     * distance between both is limited because it is stored delta
     * encoded.
     * NOTE(review): the assert permits deltas below 0xFFFF, while
     * primIndex1Delta is a 5-bit field followed by 11 must-be-zero
     * bits -- confirm which limit is intended. */
    assert(primIndex0 <= primIndex1 && primIndex1 - primIndex0 < 0xFFFF);
  }

  /* returns the i'th vertex */
  __forceinline Vec3f vertex(size_t i) const
  {
    assert(i < 4);
    return (&v0)[i];
  }

  /* Checks if the specified triangle is the last inside a leaf
   * list. */
  bool isLast(uint32_t i = 1) const
  {
    assert(i<2);
    /* the first triangle is never the last; the last bit tags the
     * second triangle to be last */
    return i != 0 && last;
  }

  /* Checks if the second triangle exists, i.e. whether any of its
   * local vertex indices is non-zero. */
  bool valid2() const {
    return j0 != 0 || j1 != 0 || j2 != 0;
  }

  /* Calculates the number of stored triangles. */
  size_t size() const {
    return valid2() ? 2 : 1;
  }

  /* Calculates the effectively used bytes. If we store only one
   * triangle we waste the storage of one vertex. */
  size_t usedBytes() const {
    return valid2() ? sizeof(QuadLeaf) : sizeof(QuadLeaf)-sizeof(Vec3f);
  }

  /* Calculates the delta to add to primIndex0 to get the primitive
   * index of the i'th triangle. */
  uint32_t primIndexDelta(uint32_t i) const
  {
    assert(i<2);
    return i*primIndex1Delta;
  }

  /* Calculates the primitive index of the i'th triangle. */
  uint32_t primIndex(uint32_t i) const
  {
    assert(i<2);
    return primIndex0 + primIndexDelta(i);
  }

  /* Quad mode is a special mode where the uv's over the quad are
   * defined over the entire range [0,1]x[0,1]. */
  bool quadMode() const {
    return primIndex1Delta == 0;
  }

  /* Calculates the bounding box of this leaf; v3 only contributes
   * when the second triangle exists. */
  BBox3f bounds() const
  {
    BBox3f box = empty;
    box.extend(v0);
    box.extend(v1);
    box.extend(v2);
    if (valid2())
      box.extend(v3);
    return box;
  }

  /* output of quad leaf, each line prefixed with tab(depth) */
  void print(std::ostream& cout, uint32_t depth) const
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    cout << tab(depth) << "QuadLeaf {" << std::endl;
    cout << tab(depth) << " addr = " << this << std::endl;
    cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl;
    cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl;
    cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl;
    cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl;

    /* first triangle */
    cout << tab(depth) << " triangle0 = { " << std::endl;
    cout << tab(depth) << " primIndex = " << primIndex(0) << std::endl;
    cout << tab(depth) << " v0 = " << v0 << std::endl;
    cout << tab(depth) << " v1 = " << v1 << std::endl;
    cout << tab(depth) << " v2 = " << v2 << std::endl;
    cout << tab(depth) << " }" << std::endl;

    /* optional second triangle */
    if (valid2())
    {
      cout << tab(depth) << " triangle1 = { " << std::endl;
      cout << tab(depth) << " primIndex = " << primIndex(1) << std::endl;
      cout << tab(depth) << " v0 = " << vertex(j0) << std::endl;
      cout << tab(depth) << " v1 = " << vertex(j1) << std::endl;
      cout << tab(depth) << " v2 = " << vertex(j2) << std::endl;
      cout << tab(depth) << " }" << std::endl;
    }
    cout << tab(depth) << "}";
#endif
  }

  /* output operator for QuadLeaf */
  friend inline std::ostream& operator<<(std::ostream& cout, const QuadLeaf& leaf)
  {
    leaf.print(cout,0);
    return cout;
  }

public:
  PrimLeafDesc leafDesc;  // the leaf header
  uint32_t primIndex0;    // primitive index of first triangle
  struct {
    uint32_t primIndex1Delta : 5;  // delta encoded primitive index of second triangle
    uint32_t pad1            : 11; // MBZ
    uint32_t j0              : 2;  // specifies first vertex of second triangle
    uint32_t j1              : 2;  // specifies second vertex of second triangle
    uint32_t j2              : 2;  // specifies third vertex of second triangle
    uint32_t last            : 1;  // true if the second triangle is the last triangle in a leaf list
    uint32_t pad             : 9;  // unused bits
  };
  Vec3f v0;  // first vertex of first triangle
  Vec3f v1;  // second vertex of first triangle
  Vec3f v2;  // third vertex of first triangle
  Vec3f v3;  // fourth vertex used for second triangle
};

static_assert(sizeof(QuadLeaf) == 64, "QuadLeaf must be 64 bytes large");
/*
  Internal definition of the instance flags: an 8-bit flag set that can
  be accessed either as a whole (flags) or bit by bit.
*/
struct InstanceFlags
{
  enum Flags : uint8_t
  {
    NONE = 0x0,
    TRIANGLE_CULL_DISABLE = 0x1,           // disables culling of front and back facing triangles through ray flags
    TRIANGLE_FRONT_COUNTERCLOCKWISE = 0x2, // for mirroring transformations the instance can switch front and backface of triangles
    FORCE_OPAQUE = 0x4,                    // forces all primitives inside this instance to be opaque
    FORCE_NON_OPAQUE = 0x8                 // forces all primitives inside this instance to be non-opaque
  };

  /* leaves the flags uninitialized */
  InstanceFlags() {}

  /* construction from the enum type */
  InstanceFlags(Flags rflags)
    : flags(rflags) {}

  /* construction from the raw 8-bit value */
  InstanceFlags(uint8_t rflags)
    : flags((Flags)rflags) {}

  /* conversion back to the enum type */
  operator Flags () const {
    return flags;
  }

  /* Output operator for InstanceFlags: writes "NONE" for an empty flag
   * set, otherwise the name of each set flag followed by a space.
   * Writes nothing when compiled for the SYCL device. */
  friend inline std::ostream& operator<<(std::ostream& cout, const InstanceFlags& iflags)
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    if (iflags == InstanceFlags::NONE) {
      return cout << "NONE";
    }
    if (iflags.triangle_cull_disable) {
      cout << "TRIANGLE_CULL_DISABLE ";
    }
    if (iflags.triangle_front_counterclockwise) {
      cout << "TRIANGLE_FRONT_COUNTERCLOCKWISE ";
    }
    if (iflags.force_opaque) {
      cout << "FORCE_OPAQUE ";
    }
    if (iflags.force_non_opaque) {
      cout << "FORCE_NON_OPAQUE ";
    }
#endif
    return cout;
  }

public:
  union
  {
    Flags flags;  // all flags as one value
    struct
    {
      bool triangle_cull_disable : 1;
      bool triangle_front_counterclockwise : 1;
      bool force_opaque : 1;
      bool force_non_opaque : 1;
    };
  };
};

/* combines two flag values bitwise */
inline InstanceFlags::Flags operator| (InstanceFlags::Flags a,InstanceFlags::Flags b)
{
  const int combined = int(a) | int(b);
  return (InstanceFlags::Flags)combined;
}
/*
  The instance leaf represents an instance. It essentially stores the
  transformation matrices of the instance (local to world as well as
  world to local) together with a pointer to the start node of some
  BVH.

  The instance leaf consists of two parts, part0 (first 64 bytes) and
  part1 (second 64 bytes). Part0 will only get accessed by hardware
  and stores the world to local transformation as well as the BVH
  node to start traversal. Part1 stores additional data that is only
  read by the shader, e.g. the local to world transformation of the
  instance.

  The layout of the first part of the InstanceLeaf is compatible with
  a ProceduralLeaf, thus the same layout can be used for software
  instancing if desired.
*/
struct InstanceLeaf
{
  /* leaves all fields uninitialized */
  InstanceLeaf() {}

  /* Builds the leaf from the object to world transformation, the
   * start node pointer (must fit into 48 bits), the instance index,
   * the user defined instance ID and the instance mask. */
  InstanceLeaf (AffineSpace3f obj2world, uint64_t startNodePtr, uint32_t instID, uint32_t instUserID, uint8_t instMask)
  {
    assert((startNodePtr >> 48) == 0);

    /* header and traversal data of part0 */
    part0.shaderIndex = 0; //InstShaderRecordID;
    part0.geomMask = instMask;
    part0.instanceContributionToHitGroupIndex = 0; //desc.InstanceContributionToHitGroupIndex;
    part0.pad0 = 0;
    part0.type = PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED;
    part0.geomFlags = (uint32_t) GeometryFlags::NONE;
    part0.startNodePtr = startNodePtr;
    part0.instFlags = (InstanceFlags) 0;
    part0.pad1 = 0;

    /* identification data of part1 */
    part1.instanceID = instUserID;
    part1.instanceIndex = instID;
    part1.bvhPtr = (uint64_t) 0;
    part1.pad = 0;

    /* object to world: linear part in part1, translation in part0 */
    part1.obj2world_vx = obj2world.l.vx;
    part1.obj2world_vy = obj2world.l.vy;
    part1.obj2world_vz = obj2world.l.vz;
    part0.obj2world_p = obj2world.p;

    /* world to object: linear part in part0, translation in part1 */
    const AffineSpace3f world2obj = rcp(obj2world);
    part0.world2obj_vx = world2obj.l.vx;
    part0.world2obj_vy = world2obj.l.vy;
    part0.world2obj_vz = world2obj.l.vz;
    part1.world2obj_p = world2obj.p;
  }

  /* Returns the address of the start node pointer, which is located 8
   * bytes into part0. We need this address to calculate relocation
   * tables when dumping the BVH to disk. */
  const uint64_t startNodePtrAddr() const
  {
    char* const part0Bytes = (char*)&part0;
    return (uint64_t)(part0Bytes + 8);
  }

  /* Returns the address of the pointer to the BVH that contains the
   * start node; this is the address of part1. */
  const uint64_t bvhPtrAddr() const {
    return (uint64_t)&part1;
  }

  /* returns the world to object space transformation matrix. */
  const AffineSpace3f World2Obj() const {
    return AffineSpace3f(part0.world2obj_vx,part0.world2obj_vy,part0.world2obj_vz,part1.world2obj_p);
  }

  /* returns the object to world space transformation matrix. */
  const AffineSpace3f Obj2World() const {
    return AffineSpace3f(part1.obj2world_vx,part1.obj2world_vy,part1.obj2world_vz,part0.obj2world_p);
  }

  /* prints the instance leaf, each line prefixed with tab(depth) */
  void print (std::ostream& cout, uint32_t depth) const
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    /* the type bit selects the name printed in the first line */
    cout << tab(depth) << (part0.type ? "ProceduralInstanceLeaf {" : "InstanceLeaf {") << std::endl;

    cout << tab(depth) << " addr = " << this << std::endl;
    cout << tab(depth) << " shaderIndex = " << part0.shaderIndex << std::endl;
    cout << tab(depth) << " geomMask = " << std::bitset<8>(part0.geomMask) << std::endl;
    cout << tab(depth) << " geomIndex = " << part1.instanceIndex << std::endl;
    cout << tab(depth) << " instanceID = " << part1.instanceID << std::endl;
    cout << tab(depth) << " instFlags = " << InstanceFlags(part0.instFlags) << std::endl;
    cout << tab(depth) << " startNodePtr = " << (void*)(size_t)part0.startNodePtr << std::endl;

    cout << tab(depth) << " obj2world.vx = " << part1.obj2world_vx << std::endl;
    cout << tab(depth) << " obj2world.vy = " << part1.obj2world_vy << std::endl;
    cout << tab(depth) << " obj2world.vz = " << part1.obj2world_vz << std::endl;
    cout << tab(depth) << " obj2world.p = " << part0.obj2world_p << std::endl;

    cout << tab(depth) << " world2obj.vx = " << part0.world2obj_vx << std::endl;
    cout << tab(depth) << " world2obj.vy = " << part0.world2obj_vy << std::endl;
    cout << tab(depth) << " world2obj.vz = " << part0.world2obj_vz << std::endl;
    cout << tab(depth) << " world2obj.p = " << part1.world2obj_p << std::endl;

    cout << tab(depth) << " instanceContributionToHitGroupIndex = " << part0.instanceContributionToHitGroupIndex << std::endl;
    cout << tab(depth) << "}";
#endif
  }

  /* output operator for InstanceLeaf */
  friend inline std::ostream& operator<<(std::ostream& cout, const InstanceLeaf& leaf)
  {
    leaf.print(cout,0);
    return cout;
  }

  /* first 64 bytes accessed during traversal by hardware */
  struct Part0
  {
    /* Checks if opaque culling is enabled. */
    bool opaqueCullingEnabled() const {
      return type == PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED;
    }

  public:
    uint32_t shaderIndex : 24;  // shader index used to calculate instancing shader in case of software instancing
    uint32_t geomMask : 8;      // geometry mask used for ray masking

    uint32_t instanceContributionToHitGroupIndex : 24;
    uint32_t pad0 : 5;

    /* the following two entries are only used for procedural instances */
    /*PrimLeafDesc::Type*/ uint32_t type : 1;  // enables/disables opaque culling
    /*GeometryFlags*/ uint32_t geomFlags : 2;  // unused for instances

    uint64_t startNodePtr : 48;  // start node where to continue traversal of the instanced object
    uint64_t instFlags : 8;      // flags for the instance (see InstanceFlags)
    uint64_t pad1 : 8;           // unused bits

    Vec3f world2obj_vx;  // 1st column of World2Obj transform
    Vec3f world2obj_vy;  // 2nd column of World2Obj transform
    Vec3f world2obj_vz;  // 3rd column of World2Obj transform
    Vec3f obj2world_p;   // translation of Obj2World transform (on purpose in first 64 bytes)
  } part0;

  /* second 64 bytes accessed during shading */
  struct Part1
  {
    uint64_t bvhPtr : 48;    // pointer to BVH the start node belongs to
    uint64_t pad : 16;       // unused bits

    uint32_t instanceID;     // user defined value per DXR spec
    uint32_t instanceIndex;  // geometry index of the instance (n'th geometry in scene)

    Vec3f obj2world_vx;  // 1st column of Obj2World transform
    Vec3f obj2world_vy;  // 2nd column of Obj2World transform
    Vec3f obj2world_vz;  // 3rd column of Obj2World transform
    Vec3f world2obj_p;   // translation of World2Obj transform
  } part1;
};

static_assert(sizeof(InstanceLeaf) == 128, "InstanceLeaf must be 128 bytes large");
/*
  Leaf type for procedural geometry. This leaf only contains the leaf
  header (which identifies the geometry) and a list of primitive
  indices.

  The BVH will typically reference only some of the primitives stored
  inside this leaf. The range is specified by a start primitive, and
  the last primitive is tagged with a bit.
*/
struct ProceduralLeaf
{
  static const uint32_t N = 13;  // number of primitive slots per leaf

  /* Creates an empty procedural leaf: no primitives, all slots set
   * to 0xFFFFFFFF. */
  ProceduralLeaf ()
    : leafDesc(PrimLeafDesc::MAX_SHADER_INDEX,PrimLeafDesc::MAX_GEOM_INDEX,GeometryFlags::NONE,0),
      numPrimitives(0), pad(0), last(0)
  {
    for (uint32_t slot = 0; slot < N; slot++)
      _primIndex[slot] = 0xFFFFFFFF;
  }

  /* Creates a procedural leaf with one primitive. More primitives
   * of the same geometry can get added later using the add
   * function. */
  ProceduralLeaf (PrimLeafDesc leafDesc, uint32_t primIndex, bool last)
    : leafDesc(leafDesc),
      numPrimitives(1), pad(0), last(last ? 0xFFFFFFFF : 0xFFFFFFFE)
  {
    for (uint32_t slot = 0; slot < N; slot++)
      _primIndex[slot] = 0xFFFFFFFF;

    _primIndex[0] = primIndex;
  }

  /* returns the number of primitives stored inside this leaf */
  uint32_t size() const {
    return numPrimitives;
  }

  /* Calculates the effectively used bytes: the header, 4 further
   * bytes, and 4 bytes per stored primitive. */
  size_t usedBytes() const
  {
    /*if (leafDesc.isProceduralInstance())
      return sizeof(InstanceLeaf);
      else*/
    return sizeof(PrimLeafDesc)+4+4*numPrimitives;
  }

  /* If possible adds a new primitive to this leaf. Returns false
   * when the leaf is full or when the primitive belongs to a
   * different geometry than the primitives already stored. */
  bool add(PrimLeafDesc leafDesc_in, uint32_t primIndex_in, bool last_in)
  {
    assert(primIndex_in != 0xFFFFFFFF);

    if (numPrimitives >= N)
      return false;

    /* the first primitive determines the header of the leaf */
    if (numPrimitives == 0)
      leafDesc = leafDesc_in;

    if (leafDesc != leafDesc_in)
      return false;

    const uint32_t slot = numPrimitives;
    const uint32_t slotBit = 1 << slot;

    _primIndex[slot] = primIndex_in;

    /* update the last bit that belongs to the new slot */
    if (last_in) last |= slotBit;
    else         last &= ~slotBit;

    numPrimitives++;
    return true;
  }

  /* returns the primitive index of the i'th primitive */
  uint32_t primIndex(uint32_t i) const
  {
    assert(i < N);
    return _primIndex[i];
  }

  /* checks if the i'th primitive is the last in a leaf list */
  bool isLast(uint32_t i) const
  {
    /* out of range slots report true, just to make some verify tests happy */
    return i >= N || ((last >> i) & 1);
  }

  /* prints slot i of the procedural leaf, each line prefixed with
   * tab(depth); slots outside the leaf are printed as INVALID */
  void print (std::ostream& cout, uint32_t i, uint32_t depth) const
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    cout << tab(depth) << "ProceduralLeaf {" << std::endl;
    cout << tab(depth) << " addr = " << this << std::endl;
    cout << tab(depth) << " slot = " << i << std::endl;

    if (i < N)
    {
      cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl;
      cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl;
      cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl;
      cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl;
      cout << tab(depth) << " primIndex = " << primIndex(i) << std::endl;
    }
    else
    {
      cout << tab(depth) << " INVALID" << std::endl;
    }

    cout << tab(depth) << "}";
#endif
  }

public:
  PrimLeafDesc leafDesc;       // leaf header identifying the geometry
  uint32_t numPrimitives : 4;  // number of stored primitives
  uint32_t pad : 32-4-N;
  uint32_t last : N;           // bit vector with a last bit per primitive
  uint32_t _primIndex[N];      // primitive indices of all primitives stored inside the leaf
};

static_assert(sizeof(ProceduralLeaf) == 64, "ProceduralLeaf must be 64 bytes large");
}

View file

@ -0,0 +1,56 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <cstdint>
#include <iostream>
namespace embree
{
  /* The type of a node. NODE_TYPE_MIXED and NODE_TYPE_INTERNAL share the
   * value 0x0. */
  enum NodeType : uint8_t
  {
    NODE_TYPE_MIXED = 0x0,        // identifies a mixed internal node where each child can have a different type
    NODE_TYPE_INTERNAL = 0x0,     // internal BVH node with 6 children
    NODE_TYPE_INSTANCE = 0x1,     // instance leaf
    NODE_TYPE_PROCEDURAL = 0x3,   // procedural leaf
    NODE_TYPE_QUAD = 0x4,         // quad leaf
    NODE_TYPE_INVALID = 0x7       // indicates invalid node
  };

  /* Output operator for NodeType: writes the name of the node type, or
   * "INVALID NODE TYPE" for a value that is none of the enumerators.
   * Value 0x0 is always printed as "INTERNAL". Writes nothing when
   * __RTRT_GSIM is defined. Returns the stream passed in. */
  inline std::ostream& operator<<(std::ostream& _cout, const NodeType& _type)
  {
#if !defined(__RTRT_GSIM)
    switch (_type)
    {
    case NODE_TYPE_INTERNAL:   _cout << "INTERNAL"; break;
    case NODE_TYPE_INSTANCE:   _cout << "INSTANCE"; break;
    case NODE_TYPE_PROCEDURAL: _cout << "PROCEDURAL"; break;
    case NODE_TYPE_QUAD:       _cout << "QUAD"; break;
    case NODE_TYPE_INVALID:    _cout << "INVALID"; break;
    default:                   _cout << "INVALID NODE TYPE"; break;
    }
#endif
    return _cout;
  }

  /*
    Sub-type definition for each NodeType. All sub-types currently share
    the value 0.
  */
  enum SubType : uint8_t
  {
    SUB_TYPE_NONE = 0,

    /* sub-type for NODE_TYPE_INTERNAL */
    SUB_TYPE_INTERNAL6 = 0x00,    // Xe+: internal node with 6 children

    /* Sub-type for NODE_TYPE_QUAD */
    SUB_TYPE_QUAD = 0,            // Xe+: standard quad leaf (64 bytes)

    /* Sub-type for NODE_TYPE_PROCEDURAL */
    SUB_TYPE_PROCEDURAL = 0,      // Xe+: standard procedural leaf
  };
}

View file

@ -0,0 +1,265 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "qbvh6.h"
namespace embree
{
/* Gathers the statistics of an internal node: first recurses into every
 * valid child through computeStatistics, then accounts for the node
 * itself in stats.internalNode. */
template<typename InternalNode>
void computeInternalNodeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area)
{
  InternalNode* inner = node.innerNode<InternalNode>();

  /* visit the valid children and count how many slots are in use */
  size_t numChildrenUsed = 0;
  for (uint32_t slot = 0; slot < InternalNode::NUM_CHILDREN; slot++)
  {
    if (!inner->valid(slot))
      continue;

    numChildrenUsed++;
    computeStatistics(stats, inner->child(slot), time_range, area(inner->bounds(slot)), root_bounds_area, InternalNode::NUM_CHILDREN);
  }

  /* update BVH statistics */
  stats.internalNode.numNodes++;
  stats.internalNode.numChildrenUsed += numChildrenUsed;
  stats.internalNode.numChildrenTotal += InternalNode::NUM_CHILDREN;
  stats.internalNode.nodeSAH += time_range.size() * node_bounds_area / root_bounds_area;
  stats.internalNode.numBytes += sizeof(InternalNode);
}
/* Accumulates into stats the statistics of the subtree (or leaf list)
 * referenced by node, dispatching on node.type. The SAH contribution of
 * each entry is time_range.size() * node_bounds_area / root_bounds_area.
 * The numChildren parameter is not read inside this function. */
void computeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area, uint32_t numChildren)
{
switch (node.type)
{
/* an instance leaf counts as one leaf holding exactly one primitive */
case NODE_TYPE_INSTANCE:
{
stats.instanceLeaf.numLeaves++;
stats.instanceLeaf.numPrimsUsed++;
stats.instanceLeaf.numPrimsTotal++;
stats.instanceLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;
stats.instanceLeaf.numBytesUsed += sizeof(InstanceLeaf);
stats.instanceLeaf.numBytesTotal += sizeof(InstanceLeaf);
break;
}
/* a quad leaf is a list of consecutive QuadLeaf entries that ends at the
 * first entry whose isLast() returns true */
case NODE_TYPE_QUAD:
{
bool last = false;
stats.quadLeaf.numLeaves++;
do
{
QuadLeaf* quad = node.leafNodeQuad();
/* step to the next entry; quad still refers to the current one */
node.node += sizeof(QuadLeaf);
last = quad->isLast();
stats.quadLeaf.numPrimsUsed += quad->size();
/* every entry has room for two triangles */
stats.quadLeaf.numPrimsTotal += 2;
stats.quadLeaf.numBytesUsed += quad->usedBytes();
stats.quadLeaf.numBytesTotal += sizeof(QuadLeaf);
stats.quadLeaf.leafSAH += quad->size() * time_range.size() * node_bounds_area / root_bounds_area;
} while (!last);
break;
}
/* a procedural leaf is a list of primitives that starts at slot
 * node.cur_prim of the first block and may continue in following blocks */
case NODE_TYPE_PROCEDURAL:
{
/*if (node.leafNodeProcedural()->leafDesc.isProceduralInstance()) // FIXME: for some reason we always to into this case!?
{
stats.proceduralLeaf.numLeaves++;
stats.proceduralLeaf.numPrimsUsed += 1;
stats.proceduralLeaf.numPrimsTotal += 1;
stats.proceduralLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;
stats.proceduralLeaf.numBytesUsed += sizeof(InstanceLeaf);
stats.proceduralLeaf.numBytesTotal += sizeof(InstanceLeaf);
}
else*/
{
bool last = false;
uint32_t currPrim = node.cur_prim;
stats.proceduralLeaf.numLeaves++;
do
{
ProceduralLeaf* leaf = node.leafNodeProcedural();
last = leaf->isLast(currPrim);
/* block level counters are only updated when the walk is at slot 0 of a block */
if (currPrim == 0) {
stats.proceduralLeaf.numBlocks++;
stats.proceduralLeaf.numBytesUsed += leaf->usedBytes();
stats.proceduralLeaf.numBytesTotal += sizeof(ProceduralLeaf);
}
uint32_t primsInBlock = leaf->size();
stats.proceduralLeaf.numPrimsUsed++;
stats.proceduralLeaf.numPrimsTotal++;
stats.proceduralLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;
/* past the last stored slot: continue at slot 0 of the next block */
if (++currPrim >= primsInBlock) {
currPrim = 0;
node.node += sizeof(ProceduralLeaf);
}
} while (!last);
}
break;
}
/* internal nodes recurse into their children */
case NODE_TYPE_INTERNAL:
{
computeInternalNodeStatistics<QBVH6::InternalNode6>(stats, node, time_range, node_bounds_area, root_bounds_area);
break;
}
/* any other node type is unexpected */
default:
assert(false);
}
}
BVHStatistics QBVH6::computeStatistics() const
{
BVHStatistics stats;
if (empty()) return stats;
embree::computeStatistics(stats,root(),BBox1f(0,1),area(bounds),area(bounds),6);
return stats;
}
/* Prints the internal node referenced by 'node' followed by all of its
 * valid children to the stream 'cout'. The node itself is printed
 * without its closing brace (close == false), the children are printed
 * one level deeper, and the brace is closed afterwards. The
 * 'numChildren' argument is not read; the child count comes from
 * QInternalNode::NUM_CHILDREN. */
template<typename QInternalNode>
void QBVH6::printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren)
{
  QInternalNode* inner = node.innerNode<QInternalNode>();
  inner->print(cout, depth, false);

  /* write the line break to the stream we were given; the previous code
   * sent it to the global std::cout, which misplaced it whenever a
   * different stream was passed in */
  cout << std::endl;

  for (uint32_t i = 0; i < QInternalNode::NUM_CHILDREN; i++)
  {
    if (inner->valid(i))
      print(cout, inner->child(i), depth + 1, QInternalNode::NUM_CHILDREN);
  }

  cout << tab(depth) << "}" << std::endl;
}
/* Recursively prints the subtree referenced by 'node' to the stream
 * 'cout', indenting with tab(depth). Quad and procedural leaves are
 * printed as a "List { ... }" of their blocks/primitives; internal
 * nodes are forwarded to printInternalNodeStatistics. Unknown node
 * types print an "{ INVALID_NODE }" marker instead of asserting.
 *
 * All output goes to the 'cout' parameter. The previous code wrote the
 * list braces, line breaks, and the invalid marker to the global
 * std::cout, so printing into any other stream produced split output. */
void QBVH6::print( std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren)
{
  switch (node.type)
  {
  case NODE_TYPE_INSTANCE: {
    node.leafNodeInstance()->print(cout,depth);
    cout << std::endl;
    break;
  }
  case NODE_TYPE_QUAD:
  {
    cout << tab(depth) << "List {" << std::endl;

    /* walk consecutive QuadLeaf blocks until the one marked as last */
    bool last = false;
    do
    {
      QuadLeaf* quad = node.leafNodeQuad();
      node.node += sizeof(QuadLeaf);
      last = quad->isLast();

      quad->print(cout,depth+1);
      cout << std::endl;

    } while (!last);

    cout << tab(depth) << "}" << std::endl;
    break;
  }
  case NODE_TYPE_PROCEDURAL:
  {
    /* NOTE: the original code carries a disabled alternative that
     * printed the node as an InstanceLeaf when
     * leafDesc.opaqueCullingEnabled() was false; it stays disabled. */
    cout << tab(depth) << "List {" << std::endl;

    /* walk primitive by primitive, starting at the primitive stored in
     * the node reference, and move on to the next block whenever the
     * current one is exhausted */
    bool last = false;
    uint32_t currPrim = node.cur_prim;
    do
    {
      ProceduralLeaf* leaf = node.leafNodeProcedural();
      last = leaf->isLast(currPrim);

      uint32_t primsInBlock = leaf->size();

      leaf->print(cout,currPrim,depth+1);
      cout << std::endl;

      if (++currPrim >= primsInBlock) {
        currPrim = 0;
        node.node += sizeof(ProceduralLeaf);
      }

    } while (!last);

    cout << tab(depth) << "}" << std::endl;
    break;
  }
  case NODE_TYPE_INTERNAL:
  {
    printInternalNodeStatistics<QBVH6::InternalNode6>(cout, node, depth, numChildren);
    break;
  }
  default:
    /* deliberately tolerant: report the problem in the output rather
     * than aborting the dump */
    cout << "{ INVALID_NODE }" << std::endl;
    //assert(false);
  }
}
/* Returns the address of the back pointer array, which starts
 * backPointerDataStart 64 byte blocks after the start of the BVH. */
unsigned* getBackPointersData(const QBVH6* base) { // FIXME: should be member function
  const char* bvhStart = (const char*)base;
  return (unsigned*)(bvhStart + 64 * base->backPointerDataStart);
}
/* Returns how many 'unsigned' entries fit into the 64 byte blocks
 * between backPointerDataStart and backPointerDataEnd. */
unsigned getNumBackpointers(const QBVH6* base) { // FIXME: should be member function
  const uint32_t numBlocks = base->backPointerDataEnd - base->backPointerDataStart;
  return (numBlocks * 64) / sizeof(unsigned);
}
/* Returns the byte offset (relative to the BVH start) of the 64 byte
 * block that lies 'idx' blocks after nodeDataStart. */
uint64_t getBackpointerChildOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function
  const uint64_t blockIndex = uint64_t(base->nodeDataStart + idx);
  return 64 * blockIndex;
}
/* Returns the byte offset (relative to the BVH start) of the parent
 * recorded in back pointer 'idx'. The back pointer value shifted right
 * by 6 bits is used as a block index relative to nodeDataStart. */
uint64_t getParentFromBackpointerOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function
  const unsigned backPointer = getBackPointersData(base)[idx];
  const uint64_t parentBlock = uint64_t(base->nodeDataStart + (backPointer >> 6));
  return 64 * parentBlock;
}
void QBVH6::print ( std::ostream& cout ) const
{
cout << "QBVH @ "<< this <<" header: {\n";
cout << " rootNodeOffset = " << rootNodeOffset << std::endl;
cout << " bounds = " << bounds << std::endl;
cout << " nodeDataStart = " << nodeDataStart << std::endl;
cout << " nodeDataCur = " << nodeDataCur << std::endl;
cout << " leafDataStart = " << leafDataCur << std::endl;
cout << " leafDataCur = " << leafDataCur << std::endl;
cout << " proceduralDataStart = " << proceduralDataStart << std::endl;
cout << " proceduralDataCur = " << proceduralDataCur << std::endl;
cout << " backPointerDataStart = " << backPointerDataStart << std::endl;
cout << " backPointerDataEnd = " << backPointerDataEnd << std::endl;
cout << " numPrims = " << numPrims << std::endl;
cout << "}" << std::endl;
if (empty()) return;
print(cout,root(),0,6);
if (hasBackPointers())
{
cout << "backpointers: {\n";
for (unsigned bp = 0; bp < getNumBackpointers(this); ++bp) {
cout << " node @ offset " << (void*)getBackpointerChildOffset(this, bp) << " parent = " << (void*)getParentFromBackpointerOffset(this, bp) << ", num children = " << ((getBackPointersData(this)[bp] >> 3) & 0x7) << "\n";
}
cout << "}\n";
}
}
}

View file

@ -0,0 +1,230 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "qnode.h"
#include "statistics.h"
#include "rtbuild.h"
namespace embree
{
/*
The QBVH6 structure defines the bounding volume hierarchy (BVH)
that is used by the hardware. It is a BVH with 6-wide branching
factor, and quantized bounding boxes. At the leaf level quads
(QuadLeaf type), procedural geometries (ProceduralLeaf
type), and instances (InstanceLeaf type) can get referenced.
*/
/* Rounds a byte offset up to the next multiple of 128 bytes and returns
 * the result as a count of 64 byte blocks (two blocks per 128 bytes). */
inline constexpr size_t roundOffsetTo128(size_t offset) {
  return ((offset + 127) >> 7) << 1;
}
/* Header of a QBVH6 acceleration structure together with a simple bump
 * allocator for the memory that follows the header. All section
 * positions are stored as indices of 64 byte blocks relative to the
 * address of this header. A static_assert after the struct requires the
 * header to be exactly 128 bytes, so members must not be added, removed,
 * or reordered. */
struct QBVH6
{
  typedef NodeRef Node;
  typedef InternalNode<InternalNode6Data> InternalNode6;

  /* byte offset of the root node relative to the start of this header */
  static constexpr uint64_t rootNodeOffset = 128;

  static_assert(sizeof(InternalNode6) == 64, "InternalNode6 must be 64 bytes large");

  /* structure used to initialize the memory allocator inside the BVH */
  struct SizeEstimate
  {
    SizeEstimate ()
      : nodeBytes(0), leafBytes(0), proceduralBytes(0) {}

    SizeEstimate (size_t nodeBytes, size_t leafBytes, size_t proceduralBytes)
      : nodeBytes(nodeBytes), leafBytes(leafBytes), proceduralBytes(proceduralBytes) {}

    /* total allocation size: the header plus all three sections */
    size_t bytes() const {
      return sizeof(QBVH6) + nodeBytes + leafBytes + proceduralBytes;
    }

    /* component-wise comparison: true only if no section of 'a' is
     * larger than the same section of 'b' */
    friend bool operator<= (SizeEstimate a, SizeEstimate b)
    {
      if (a.nodeBytes > b.nodeBytes) return false;
      if (a.leafBytes > b.leafBytes) return false;
      if (a.proceduralBytes > b.proceduralBytes) return false;
      return true;
    }

    /* component-wise sum of two estimates */
    friend SizeEstimate operator+ (const SizeEstimate& a, const SizeEstimate& b)
    {
      return SizeEstimate(a.nodeBytes + b.nodeBytes,
                          a.leafBytes + b.leafBytes,
                          a.proceduralBytes + b.proceduralBytes);
    }

    /* output operator */
    friend inline std::ostream& operator<<(std::ostream& cout, const SizeEstimate& estimate)
    {
      cout << "SizeEstimate {" << std::endl;
      cout << " nodeBytes = " << estimate.nodeBytes << ", " << std::endl;
      cout << " leafBytes = " << estimate.leafBytes << ", " << std::endl;
      cout << " proceduralBytes = " << estimate.proceduralBytes << ", " << std::endl;
      return cout << "}";
    }

  public:
    size_t nodeBytes; // bytes required to store internal nodes
    size_t leafBytes; // bytes required to store leaf nodes
    size_t proceduralBytes; // bytes required to store procedural leaf nodes
  };

  /* Initializes a QBVH6 node with its provided size. The memory for
   * the QBVH6 structure is overallocated and the allocation size is
   * provided to the constructor, such that the allocator of the BVH
   * can get initialized properly. The sections are laid out back to
   * back: nodes (starting at the header size rounded up to 128 bytes),
   * leaves, procedural leaves, and finally an initially empty back
   * pointer range. */
  QBVH6(SizeEstimate size)
    : nodeDataStart((uint32_t)roundOffsetTo128(sizeof(QBVH6))), nodeDataCur(nodeDataStart),
      leafDataStart(nodeDataCur + (uint32_t)(size.nodeBytes / 64)), leafDataCur(leafDataStart),
      proceduralDataStart(leafDataCur + (uint32_t)(size.leafBytes / 64)), proceduralDataCur(proceduralDataStart),
      backPointerDataStart(proceduralDataCur + (uint32_t)(size.proceduralBytes/64)), backPointerDataEnd(backPointerDataStart)
  {
    assert(size.nodeBytes % 64 == 0);
    assert(size.leafBytes % 64 == 0);
    assert(size.proceduralBytes % 64 == 0);
    assert(size.bytes() <= (64LL << 32)); // block indices are 32 bit
    bounds = embree::empty;
  }

  /* Returns the root node of the BVH */
  Node root() const {
    return Node(rootNodeOffset,(uint64_t)this);
  }

  /* Verifies (in builds with asserts enabled) that the specified node is
   * located at the fixed root node offset. Nothing is stored, as
   * rootNodeOffset is a compile time constant. */
  void setRootNodeOffset(Node node) {
    assert(node.cur_prim == 0);
    uint64_t MAYBE_UNUSED rootNodeOffset1 = (uint64_t)node - (uint64_t)this;
    assert(rootNodeOffset == rootNodeOffset1);
  }

  /* check if BVH is empty */
  bool empty() const {
    return root().type == NODE_TYPE_INVALID;
  }

  /* pretty printing */
  template<typename QInternalNode>
  static void printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren = 6);
  static void print(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren=6);
  void print(std::ostream& cout = std::cout) const;

  /* output operator */
  friend inline std::ostream& operator<<(std::ostream& cout, const QBVH6& qbvh) {
    qbvh.print(cout); return cout;
  }

  /* calculates BVH statistics */
  BVHStatistics computeStatistics() const;

  /*
    This section implements a simple allocator for BVH data. Each of
    the node, leaf, and procedural leaf sections has a 'Cur' cursor
    that is advanced by every allocation; a section ends where the
    next section starts. Allocation sizes have to be multiples of 64
    bytes.
  */

public:

  /* allocate data in the node memory section */
  char* allocNode(size_t bytes)
  {
    assert(bytes % 64 == 0);
    /* divide before narrowing, so that a request of 4GB or more is not
     * truncated to a small block count that slips past the assert below */
    uint32_t blocks = (uint32_t)(bytes / 64);
    assert(nodeDataCur + blocks <= leafDataStart);
    char* ptr = (char*)this + 64 * (size_t)nodeDataCur;
    nodeDataCur += blocks;
    return ptr;
  }

  /* allocate memory in the leaf memory section */
  char* allocLeaf(size_t bytes)
  {
    assert(bytes % 64 == 0);
    uint32_t blocks = (uint32_t)(bytes / 64); // divide before narrowing, see allocNode
    assert(leafDataCur + blocks <= proceduralDataStart);
    char* ptr = (char*)this + 64 * (size_t)leafDataCur;
    leafDataCur += blocks;
    return ptr;
  }

  /* allocate memory in procedural leaf memory section */
  char* allocProceduralLeaf(size_t bytes)
  {
    assert(bytes % 64 == 0);
    uint32_t blocks = (uint32_t)(bytes / 64); // divide before narrowing, see allocNode
    assert(proceduralDataCur + blocks <= backPointerDataStart);
    char* ptr = (char*)this + 64 * (size_t)proceduralDataCur;
    proceduralDataCur += blocks;
    return ptr;
  }

  /* returns pointer to the address 'ofs' bytes into the node section */
  char* nodePtr(size_t ofs) {
    return (char*)this + 64 * size_t(nodeDataStart) + ofs;
  }

  /* returns pointer to address for next leaf allocation */
  char* leafPtr() {
    return (char*)this + 64 * (size_t)leafDataCur;
  }

  /* returns the total number of bytes of the BVH */
  size_t getTotalBytes() const {
    return 64 * (size_t)backPointerDataEnd;
  }

  /* returns number of bytes available for node allocations */
  size_t getFreeNodeBytes() const {
    return 64 * (size_t)(leafDataStart - nodeDataCur);
  }

  /* returns number of bytes available for leaf allocations */
  size_t getFreeLeafBytes() const {
    return 64 * (size_t)(proceduralDataStart - leafDataCur);
  }

  /* returns number of bytes available for procedural leaf allocations */
  size_t getFreeProceduralLeafBytes() const {
    return 64 * (size_t)(backPointerDataStart - proceduralDataCur);
  }

  /* returns the bytes used by allocations */
  size_t getUsedBytes() const {
    return getTotalBytes() - getFreeNodeBytes() - getFreeLeafBytes() - getFreeProceduralLeafBytes();
  }

  /* checks if the back pointer range is non-empty */
  bool hasBackPointers() const {
    return backPointerDataStart < backPointerDataEnd;
  }

public:
  ze_raytracing_accel_format_internal_t rtas_format = ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1;
  uint32_t reserved1;
  BBox3f bounds; // bounding box of the BVH

  uint32_t nodeDataStart; // first 64 byte block of node data
  uint32_t nodeDataCur; // next free 64 byte block for node allocations
  uint32_t leafDataStart; // first 64 byte block of leaf data
  uint32_t leafDataCur; // next free 64 byte block for leaf allocations
  uint32_t proceduralDataStart; // first 64 byte block for procedural leaf data
  uint32_t proceduralDataCur; // next free 64 byte block for procedural leaf allocations
  uint32_t backPointerDataStart; // first 64 byte block for back pointers
  uint32_t backPointerDataEnd; // end of back pointer array
  uint32_t numTimeSegments = 1;
  uint32_t numPrims = 0; // number of primitives in this BVH
  uint32_t reserved[12];
  uint64_t dispatchGlobalsPtr;
};
static_assert(sizeof(QBVH6) == 128, "QBVH6 must be 128 bytes large");
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,508 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <cstdint>
#include <iostream>
#include "leaf.h"
#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32)
inline float embree_frexp(float value, int* exp)
{
  // Building with the Intel(R) oneAPI DPC++/C++ Compiler and -no-intel-libs
  // fails with an unresolved external symbol "__imp_frexp". For that case we
  // provide the manual implementation described at
  // https://en.cppreference.com/w/c/numeric/math/frexp
  static_assert(FLT_RADIX == 2, "custom implementation of frexp only works for base 2 floating point representations");

  // zero gets exponent 0, every other value gets 1 + logb(value)
  if (value == 0)
    *exp = 0;
  else
    *exp = (int)(1 + logb(value));

  // scaling by 2^-exp yields the mantissa that is returned
  return scalbn(value, -(*exp));
}
#endif
namespace embree
{
/* The NodeRef structure references a node of the BVH. It stores the
* pointer to that node as well as the node's type. If a leaf node
* is referenced the current primitive to intersect is also
* stored. */
struct NodeRef
{
  /* creates an invalid reference */
  NodeRef ()
    : node(nullptr), type(NODE_TYPE_INVALID), cur_prim(0) {}

  /* creates a reference from its three components; cur_prim has to fit
   * into the 4 bit wide member */
  NodeRef (void* node, NodeType type, uint8_t cur_prim)
    : node((char*)node), type(type), cur_prim(cur_prim)
  {
    assert(cur_prim < 16);
  }

  /* Decodes the 64 bit encoding used in MemRay and Instances: the low
   * 4 bits carry the node type, the remaining bits the address, to
   * which 'offset' is added. The current primitive is always 0.
   * (A variant that forced the type to NODE_TYPE_INTERNAL is disabled
   * in the original code.) */
  NodeRef (uint64_t nodePtr, uint64_t offset = 0)
    : node((char*) (nodePtr & ~(uint64_t)0xF) + offset),
      type((NodeType) (nodePtr & 0xF)),
      cur_prim(0)
  {
  }

  /* Encodes the reference into 64 bits as used in MemRay and Instances.
   * Requires a 16 byte aligned node address and cur_prim == 0.
   * (An assert restricting this to NODE_TYPE_INTERNAL is disabled in
   * the original code.) */
  operator uint64_t() const
  {
    const uint64_t address = (uint64_t)node;
    assert((address & 0xF) == 0);
    assert(cur_prim == 0);
    return address + (uint64_t) type;
  }

  /* returns the referenced node as internal node */
  template<typename InternalNode>
  InternalNode* innerNode() const {
    assert(type == NODE_TYPE_INTERNAL);
    return (InternalNode*)node;
  }

  /* returns the referenced node as instance leaf */
  InstanceLeaf* leafNodeInstance() const {
    assert(type == NODE_TYPE_INSTANCE);
    return (InstanceLeaf*)node;
  }

  /* returns the referenced node as quad leaf */
  QuadLeaf* leafNodeQuad() const {
    assert(type == NODE_TYPE_QUAD);
    return (QuadLeaf*)node;
  }

  /* returns the referenced node as procedural leaf */
  ProceduralLeaf* leafNodeProcedural() const {
    assert(type == NODE_TYPE_PROCEDURAL);
    return (ProceduralLeaf*)node;
  }

  /* two references are equal if address, type, and current primitive agree */
  friend bool operator ==(const NodeRef& a, const NodeRef& b) {
    if (a.node != b.node) return false;
    if (a.type != b.type) return false;
    return a.cur_prim == b.cur_prim;
  }

  friend bool operator !=(const NodeRef& a, const NodeRef& b) {
    return !(a == b);
  }

#if !defined(__RTRT_GSIM)
  /* output operator */
  friend inline std::ostream& operator<<(std::ostream& _cout, const NodeRef& node) {
    _cout << "NodeRef { " << (void*)node.node << ", " << node.type << ", " << (int)node.cur_prim << " }";
    return _cout;
  }
#endif

public:
  char* node; // pointer to the referenced node
  NodeType type; // type of the node referenced
  uint8_t cur_prim : 4; // current primitive referenced in the leaf
};
/*
The internal nodes of the BVH store references to 6 children and
quantized bounds for each of these children.
All children are stored consecutively in memory at a location
referred to by the childOffset. To calculate the relative
location of the i'th child the size (as encoded in blockIncr) of
all the children with index smaller than i has to get added to
that childOffset. The calculated offset specifies the signed
number of 64 bytes blocks relative to the node address to reach
the child.
If the nodeType is INTERNAL we are in mixed mode and the type of
each child is encoded inside the startPrim member. Otherwise we
are in fat leaf mode and each child has the same type 'nodeType'
and startPrim identifies the primitive where the leaf
starts. The leaf spans all primitives from this start primitive
to the end primitive which is marked as 'last'.
The bounding boxes of the children are quantized into a regular
3D grid. The world space position of the origin of that grid is
stored at full precision in the lower member, while the step
size is encoded in the exp_x, exp_y, and exp_z members as power
of 2. Thus grid coordinates together with their exponent
(xi,exp_x), (yi,exp_y), (zi,exp_z) correspond to the mantissa
and exponent of a floating point number representation without
leading zero. Thus the world space position of the bounding
planes can get calculated as follows:
x = lower.x + pow(2,exp_x) * 0.xi
y = lower.y + pow(2,exp_y) * 0.yi
z = lower.z + pow(2,exp_z) * 0.zi
As the stored grid coordinates for child bounds are only
unsigned 8-bit values, ray/box intersections can get performed
with reduced precision.
The node also stores a mask used for ray filtering. Only rays
with (node.nodeMask & ray.rayMask) != 0 are traversed, all
others are culled.
*/
/* Raw memory layout of an internal node with NUM_CHILDREN == 6 children.
 * The static_assert that follows this struct requires the layout to be
 * exactly 64 bytes, so members must not be added, removed, or reordered. */
struct InternalNode6Data
{
static constexpr uint32_t NUM_CHILDREN = 6;
Vec3f lower; // world space origin of quantization grid
int32_t childOffset; // offset to all children in 64B multiples
NodeType nodeType; // the type of the node
uint8_t pad; // unused byte
int8_t exp_x; // 2^exp_x is the size of the grid in x dimension
int8_t exp_y; // 2^exp_y is the size of the grid in y dimension
int8_t exp_z; // 2^exp_z is the size of the grid in z dimension
uint8_t nodeMask; // mask used for ray filtering
/* per-child data packed into a single byte (2 + 4 + 2 bits) */
struct ChildData
{
uint8_t blockIncr : 2; // size of child in 64 byte blocks
uint8_t startPrim : 4; // start primitive in fat leaf mode or child type in mixed mode
uint8_t pad : 2; // unused bits
} childData[NUM_CHILDREN];
/* Quantized child bounds, one array per bounding plane. A child whose
 * lower_x has the top bit set while upper_x has it cleared is treated
 * as invalid by InternalNodeCommon::valid(). */
uint8_t lower_x[NUM_CHILDREN]; // the quantized lower bounds in x-dimension
uint8_t upper_x[NUM_CHILDREN]; // the quantized upper bounds in x-dimension
uint8_t lower_y[NUM_CHILDREN]; // the quantized lower bounds in y-dimension
uint8_t upper_y[NUM_CHILDREN]; // the quantized upper bounds in y-dimension
uint8_t lower_z[NUM_CHILDREN]; // the quantized lower bounds in z-dimension
uint8_t upper_z[NUM_CHILDREN]; // the quantized upper bounds in z-dimension
};
static_assert(sizeof(InternalNode6Data) == 64, "InternalNode6Data must be 64 bytes large");
/* Functionality shared by internal nodes, layered on top of the raw
 * node layout 'InternalNodeData': initialization, quantization and
 * de-quantization of child bounds, and encoding/decoding of the child
 * references (offset, type, start primitive). */
template<typename InternalNodeData>
struct InternalNodeCommon : public InternalNodeData
{
using InternalNodeData::NUM_CHILDREN;
/* performs no explicit initialization */
InternalNodeCommon() {
}
/* Creates a node of the given type without children: child offset 0,
 * node mask 0xFF, zeroed child data, grid origin and exponents set to
 * 0, and all child bounds marked as invalid. */
InternalNodeCommon(NodeType type)
{
this->nodeType = type;
this->childOffset = 0;
this->nodeMask = 0xFF;
for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++)
this->childData[i] = { 0, 0, 0 };
this->lower = Vec3f(0.0f);
this->exp_x = 0;
this->exp_y = 0;
this->exp_z = 0;
/* set all child bounds to invalid */
for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++) {
this->lower_x[i] = this->lower_y[i] = this->lower_z[i] = 0x80;
this->upper_x[i] = this->upper_y[i] = this->upper_z[i] = 0x00;
}
}
/* this function slightly enlarges bounds in order to make traversal watertight */
/* The enlargement is 'ulps' times the float epsilon times the largest
 * absolute coordinate of the box, applied in all three dimensions. */
static const BBox3f conservativeBox(const BBox3f box, float ulps = 1.0f) {
const float err = ulps*std::numeric_limits<float>::epsilon() * std::max(reduce_max(abs(box.lower)), reduce_max(abs(box.upper)));
return enlarge(box, Vec3f(err));
}
/* this function quantizes the provided bounds */
/* 'base' is the grid origin. The returned box holds integral grid
 * coordinates in [0,255] stored as floats. */
const BBox3f quantize_bounds(BBox3f fbounds, Vec3f base) const
{
const Vec3f lower = fbounds.lower-base;
const Vec3f upper = fbounds.upper-base;
/* ldexpf(v, -exp + 8) computes v * 2^(8-exp), i.e. the coordinate
 * measured in grid steps of size 2^exp / 256 */
float qlower_x = ldexpf(lower.x, -this->exp_x + 8);
float qlower_y = ldexpf(lower.y, -this->exp_y + 8);
float qlower_z = ldexpf(lower.z, -this->exp_z + 8);
float qupper_x = ldexpf(upper.x, -this->exp_x + 8);
float qupper_y = ldexpf(upper.y, -this->exp_y + 8);
float qupper_z = ldexpf(upper.z, -this->exp_z + 8);
/* the bounds are expected to lie inside the grid */
assert(qlower_x >= 0.0f && qlower_x <= 255.0f);
assert(qlower_y >= 0.0f && qlower_y <= 255.0f);
assert(qlower_z >= 0.0f && qlower_z <= 255.0f);
assert(qupper_x >= 0.0f && qupper_x <= 255.0f);
assert(qupper_y >= 0.0f && qupper_y <= 255.0f);
assert(qupper_z >= 0.0f && qupper_z <= 255.0f);
/* round lower bounds down and upper bounds up so that the quantized
 * box cannot shrink, then clamp to the representable range */
qlower_x = min(max(floorf(qlower_x),0.0f),255.0f);
qlower_y = min(max(floorf(qlower_y),0.0f),255.0f);
qlower_z = min(max(floorf(qlower_z),0.0f),255.0f);
qupper_x = min(max(ceilf(qupper_x),0.0f),255.0f);
qupper_y = min(max(ceilf(qupper_y),0.0f),255.0f);
qupper_z = min(max(ceilf(qupper_z),0.0f),255.0f);
BBox3f qbounds(Vec3f(qlower_x, qlower_y, qlower_z), Vec3f(qupper_x, qupper_y, qupper_z));
/* verify that quantized bounds are conservative */
/* the de-quantized box is widened by a small tolerance before the
 * check; 'ulp' is a constant defined outside this file section */
BBox3f dbounds = dequantize_bounds(qbounds, base);
dbounds.lower.x -= 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8));
dbounds.lower.y -= 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8));
dbounds.lower.z -= 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8));
dbounds.upper.x += 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8));
dbounds.upper.y += 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8));
dbounds.upper.z += 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8));
assert(subset(fbounds, dbounds));
return qbounds;
}
/* this function de-quantizes the provided bounds */
/* Inverse of the scaling in quantize_bounds: each grid coordinate q is
 * mapped to base + q * 2^(exp-8). */
const BBox3f dequantize_bounds(const BBox3f& qbounds, Vec3f base) const
{
const float dlower_x = base.x + ldexpf(qbounds.lower.x, this->exp_x - 8);
const float dlower_y = base.y + ldexpf(qbounds.lower.y, this->exp_y - 8);
const float dlower_z = base.z + ldexpf(qbounds.lower.z, this->exp_z - 8);
const float dupper_x = base.x + ldexpf(qbounds.upper.x, this->exp_x - 8);
const float dupper_y = base.y + ldexpf(qbounds.upper.y, this->exp_y - 8);
const float dupper_z = base.z + ldexpf(qbounds.upper.z, this->exp_z - 8);
return BBox3f(Vec3f(dlower_x, dlower_y, dlower_z), Vec3f(dupper_x, dupper_y, dupper_z));
}
/* Determines if a child is valid. We have only to look at the
* topmost bit of lower_x and upper_x to determine if child is
* valid */
/* Invalid children are written as lower_x = 0x80 and upper_x = 0x00
 * (see the constructor and invalidateChild); the test rejects exactly
 * the combination "top bit of lower_x set, top bit of upper_x clear". */
bool valid(int i) const {
return !(this->lower_x[i] & 0x80) || (this->upper_x[i] & 0x80);
}
/* Determines if the node is in fat leaf mode. */
bool isFatLeaf() const {
return this->nodeType != NODE_TYPE_MIXED;
}
/* Sets the offset to the child memory. */
/* The offset is stored relative to the address of this node in units
 * of 64 bytes; a null pointer is stored as offset 0. */
void setChildOffset(void* childDataPtr)
{
int64_t childDataOffset = childDataPtr ? (char*)childDataPtr - (char*)this : 0;
assert(childDataOffset % 64 == 0);
/* the block offset has to fit into the 32 bit childOffset member */
assert((int64_t)(int32_t)(childDataOffset / 64) == (childDataOffset / 64));
this->childOffset = (int32_t)(childDataOffset / 64);
}
/* Sets the type, size, and current primitive of a child */
/* In fat leaf mode startPrim stores cur_prim and childType has to
 * match the node type; in mixed mode startPrim stores childType and
 * cur_prim has to be 0. */
void setChildType(uint32_t child, NodeType childType, uint32_t block_delta, uint32_t cur_prim)
{
// there is no need to store block_delta for last child
if (child == NUM_CHILDREN-1) block_delta = 0;
assert(block_delta < 4);
assert(cur_prim < 16);
if (isFatLeaf())
{
assert(this->nodeType == childType);
this->childData[child].startPrim = cur_prim;
this->childData[child].blockIncr = block_delta;
}
else
{
assert(cur_prim == 0);
this->childData[child].startPrim = childType;
this->childData[child].blockIncr = block_delta;
}
}
/* Marks a child as invalid, using the same encoding as the constructor. */
void invalidateChild(uint32_t childID)
{
/* set child bounds to invalid */
this->lower_x[childID] = this->lower_y[childID] = this->lower_z[childID] = 0x80;
this->upper_x[childID] = this->upper_y[childID] = this->upper_z[childID] = 0x00;
}
/* Sets child bounds */
/* The bounds are first enlarged with conservativeBox and then
 * quantized relative to the grid origin of this node. */
void setChildBounds(uint32_t childID, const BBox3f& fbounds)
{
assert(fbounds.lower.x <= fbounds.upper.x);
assert(fbounds.lower.y <= fbounds.upper.y);
assert(fbounds.lower.z <= fbounds.upper.z);
const BBox3f qbounds = quantize_bounds(conservativeBox(fbounds), this->lower);
this->lower_x[childID] = (uint8_t)qbounds.lower.x;
this->lower_y[childID] = (uint8_t)qbounds.lower.y;
this->lower_z[childID] = (uint8_t)qbounds.lower.z;
this->upper_x[childID] = (uint8_t)qbounds.upper.x;
this->upper_y[childID] = (uint8_t)qbounds.upper.y;
this->upper_z[childID] = (uint8_t)qbounds.upper.z;
assert(valid(childID));
}
/* Sets an entire child, including bounds, type, size, and referenced primitive. */
void setChild(uint32_t childID, const BBox3f& fbounds, NodeType type, uint32_t block_delta, uint32_t cur_prim = 0)
{
setChildType(childID, type, block_delta, cur_prim);
setChildBounds(childID, fbounds);
}
/* Calculates the byte offset to the child. The offset is
* relative to the address this node. */
/* It is the child offset of the node plus the block increments of all
 * children with a smaller index, converted from 64 byte blocks to bytes. */
int64_t getChildOffset(uint32_t childID) const
{
int64_t ofs = this->childOffset;
for (uint32_t j = 0; j < childID; j++)
ofs += this->childData[j].blockIncr;
return 64 * ofs;
}
/* Returns the type of the child. In fat leaf mode the type is
* shared between all children, otherwise a per-child type is
* encoded inside the startPrim member for each child. */
NodeType getChildType(uint32_t childID) const
{
if (isFatLeaf())
return this->nodeType;
else
return (NodeType)(this->childData[childID].startPrim);
}
/* Returns the start primitive of a child. In case of children
* in fat-leaf mode, all children are leaves, and the start
* primitive specifies the primitive in a leaf block where the
* leaf start. */
/* In mixed mode the start primitive is always 0. */
uint32_t getChildStartPrim(uint32_t childID) const
{
if (isFatLeaf())
return this->childData[childID].startPrim;
else
return 0;
}
/* Returns a node reference for the given child. This reference
* includes the node pointer, type, and start primitive. */
/* 'This' is the address the child offset is applied to. */
NodeRef child(void* This, int childID) const {
return NodeRef((char*)This + getChildOffset(childID), getChildType(childID), getChildStartPrim(childID));
}
/* Same as above, with the child offset applied to the address of this node. */
NodeRef child(int i) const {
return child((void*)this, i);
}
};
/* Internal node of the BVH. On top of InternalNodeCommon it sets up the
 * quantization grid from world space bounds, de-quantizes child bounds,
 * and provides copying and printing. */
template<typename QInternalNode>
struct InternalNode : public InternalNodeCommon<QInternalNode>
{
  using InternalNodeCommon<QInternalNode>::valid;
  using InternalNodeCommon<QInternalNode>::getChildType;
  using InternalNodeCommon<QInternalNode>::getChildOffset;
  using InternalNodeCommon<QInternalNode>::getChildStartPrim;
  using InternalNodeCommon<QInternalNode>::conservativeBox;
  using InternalNodeCommon<QInternalNode>::dequantize_bounds;
  using InternalNodeCommon<QInternalNode>::NUM_CHILDREN;

  InternalNode() {
  }

  InternalNode (NodeType type)
    : InternalNodeCommon<QInternalNode>(type) {}

  /* Constructs an internal node whose quantization grid is derived
   * from the provided bounds. */
  InternalNode (BBox3f box, NodeType type = NODE_TYPE_MIXED)
    : InternalNode(type)
  {
    setNodeBounds(box);
  }

  /* Initializes the quantization grid: the grid origin becomes the
   * lower corner of the conservatively enlarged box, and per dimension
   * an exponent is chosen from the (slightly enlarged) box extent. */
  void setNodeBounds(BBox3f box)
  {
    box = conservativeBox(box);
    const float growth = 1.0f + float(std::numeric_limits<float>::epsilon());
    const Vec3f extent = box.size() * growth;
    this->lower = box.lower;

    /* Exponent for one dimension: the frexp exponent of the extent,
     * incremented when the mantissa exceeds 255/256, and not smaller
     * than -128 (enlarges too tight bounds). */
    auto gridExponent = [](float length) -> int {
      int e;
#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32)
      const float mantissa = embree_frexp(length, &e);
#else
      const float mantissa = frexp(length, &e);
#endif
      e += (mantissa > 255.0f / 256.0f);
      return max(-128,e);
    };

    const int ex = gridExponent(extent.x);
    const int ey = gridExponent(extent.y);
    const int ez = gridExponent(extent.z);

    /* the exponents are stored in signed 8 bit members */
    this->exp_x = ex; assert(ex >= -128 && ex <= 127);
    this->exp_y = ey; assert(ey >= -128 && ey <= 127);
    this->exp_z = ez; assert(ez >= -128 && ez <= 127);
  }

  /* returns the de-quantized bounds of the specified child */
  const BBox3f bounds(uint32_t childID) const
  {
    const Vec3f qlower(this->lower_x[childID], this->lower_y[childID], this->lower_z[childID]);
    const Vec3f qupper(this->upper_x[childID], this->upper_y[childID], this->upper_z[childID]);
    return dequantize_bounds(BBox3f(qlower, qupper), this->lower);
  }

  /* returns the union of the de-quantized bounds of all valid children */
  const BBox3f bounds() const
  {
    BBox3f merged = empty;
    for (size_t i=0; i<NUM_CHILDREN; i++) {
      if (valid(i))
        merged.extend(bounds(i));
    }
    return merged;
  }

  /* Copies this node to 'dst' and re-targets the child offset of the
   * copy, so that it refers to the same child memory as this node. */
  void copy_to( InternalNode* dst ) const
  {
    *dst = *this;
    char* children = (char*)this + getChildOffset(0);
    dst->setChildOffset(children);
  }

#if !defined(__RTRT_GSIM)

  /* Prints the node with the given indentation depth. The closing
   * brace is only written if 'close' is set. */
  void print(std::ostream& cout, uint32_t depth, bool close) const
  {
    cout << tab(depth) << "InternalNode" << NUM_CHILDREN << " {" << std::endl;
    cout << tab(depth) << " addr = " << this << std::endl;
    cout << tab(depth) << " childOffset = " << 64 * int64_t(this->childOffset) << std::endl;
    cout << tab(depth) << " nodeType = " << NodeType(this->nodeType) << std::endl;
    cout << tab(depth) << " nodeMask = " << std::bitset<8>(this->nodeMask) << std::endl;

    for (uint32_t i = 0; i < NUM_CHILDREN; i++)
    {
      cout << tab(depth) << " child" << i << " = { ";
      if (!valid(i)) {
        cout << "INVALID";
      }
      else
      {
        cout << "type = " << getChildType(i);
        cout << ", offset = " << getChildOffset(i);
        cout << ", prim = " << getChildStartPrim(i);
        cout << ", bounds = " << bounds(i);
      }
      cout << " }" << std::endl;
    }

    if (close)
      cout << tab(depth) << "}";
  }

  /* output operator for internal node */
  friend inline std::ostream& operator<<(std::ostream& cout, const InternalNode& node) {
    node.print(cout, 0, true);
    return cout;
  }

#endif
};
/* Returns the size in bytes of an internal node with the given number
 * of children. Only up to 6 children are supported; larger counts
 * trigger an assertion and yield 0 when assertions are disabled. */
inline size_t GetInternalNodeSize(uint32_t numChildren)
{
  if (numChildren > 6) {
    assert(false);
    return 0;
  }
  return sizeof(InternalNode6Data);
}
typedef InternalNode<InternalNode6Data> InternalNode6;
}

View file

@ -0,0 +1,151 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if defined(ZE_RAYTRACING)
#include "sys/sysinfo.h"
#include "sys/vector.h"
#include "math/vec2.h"
#include "math/vec3.h"
#include "math/bbox.h"
#include "math/affinespace.h"
#else
#include "../../common/default.h"
#endif
namespace embree
{
/* Per-triangle pairing result, stored as a 16 bit value: either one of
 * the markers below, or the offset to the triangle it is paired with. */
enum QuadifierType : uint16_t
{
  QUADIFIER_TRIANGLE = 0, // the triangle could not get paired
  QUADIFIER_QUAD = 1, // every value > 0 and != 0xFFFF is the offset to the paired triangle
  QUADIFIER_MAX_DISTANCE = 31, // largest offset that is accepted for a pairing
  QUADIFIER_PAIRED = 0xFFFF, // the triangle is paired with a previous triangle
};
/* Double ended queue with a fixed capacity of N elements. The 'begin'
 * and 'end' positions only ever grow; they are mapped onto the storage
 * array modulo N by operator[]. */
template<typename Ty, size_t N>
struct static_deque
{
  /* removes and returns the oldest element */
  __forceinline Ty pop_front() {
    assert(size());
    const size_t front = begin++;
    return array[front%N];
  }

  /* appends an element; the deque must not be full */
  __forceinline void push_back(const Ty& v) {
    assert(size() < N);
    const size_t back = end++;
    array[back%N] = v;
  }

  /* number of stored elements */
  __forceinline size_t size() const {
    assert(end >= begin);
    return end-begin;
  }

  __forceinline bool full() const {
    return size() == N;
  }

  /* Removes the element at position j (a position in [begin,end), not
   * an index relative to the front) by shifting the shorter side of
   * the deque over it. */
  __forceinline void erase( size_t j )
  {
    assert(j >= begin && j < end);

    /* fast path as we mostly just merge with the subsequent triangle */
    if (likely(j == begin)) {
      begin++;
      return;
    }

    const size_t numLeft = j-begin;
    const size_t numRight = end-j-1;

    if (numLeft < numRight)
    {
      /* fewer elements in front: move them one position towards the back */
      for (size_t i=j; i>begin; i--)
        array[i%N] = array[(i-1)%N];
      begin++;
    }
    else
    {
      /* fewer (or equally many) elements behind: move them one position
       * towards the front */
      for (size_t i=j+1; i<end; i++)
        array[(i-1)%N] = array[i%N];
      end--;
    }
  }

  __forceinline Ty& operator[] ( const size_t i ) { return array[i%N]; }
  __forceinline const Ty& operator[] ( const size_t i ) const { return array[i%N]; }

  Ty array[N];
  size_t begin = 0;
  size_t end = 0;
};
/* Tests whether triangle 'b' can be paired with triangle 'a'. For each
 * vertex index of b, the matching output (lb0, lb1, lb2) receives the
 * result of bsf() on the comparison mask against (a.x, a.y, a.z), with
 * mask value 0x8 always OR-ed in. NOTE(review): presumably 0x8 marks
 * lane 3 and bsf returns the lowest set bit, so a result of 3 means
 * "vertex not found in a" - confirm against the SIMD helpers. The pair
 * is accepted if at most one of the three results equals 3. */
__forceinline bool pair_triangles(Vec3<uint32_t> a, Vec3<uint32_t> b, uint8_t& lb0, uint8_t& lb1, uint8_t& lb2)
{
  const vuint<4> va(a.x,a.y,a.z,0);

  /* compare each vertex index of b against all vertex indices of a */
  const vboolf<4> mb0 = vboolf<4>(0x8) | (vuint<4>(b.x) == va);
  const vboolf<4> mb1 = vboolf<4>(0x8) | (vuint<4>(b.y) == va);
  const vboolf<4> mb2 = vboolf<4>(0x8) | (vuint<4>(b.z) == va);

  lb0 = bsf(movemask(mb0));
  lb1 = bsf(movemask(mb1));
  lb2 = bsf(movemask(mb2));

  const int numResultsEqual3 = (lb0 == 3) + (lb1 == 3) + (lb2 == 3);
  return numResultsEqual3 <= 1;
}
/* Removes the first triangle from the window and tries to pair it with
 * one of the triangles remaining in the window. On success the first
 * triangle stores the offset to its partner in quads_o, the partner is
 * marked QUADIFIER_PAIRED and removed from the window. Otherwise the
 * first triangle is marked QUADIFIER_TRIANGLE. */
template<typename GetTriangleFunc>
__forceinline void merge_triangle_window( uint32_t geomID, static_deque<uint32_t,32>& triangleWindow, QuadifierType* quads_o, const GetTriangleFunc& getTriangle )
{
  const uint32_t primID0 = triangleWindow.pop_front();
  const Vec3<uint32_t> tri0 = getTriangle(geomID, primID0);

  /* search the window for a partner */
  for ( size_t slot = triangleWindow.begin; slot != triangleWindow.end; ++slot )
  {
    const uint32_t primID1 = triangleWindow[slot];
    const Vec3<uint32_t> tri1 = getTriangle(geomID, primID1);

    uint8_t lb0,lb1,lb2;
    const bool matching = pair_triangles(tri0,tri1,lb0,lb1,lb2);

    /* the offset between the triangles cannot be too large, as the
     * hardware limits the bits available to encode it */
    const uint32_t prim_offset = primID1 - primID0;
    if (!matching || prim_offset > QUADIFIER_MAX_DISTANCE)
      continue;

    /* store the pairing */
    assert(prim_offset > 0 && prim_offset < QUADIFIER_PAIRED);
    quads_o[primID0] = (QuadifierType) prim_offset;
    quads_o[primID1] = QUADIFIER_PAIRED;
    triangleWindow.erase(slot);
    return;
  }

  /* no candidate to pair with was found: keep a single triangle */
  quads_o[primID0] = QUADIFIER_TRIANGLE;
}
template<typename GetTriangleFunc>
inline size_t pair_triangles( uint32_t geomID, QuadifierType* quads_o, uint32_t primID0, uint32_t primID1, const GetTriangleFunc& getTriangle )
{
static_deque<uint32_t, 32> triangleWindow;
size_t numTrianglePairs = 0;
for (uint32_t primID=primID0; primID<primID1; primID++)
{
triangleWindow.push_back(primID);
if (triangleWindow.full()) {
merge_triangle_window(geomID, triangleWindow,quads_o,getTriangle);
numTrianglePairs++;
}
}
while (triangleWindow.size()) {
merge_triangle_window(geomID, triangleWindow,quads_o,getTriangle);
numTrianglePairs++;
}
return numTrianglePairs;
}
}

View file

@ -0,0 +1,762 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define RTHWIF_EXPORT_API
#include "rtbuild.h"
#include "qbvh6_builder_sah.h"
// get definition of debug extension
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
#include "../../level_zero/ze_wrapper.h"
#endif
namespace embree
{
using namespace embree::isa;
static tbb::task_arena g_arena(tbb::this_task_arena::max_concurrency(),tbb::this_task_arena::max_concurrency());
/* Loads the index triple of triangle primID from the strided triangle buffer. */
inline ze_rtas_triangle_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID)
{
  assert(primID < geom->triangleCount);
  /* the byte offset is computed in 64 bit */
  const uint64_t byteOffset = uint64_t(primID)*geom->triangleStride;
  const char* const base = (const char*) geom->pTriangleBuffer;
  return *(const ze_rtas_triangle_indices_uint32_exp_t*)(base + byteOffset);
}
/* Loads vertex vertexID from the strided vertex buffer of a triangle geometry. */
inline Vec3f getVertex(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t vertexID)
{
  assert(vertexID < geom->vertexCount);
  /* the byte offset is computed in 64 bit */
  const uint64_t byteOffset = uint64_t(vertexID)*geom->vertexStride;
  const char* const base = (const char*) geom->pVertexBuffer;
  return *(const Vec3f*)(base + byteOffset);
}
/* Loads the four indices of quad primID from the strided quad buffer. */
inline ze_rtas_quad_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID)
{
  assert(primID < geom->quadCount);
  /* the byte offset is computed in 64 bit */
  const uint64_t byteOffset = uint64_t(primID)*geom->quadStride;
  const char* const base = (const char*) geom->pQuadBuffer;
  return *(const ze_rtas_quad_indices_uint32_exp_t*)(base + byteOffset);
}
/* Loads vertex vertexID from the strided vertex buffer of a quad geometry. */
inline Vec3f getVertex(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t vertexID)
{
  assert(vertexID < geom->vertexCount);
  /* the byte offset is computed in 64 bit */
  const uint64_t byteOffset = uint64_t(vertexID)*geom->vertexStride;
  const char* const base = (const char*) geom->pVertexBuffer;
  return *(const Vec3f*)(base + byteOffset);
}
/* Converts the instance transformation referenced by geom->pTransform into
   an AffineSpace3fa. The buffer is reinterpreted according to
   geom->transformFormat; all three supported layouts are read through
   their named members (vx_*, vy_*, vz_*, p_*), so the three cases differ only
   in the struct type used for the cast.
   Throws std::runtime_error("invalid transform format") for any other format. */
inline AffineSpace3fa getTransform(const ze_rtas_builder_instance_geometry_info_exp_t* geom)
{
switch (geom->transformFormat)
{
case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_COLUMN_MAJOR: {
const ze_rtas_transform_float3x4_column_major_exp_t* xfm = (const ze_rtas_transform_float3x4_column_major_exp_t*) geom->pTransform;
return {
{ xfm->vx_x, xfm->vx_y, xfm->vx_z },
{ xfm->vy_x, xfm->vy_y, xfm->vy_z },
{ xfm->vz_x, xfm->vz_y, xfm->vz_z },
{ xfm-> p_x, xfm-> p_y, xfm-> p_z }
};
}
case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR: {
const ze_rtas_transform_float3x4_aligned_column_major_exp_t* xfm = (const ze_rtas_transform_float3x4_aligned_column_major_exp_t*) geom->pTransform;
return {
{ xfm->vx_x, xfm->vx_y, xfm->vx_z },
{ xfm->vy_x, xfm->vy_y, xfm->vy_z },
{ xfm->vz_x, xfm->vz_y, xfm->vz_z },
{ xfm-> p_x, xfm-> p_y, xfm-> p_z }
};
}
case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ROW_MAJOR: {
const ze_rtas_transform_float3x4_row_major_exp_t* xfm = (const ze_rtas_transform_float3x4_row_major_exp_t*) geom->pTransform;
return {
{ xfm->vx_x, xfm->vx_y, xfm->vx_z },
{ xfm->vy_x, xfm->vy_y, xfm->vy_z },
{ xfm->vz_x, xfm->vz_y, xfm->vz_z },
{ xfm-> p_x, xfm-> p_y, xfm-> p_z }
};
}
default:
throw std::runtime_error("invalid transform format");
}
}
/* Checks a triangle geometry descriptor and throws std::runtime_error on the
   first violated requirement (index format, vertex format, missing buffers). */
inline void verifyGeometryDesc(const ze_rtas_builder_triangles_geometry_info_exp_t* geom)
{
  const bool indexFormatOk = geom->triangleFormat == ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32;
  if (!indexFormatOk) {
    throw std::runtime_error("triangle format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32");
  }

  const bool vertexFormatOk = geom->vertexFormat == ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
  if (!vertexFormatOk) {
    throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3");
  }

  /* a buffer pointer is only required when the matching count is non-zero */
  if (geom->triangleCount != 0 && geom->pTriangleBuffer == nullptr) {
    throw std::runtime_error("no triangle buffer specified");
  }
  if (geom->vertexCount != 0 && geom->pVertexBuffer == nullptr) {
    throw std::runtime_error("no vertex buffer specified");
  }
}
/* Checks a quad geometry descriptor and throws std::runtime_error on the
   first violated requirement (index format, vertex format, missing buffers). */
inline void verifyGeometryDesc(const ze_rtas_builder_quads_geometry_info_exp_t* geom)
{
  const bool indexFormatOk = geom->quadFormat == ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32;
  if (!indexFormatOk) {
    throw std::runtime_error("quad format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32");
  }

  const bool vertexFormatOk = geom->vertexFormat == ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
  if (!vertexFormatOk) {
    throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3");
  }

  /* a buffer pointer is only required when the matching count is non-zero */
  if (geom->quadCount != 0 && geom->pQuadBuffer == nullptr) {
    throw std::runtime_error("no quad buffer specified");
  }
  if (geom->vertexCount != 0 && geom->pVertexBuffer == nullptr) {
    throw std::runtime_error("no vertex buffer specified");
  }
}
/* Checks a procedural geometry descriptor: a bounds callback is required as
   soon as there are primitives, and the reserved field has to be zero.
   Throws std::runtime_error on the first violation. */
inline void verifyGeometryDesc(const ze_rtas_builder_procedural_geometry_info_exp_t* geom)
{
  const bool callbackMissing = geom->primCount != 0 && geom->pfnGetBoundsCb == nullptr;
  if (callbackMissing) {
    throw std::runtime_error("no bounds function specified");
  }

  if (geom->reserved != 0) {
    throw std::runtime_error("reserved value must be zero");
  }
}
/* Checks an instance geometry descriptor: transformation, bounds and the
   referenced acceleration structure all have to be provided.
   Throws std::runtime_error on the first missing pointer. */
inline void verifyGeometryDesc(const ze_rtas_builder_instance_geometry_info_exp_t* geom)
{
  if (geom->pTransform == nullptr) {
    throw std::runtime_error("no instance transformation specified");
  }
  if (geom->pBounds == nullptr) {
    throw std::runtime_error("no acceleration structure bounds specified");
  }
  if (geom->pAccelerationStructure == nullptr) {
    throw std::runtime_error("no acceleration structure to instanciate specified");
  }
}
/* Computes the bounding box of triangle primID. Returns false, leaving bbox
   untouched, when primID is out of range, a vertex index is out of range or
   a vertex position fails isvalid(). buildUserPtr is not used here. */
inline bool buildBounds(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
{
  if (primID >= geom->triangleCount)
    return false;

  /* all three indices have to reference an existing vertex */
  const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
  if (unlikely(tri.v0 >= geom->vertexCount) ||
      unlikely(tri.v1 >= geom->vertexCount) ||
      unlikely(tri.v2 >= geom->vertexCount))
    return false;

  /* load the positions and reject invalid ones */
  const Vec3f p0 = getVertex(geom,tri.v0);
  const Vec3f p1 = getVertex(geom,tri.v1);
  const Vec3f p2 = getVertex(geom,tri.v2);
  if (unlikely(!isvalid(p0)) || unlikely(!isvalid(p1)) || unlikely(!isvalid(p2)))
    return false;

  bbox = BBox3fa(min(p0,p1,p2),max(p0,p1,p2));
  return true;
}
inline bool buildBounds(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
{
if (primID >= geom->quadCount) return false;
const ze_rtas_quad_indices_uint32_exp_t tri = getPrimitive(geom,primID);
if (unlikely(tri.v0 >= geom->vertexCount)) return false;
if (unlikely(tri.v1 >= geom->vertexCount)) return false;
if (unlikely(tri.v2 >= geom->vertexCount)) return false;
if (unlikely(tri.v3 >= geom->vertexCount)) return false;
const Vec3f p0 = getVertex(geom,tri.v0);
const Vec3f p1 = getVertex(geom,tri.v1);
const Vec3f p2 = getVertex(geom,tri.v2);
const Vec3f p3 = getVertex(geom,tri.v3);
if (unlikely(!isvalid(p0))) return false;
if (unlikely(!isvalid(p1))) return false;
if (unlikely(!isvalid(p2))) return false;
if (unlikely(!isvalid(p3))) return false;
bbox = BBox3fa(min(p0,p1,p2,p3),max(p0,p1,p2,p3));
return true;
}
/* Queries the bounds of procedural primitive primID through the user
   supplied callback geom->pfnGetBoundsCb. Returns false, leaving bbox
   untouched, when primID is out of range, no callback is set, or the
   returned box has an invalid corner or is empty. */
inline bool buildBounds(const ze_rtas_builder_procedural_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
{
if (primID >= geom->primCount) return false;
if (geom->pfnGetBoundsCb == nullptr) return false;
BBox3f bounds;
/* request exactly one box, written directly into the local `bounds` */
ze_rtas_geometry_aabbs_exp_cb_params_t params = { ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS };
params.primID = primID;
params.primIDCount = 1;
params.pGeomUserPtr = geom->pGeomUserPtr;
params.pBuildUserPtr = buildUserPtr;
/* NOTE(review): the cast assumes BBox3f and ze_rtas_aabb_exp_t share the same memory layout -- confirm */
params.pBoundsOut = (ze_rtas_aabb_exp_t*) &bounds;
(geom->pfnGetBoundsCb)(&params);
if (unlikely(!isvalid(bounds.lower))) return false;
if (unlikely(!isvalid(bounds.upper))) return false;
if (unlikely(bounds.empty())) return false;
bbox = (BBox3f&) bounds;
return true;
}
inline bool buildBounds(const ze_rtas_builder_instance_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
{
if (primID >= 1) return false;
if (geom->pAccelerationStructure == nullptr) return false;
if (geom->pTransform == nullptr) return false;
const AffineSpace3fa local2world = getTransform(geom);
const Vec3fa lower(geom->pBounds->lower.x,geom->pBounds->lower.y,geom->pBounds->lower.z);
const Vec3fa upper(geom->pBounds->upper.x,geom->pBounds->upper.y,geom->pBounds->upper.z);
const BBox3fa bounds = xfmBounds(local2world,BBox3fa(lower,upper));
if (unlikely(!isvalid(bounds.lower))) return false;
if (unlikely(!isvalid(bounds.upper))) return false;
if (unlikely(bounds.empty())) return false;
bbox = bounds;
return true;
}
/* Creates primitive references for the primitives r.begin()..r.end() of one
   geometry and stores them consecutively into prims starting at index k.
   Primitives for which buildBounds() fails are skipped. Returns the
   PrimInfo accumulated over the stored references. */
template<typename GeometryType>
PrimInfo createGeometryPrimRefArray(const GeometryType* geom, void* buildUserPtr, evector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID)
{
  PrimInfo pinfo(empty);
  for (uint32_t primID=r.begin(); primID<r.end(); primID++)
  {
    BBox3fa bounds = empty;
    const bool valid = buildBounds(geom,primID,bounds,buildUserPtr);
    if (valid)
    {
      const PrimRef prim(bounds,geomID,primID);
      pinfo.add_center2(prim);
      prims[k] = prim;
      ++k;
    }
  }
  return pinfo;
}
/* Minimal view of a descriptor header consisting of a structure type
   followed by a pNext pointer. Descriptors are cast to this type to walk
   their pNext chain (see checkDescChain). */
typedef struct _zet_base_desc_t
{
/** [in] type of this structure */
ze_structure_type_t stype;
/** [in,out][optional] must be null or a pointer to an extension-specific structure */
const void* pNext;
} zet_base_desc_t_;
/* Runs the matching validate() overload on `arg` and returns its result from
   the enclosing function unless it is ZE_RESULT_SUCCESS. Wrapped in
   do { } while (0) so that `VALIDATE(x);` behaves like a single statement. */
#define VALIDATE(arg) \
  do { \
    ze_result_t result = validate(arg); \
    if (result != ZE_RESULT_SUCCESS) return result; \
  } while (0)

/* Returns ZE_RESULT_ERROR_INVALID_NULL_POINTER from the enclosing function
   when `arg` is a null pointer. The last line of the macro must not end with
   a line continuation, otherwise the following source line becomes part of
   the macro body. */
#define VALIDATE_PTR(arg) \
  do { \
    if ((arg) == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; \
  } while (0)
/* A driver handle is accepted as long as it is not null. */
ze_result_t validate(ze_driver_handle_t hDriver)
{
  return (hDriver == nullptr) ? ZE_RESULT_ERROR_INVALID_NULL_HANDLE
                              : ZE_RESULT_SUCCESS;
}
/* A device handle is accepted as long as it is not null. */
ze_result_t validate(ze_device_handle_t hDevice)
{
  return (hDevice == nullptr) ? ZE_RESULT_ERROR_INVALID_NULL_HANDLE
                              : ZE_RESULT_SUCCESS;
}
/* Follows the pNext chain starting at desc. Returns true when the chain
   terminates with a null pNext within 1024 hops; longer chains (which
   includes cyclic ones) are reported as invalid. desc itself must not be
   null. */
bool checkDescChain(zet_base_desc_t_* desc)
{
  size_t remaining = 1024;
  while (remaining-- > 0)
  {
    if (desc->pNext == nullptr)
      return true;
    desc = (zet_base_desc_t_*) desc->pNext;
  }
  return false;
}
/* Object behind a builder handle. It only carries a magic number that is set
   on construction and cleared on destruction, which lets verify() reject
   pointers that do not refer to a live builder object. */
struct ze_rtas_builder
{
  enum { MAGICK = 0x45FE67E1 };

  ze_rtas_builder ()
  {
  }

  ~ze_rtas_builder()
  {
    /* invalidate the object so that a stale handle fails verify() */
    magick = 0x0;
  }

  /* true while the magic number is intact */
  bool verify() const
  {
    const bool intact = (magick == MAGICK);
    return intact;
  }

  uint32_t magick = MAGICK;
};
/* A builder handle has to be non-null and point to an object whose magic
   number is intact. */
ze_result_t validate(ze_rtas_builder_exp_handle_t hBuilder)
{
  if (hBuilder == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;

  const ze_rtas_builder* builder = (const ze_rtas_builder*) hBuilder;
  return builder->verify() ? ZE_RESULT_SUCCESS
                           : ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
/* Object behind a parallel operation handle. Besides a magic number used to
   recognize live objects it holds the state of one deferred build: an
   in-use flag, the result code of the build and the TBB task group the build
   runs in. */
struct ze_rtas_parallel_operation_t
{
ze_rtas_parallel_operation_t() {
}
~ze_rtas_parallel_operation_t() {
/* invalidate the object so that a stale handle fails verify() */
magick = 0x0;
}
/* returns ZE_RESULT_SUCCESS while the magic number is intact */
ze_result_t verify() const
{
if (magick != MAGICK)
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
return ZE_RESULT_SUCCESS;
}
enum { MAGICK = 0xE84567E1 };
uint32_t magick = MAGICK;
/* set when a build is attached to this operation, cleared again by the join call */
std::atomic<bool> object_in_use = false;
/* result of the deferred build, written by the build task */
ze_result_t errorCode = ZE_RESULT_SUCCESS;
/* task group executing the deferred build */
tbb::task_group group;
};
/* A parallel operation handle has to be non-null; the remaining checks are
   delegated to the object's verify() method. */
ze_result_t validate(ze_rtas_parallel_operation_exp_handle_t hParallelOperation)
{
  if (hParallelOperation == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;

  const ze_rtas_parallel_operation_t* op = (const ze_rtas_parallel_operation_t*) hParallelOperation;
  return op->verify();
}
/* Validates a builder descriptor: non-null pointer, expected structure type,
   terminating pNext chain and a builder version that is not newer than the
   current one. */
ze_result_t validate(const ze_rtas_builder_exp_desc_t* pDescriptor)
{
  if (pDescriptor == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* every remaining failure maps to the same error code */
  const bool invalid =
    pDescriptor->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC ||
    !checkDescChain((zet_base_desc_t_*)pDescriptor) ||
    uint32_t(ZE_RTAS_BUILDER_EXP_VERSION_CURRENT) < uint32_t(pDescriptor->builderVersion);

  return invalid ? ZE_RESULT_ERROR_INVALID_ENUMERATION : ZE_RESULT_SUCCESS;
}
/* Validates a device properties struct: non-null pointer, expected structure
   type and terminating pNext chain. */
ze_result_t validate(ze_rtas_device_exp_properties_t* pProperties)
{
  if (pProperties == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* both remaining failures map to the same error code */
  const bool invalid =
    pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES ||
    !checkDescChain((zet_base_desc_t_*)pProperties);

  return invalid ? ZE_RESULT_ERROR_INVALID_ENUMERATION : ZE_RESULT_SUCCESS;
}
/* An acceleration structure format is valid when it is neither the invalid
   format nor larger than the highest known format version. */
ze_result_t validate(ze_rtas_format_exp_t rtasFormat)
{
  const bool isInvalid = rtasFormat == ZE_RTAS_FORMAT_EXP_INVALID;
  const bool isTooNew  = uint32_t(rtasFormat) > uint32_t(ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX);

  if (isInvalid || isTooNew)
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;
  return ZE_RESULT_SUCCESS;
}
/* Validates a build operation descriptor. The checks run in the order
   written below and the first failing one determines the returned error
   code; ZE_RESULT_SUCCESS is returned when all of them pass. */
ze_result_t validate(const ze_rtas_builder_build_op_exp_desc_t* args)
{
/* check for valid pointers */
if (args == nullptr)
return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
/* check if input descriptor has proper type */
if (args->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC)
return ZE_RESULT_ERROR_INVALID_ENUMERATION;
/* check valid pNext chain */
if (!checkDescChain((zet_base_desc_t_*)args))
return ZE_RESULT_ERROR_INVALID_ENUMERATION;
/* check if acceleration structure format is supported */
/* (the macro returns the error of validate(rtasFormat) directly from this function) */
VALIDATE(args->rtasFormat);
/* check for valid geometries array */
if (args->ppGeometries == nullptr && args->numGeometries > 0)
return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
/* validate that number of geometries are in range */
if (args->numGeometries > 0x00FFFFFF)
return ZE_RESULT_ERROR_INVALID_ENUMERATION;
/* validate build quality */
if (args->buildQuality < 0 || ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH < args->buildQuality)
return ZE_RESULT_ERROR_INVALID_ENUMERATION;
/* validate build flags */
/* (any value at or above twice the NO_DUPLICATE_ANYHIT_INVOCATION flag is rejected) */
if (args->buildFlags >= (ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION<<1))
return ZE_RESULT_ERROR_INVALID_ENUMERATION;
return ZE_RESULT_SUCCESS;
}
/* Validates a builder properties struct: non-null pointer, expected structure
   type and terminating pNext chain. */
ze_result_t validate(ze_rtas_builder_exp_properties_t* pProp)
{
  if (pProp == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* both remaining failures map to the same error code */
  const bool invalid =
    pProp->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES ||
    !checkDescChain((zet_base_desc_t_*)pProp);

  return invalid ? ZE_RESULT_ERROR_INVALID_ENUMERATION : ZE_RESULT_SUCCESS;
}
/* Validates a parallel operation properties struct: non-null pointer,
   expected structure type and terminating pNext chain. */
ze_result_t validate(ze_rtas_parallel_operation_exp_properties_t* pProperties)
{
  if (pProperties == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* both remaining failures map to the same error code */
  const bool invalid =
    pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES ||
    !checkDescChain((zet_base_desc_t_*)pProperties);

  return invalid ? ZE_RESULT_ERROR_INVALID_ENUMERATION : ZE_RESULT_SUCCESS;
}
/* Creates a new builder object and returns its handle through phBuilder.
   Returns the validation error of the first invalid argument, otherwise
   ZE_RESULT_SUCCESS. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder)
{
  /* input validation */
  VALIDATE(hDriver);
  VALIDATE(pDescriptor);
  VALIDATE_PTR(phBuilder);

  /* the handle is simply the address of the builder object */
  ze_rtas_builder* const builder = new ze_rtas_builder();
  *phBuilder = (ze_rtas_builder_exp_handle_t) builder;
  return ZE_RESULT_SUCCESS;
}
/* Destroys the builder object behind hBuilder after validating the handle. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder)
{
  /* input validation */
  VALIDATE(hBuilder);

  ze_rtas_builder* const builder = (ze_rtas_builder*) hBuilder;
  delete builder;
  return ZE_RESULT_SUCCESS;
}
/* Reports whether two acceleration structure formats are compatible. Two
   valid formats are compatible exactly when they are equal; otherwise
   ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE is returned. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver,
                                                                                      const ze_rtas_format_exp_t accelFormat,
                                                                                      const ze_rtas_format_exp_t otherAccelFormat )
{
  /* input validation */
  VALIDATE(hDriver);
  VALIDATE(accelFormat);
  VALIDATE(otherAccelFormat);

  const bool compatible = (accelFormat == otherAccelFormat);
  return compatible ? ZE_RESULT_SUCCESS
                    : ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE;
}
uint32_t getNumPrimitives(const ze_rtas_builder_geometry_info_exp_t* geom)
{
switch (geom->geometryType) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return ((ze_rtas_builder_triangles_geometry_info_exp_t*) geom)->triangleCount;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : return ((ze_rtas_builder_procedural_geometry_info_exp_t*) geom)->primCount;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return ((ze_rtas_builder_quads_geometry_info_exp_t*) geom)->quadCount;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : return 1;
default : return 0;
};
}
/* Queries the memory requirements of a build described by args and stores
   them in pProp (expected and worst case acceleration structure size, and
   scratch buffer size).
   Returns the validation error of the first invalid argument,
   ZE_RESULT_ERROR_UNKNOWN when the size estimation throws (for example for
   an invalid geometry type), and ZE_RESULT_SUCCESS otherwise. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
                                                                                const ze_rtas_builder_build_op_exp_desc_t* args,
                                                                                ze_rtas_builder_exp_properties_t* pProp)
{
  /* input validation */
  VALIDATE(hBuilder);
  VALIDATE(args);
  VALIDATE(pProp);

  const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries;
  const size_t numGeometries = args->numGeometries;

  /* number of primitives of a geometry, null geometries count as empty */
  auto getSize = [&](uint32_t geomID) -> size_t {
    const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
    if (geom == nullptr) return 0;
    return getNumPrimitives(geom);
  };

  /* maps the API geometry type to the builder's type, throws for unknown types */
  auto getType = [&](unsigned int geomID)
  {
    const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
    assert(geom);
    switch (geom->geometryType) {
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE;
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD;
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL;
    case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE;
    default: throw std::runtime_error("invalid geometry type");
    };
  };

  /* query memory requirements from builder */
  size_t expectedBytes = 0;
  size_t worstCaseBytes = 0;
  size_t scratchBytes = 0;
  try {
    QBVH6BuilderSAH::estimateSize(numGeometries, getSize, getType, args->rtasFormat, args->buildQuality, args->buildFlags, expectedBytes, worstCaseBytes, scratchBytes);
  }
  catch (std::exception&) {
    /* exceptions must not leave this exported C entry point; report them the
       same way the build body does */
    return ZE_RESULT_ERROR_UNKNOWN;
  }

  /* fill return struct */
  pProp->flags = 0;
  pProp->rtasBufferSizeBytesExpected = expectedBytes;
  pProp->rtasBufferSizeBytesMaxRequired = worstCaseBytes;
  pProp->scratchBufferSizeBytes = scratchBytes;
  return ZE_RESULT_SUCCESS;
}
/* Performs the actual acceleration structure build for the geometries
   described by args. The function first verifies all geometry descriptors,
   then sets up a number of accessor lambdas over the geometry array and hands
   them to QBVH6BuilderSAH::build().
   Returns ZE_RESULT_SUCCESS when the builder reports success,
   ZE_RESULT_EXP_RTAS_BUILD_RETRY when it reports failure, and
   ZE_RESULT_ERROR_UNKNOWN when a std::exception is thrown anywhere in the
   body (the whole function is a function-try-block).
   The arguments are expected to be validated by the caller. */
ze_result_t zeRTASBuilderBuildExpBody(const ze_rtas_builder_build_op_exp_desc_t* args,
void *pScratchBuffer, size_t scratchBufferSizeBytes,
void *pRtasBuffer, size_t rtasBufferSizeBytes,
void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) try
{
const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries;
const uint32_t numGeometries = args->numGeometries;
/* verify input descriptors */
/* (null geometries are skipped; a violated requirement throws std::runtime_error) */
parallel_for(numGeometries,[&](uint32_t geomID) {
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
if (geom == nullptr) return;
switch (geom->geometryType) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : verifyGeometryDesc((ze_rtas_builder_triangles_geometry_info_exp_t*)geom); break;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : verifyGeometryDesc((ze_rtas_builder_quads_geometry_info_exp_t* )geom); break;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : verifyGeometryDesc((ze_rtas_builder_procedural_geometry_info_exp_t*)geom); break;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : verifyGeometryDesc((ze_rtas_builder_instance_geometry_info_exp_t* )geom); break;
default: throw std::runtime_error("invalid geometry type");
};
});
/* number of primitives of a geometry, null geometries count as empty */
auto getSize = [&](uint32_t geomID) -> size_t {
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
if (geom == nullptr) return 0;
return getNumPrimitives(geom);
};
/* maps the API geometry type to the builder's type */
auto getType = [&](unsigned int geomID)
{
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
assert(geom);
switch (geom->geometryType) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL;
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE;
default: throw std::runtime_error("invalid geometry type");
};
};
/* fills prims with the primitive references of one geometry range; time_range is not used */
auto createPrimRefArray = [&] (evector<PrimRef>& prims, BBox1f time_range, const range<size_t>& r, size_t k, unsigned int geomID) -> PrimInfo
{
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
assert(geom);
switch (geom->geometryType) {
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return createGeometryPrimRefArray((ze_rtas_builder_triangles_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return createGeometryPrimRefArray((ze_rtas_builder_quads_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return createGeometryPrimRefArray((ze_rtas_builder_procedural_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID);
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return createGeometryPrimRefArray((ze_rtas_builder_instance_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID);
default: throw std::runtime_error("invalid geometry type");
};
};
/* geometries are treated as opaque unless the NON_OPAQUE flag is set */
auto convertGeometryFlags = [&] (ze_rtas_builder_packed_geometry_exp_flags_t flags) -> GeometryFlags {
return (flags & ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE) ? GeometryFlags::NONE : GeometryFlags::OPAQUE;
};
/* loads a triangle; a default constructed triangle is returned for out of range indices or invalid vertices */
auto getTriangle = [&](unsigned int geomID, unsigned int primID)
{
const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID];
assert(geom);
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
if (unlikely(tri.v0 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
if (unlikely(tri.v1 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
if (unlikely(tri.v2 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
const Vec3f p0 = getVertex(geom,tri.v0);
const Vec3f p1 = getVertex(geom,tri.v1);
const Vec3f p2 = getVertex(geom,tri.v2);
if (unlikely(!isvalid(p0))) return QBVH6BuilderSAH::Triangle();
if (unlikely(!isvalid(p1))) return QBVH6BuilderSAH::Triangle();
if (unlikely(!isvalid(p2))) return QBVH6BuilderSAH::Triangle();
const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags);
return QBVH6BuilderSAH::Triangle(tri.v0,tri.v1,tri.v2,p0,p1,p2,gflags,geom->geometryMask);
};
/* loads only the three vertex indices of a triangle, without any validation */
auto getTriangleIndices = [&] (uint32_t geomID, uint32_t primID) {
const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID];
assert(geom);
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
return Vec3<uint32_t>(tri.v0,tri.v1,tri.v2);
};
/* loads a quad */
/* NOTE(review): unlike getTriangle, indices and vertices are not validated here -- presumably only primitives accepted by buildBounds reach this accessor; confirm */
auto getQuad = [&](unsigned int geomID, unsigned int primID)
{
const ze_rtas_builder_quads_geometry_info_exp_t* geom = (const ze_rtas_builder_quads_geometry_info_exp_t*) geometries[geomID];
assert(geom);
const ze_rtas_quad_indices_uint32_exp_t quad = getPrimitive(geom,primID);
const Vec3f p0 = getVertex(geom,quad.v0);
const Vec3f p1 = getVertex(geom,quad.v1);
const Vec3f p2 = getVertex(geom,quad.v2);
const Vec3f p3 = getVertex(geom,quad.v3);
const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags);
return QBVH6BuilderSAH::Quad(p0,p1,p2,p3,gflags,geom->geometryMask);
};
/* procedural primitives only carry the geometry mask; primID is not used */
auto getProcedural = [&](unsigned int geomID, unsigned int primID) {
const ze_rtas_builder_procedural_geometry_info_exp_t* geom = (const ze_rtas_builder_procedural_geometry_info_exp_t*) geometries[geomID];
assert(geom);
return QBVH6BuilderSAH::Procedural(geom->geometryMask); // FIXME: pass gflags
};
/* instances carry transformation, referenced acceleration structure, mask and user ID; primID is not used */
auto getInstance = [&](unsigned int geomID, unsigned int primID)
{
assert(geometries[geomID]);
assert(geometries[geomID]->geometryType == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE);
const ze_rtas_builder_instance_geometry_info_exp_t* geom = (const ze_rtas_builder_instance_geometry_info_exp_t*) geometries[geomID];
void* accel = geom->pAccelerationStructure;
const AffineSpace3fa local2world = getTransform(geom);
return QBVH6BuilderSAH::Instance(local2world,accel,geom->geometryMask,geom->instanceUserID); // FIXME: pass instance flags
};
/* dispatch globals ptr for debugging purposes */
void* dispatchGlobalsPtr = nullptr;
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
/* only the first element of the pNext chain is inspected for the debug extension */
if (args->pNext) {
zet_base_desc_t_* next = (zet_base_desc_t_*) args->pNext;
if (next->stype == ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC) {
ze_rtas_builder_build_op_debug_exp_desc_t* debug_ext = (ze_rtas_builder_build_op_debug_exp_desc_t*) next;
dispatchGlobalsPtr = debug_ext->dispatchGlobalsPtr;
}
}
#endif
bool verbose = false;
bool success = QBVH6BuilderSAH::build(numGeometries, nullptr,
getSize, getType,
createPrimRefArray, getTriangle, getTriangleIndices, getQuad, getProcedural, getInstance,
(char*)pRtasBuffer, rtasBufferSizeBytes,
pScratchBuffer, scratchBufferSizeBytes,
(BBox3f*) pBounds, pRtasBufferSizeBytes,
args->rtasFormat, args->buildQuality, args->buildFlags, verbose, dispatchGlobalsPtr);
/* a failed build is reported as "retry" to the caller */
if (!success) {
return ZE_RESULT_EXP_RTAS_BUILD_RETRY;
}
return ZE_RESULT_SUCCESS;
}
/* handler of the function-try-block: no exception derived from std::exception leaves this function */
catch (std::exception& e) {
//std::cerr << "caught exception during BVH build: " << e.what() << std::endl;
return ZE_RESULT_ERROR_UNKNOWN;
}
/* Builds an acceleration structure.
   Without a parallel operation the build runs synchronously inside the
   global task arena and its result is returned directly.
   With a parallel operation the build is started as a task in the
   operation's task group and ZE_RESULT_EXP_RTAS_BUILD_DEFERRED is returned;
   the result of the build is stored in the operation's errorCode member.
   Returns the validation error of the first invalid argument, and
   ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE when the parallel operation already
   has a build attached. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
                                                                   const ze_rtas_builder_build_op_exp_desc_t* args,
                                                                   void *pScratchBuffer, size_t scratchBufferSizeBytes,
                                                                   void *pRtasBuffer, size_t rtasBufferSizeBytes,
                                                                   ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
                                                                   void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes)
{
  /* input validation */
  VALIDATE(hBuilder);
  VALIDATE(args);
  VALIDATE_PTR(pScratchBuffer);
  VALIDATE_PTR(pRtasBuffer);

  /* if parallel operation is provided then execute using thread arena inside task group ... */
  if (hParallelOperation)
  {
    VALIDATE(hParallelOperation);

    ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;

    /* claim the operation atomically: exchange() returns the previous value,
       so exactly one of several concurrent callers sees 'false' and proceeds */
    if (op->object_in_use.exchange(true))
      return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;

    /* the inner lambda captures by value as it outlives this function call */
    g_arena.execute([&](){ op->group.run([=](){
       op->errorCode = zeRTASBuilderBuildExpBody(args,
                                                 pScratchBuffer, scratchBufferSizeBytes,
                                                 pRtasBuffer, rtasBufferSizeBytes,
                                                 pBuildUserPtr, pBounds, pRtasBufferSizeBytes);
                                         });
                           });
    return ZE_RESULT_EXP_RTAS_BUILD_DEFERRED;
  }
  /* ... otherwise we just execute inside task arena to avoid spawning of TBB worker threads */
  else
  {
    ze_result_t errorCode = ZE_RESULT_SUCCESS;
    g_arena.execute([&](){ errorCode = zeRTASBuilderBuildExpBody(args,
                                                                 pScratchBuffer, scratchBufferSizeBytes,
                                                                 pRtasBuffer, rtasBufferSizeBytes,
                                                                 pBuildUserPtr, pBounds, pRtasBufferSizeBytes);
                           });
    return errorCode;
  }
}
/* Creates a new parallel operation object and returns its handle through
   phParallelOperation. Returns the validation error of the first invalid
   argument, otherwise ZE_RESULT_SUCCESS. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation)
{
  /* input validation */
  VALIDATE(hDriver);
  VALIDATE_PTR(phParallelOperation);

  /* the handle is simply the address of the operation object */
  ze_rtas_parallel_operation_t* const op = new ze_rtas_parallel_operation_t();
  *phParallelOperation = (ze_rtas_parallel_operation_exp_handle_t) op;
  return ZE_RESULT_SUCCESS;
}
/* Destroys the parallel operation object behind the handle after validating it. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation )
{
  /* input validation */
  VALIDATE(hParallelOperation);

  ze_rtas_parallel_operation_t* const op = (ze_rtas_parallel_operation_t*) hParallelOperation;
  delete op;
  return ZE_RESULT_SUCCESS;
}
/* Fills pProperties for a parallel operation that currently has a build
   attached. Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when no build is
   attached to the operation. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties )
{
  /* input validation */
  VALIDATE(hParallelOperation);
  VALIDATE(pProperties);

  /* properties can only be queried while a build is attached */
  ze_rtas_parallel_operation_t* const op = (ze_rtas_parallel_operation_t*) hParallelOperation;
  const bool attached = op->object_in_use.load();
  if (!attached)
    return ZE_RESULT_ERROR_INVALID_ARGUMENT;

  /* return properties */
  pProperties->flags = 0;
  pProperties->maxConcurrency = tbb::this_task_arena::max_concurrency();
  return ZE_RESULT_SUCCESS;
}
/* Waits for the build attached to the parallel operation to finish, releases
   the operation for reuse and returns the result code of the build. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation)
{
  /* check for valid handle */
  VALIDATE(hParallelOperation);

  ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;

  /* wait inside the arena so that this thread participates in the build */
  g_arena.execute([&](){ op->group.wait(); });

  /* fetch the result before releasing the object: once object_in_use is
     cleared another build may be attached and overwrite errorCode */
  const ze_result_t errorCode = op->errorCode;
  op->object_in_use.store(false);
  return errorCode;
}
}

View file

@ -0,0 +1,66 @@
// Copyright 2009-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../level_zero/ze_api.h"
#if !defined(ZE_RTAS_BUILDER_EXP_NAME)
#include "../../level_zero/ze_rtas.h"
#endif
#include <stddef.h>
#include <stdint.h>
#if defined(__cplusplus)
# define RTHWIF_API_EXTERN_C extern "C"
#else
# define RTHWIF_API_EXTERN_C
#endif
#if defined(_WIN32)
#if defined(EMBREE_RTHWIF_STATIC_LIB)
# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C
# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C
#else
# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C __declspec(dllimport)
# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __declspec(dllexport)
#endif
#else
# define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C
# define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __attribute__ ((visibility ("default")))
#endif
/* Internal enumeration of the acceleration structure format versions.
   ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX mirrors the highest version listed
   here and has to be updated whenever a new version is added. */
typedef enum _ze_raytracing_accel_format_internal_t {
ZE_RTAS_DEVICE_FORMAT_EXP_INVALID = 0, // invalid acceleration structure format
ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1 = 1, // acceleration structure format version 1
ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_2 = 2, // acceleration structure format version 2
ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX = 2
} ze_raytracing_accel_format_internal_t;
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder);
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder);
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver,
const ze_rtas_format_exp_t accelFormat,
const ze_rtas_format_exp_t otherAccelFormat);
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
const ze_rtas_builder_build_op_exp_desc_t* args,
ze_rtas_builder_exp_properties_t* pProp);
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
const ze_rtas_builder_build_op_exp_desc_t* args,
void *pScratchBuffer, size_t scratchBufferSizeBytes,
void *pRtasBuffer, size_t rtasBufferSizeBytes,
ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes);
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation);
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation );
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties );
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation);

View file

@ -0,0 +1,155 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "statistics.h"
namespace embree
{
/* Scope guard that remembers the format flags and the precision of a stream
   on construction and writes both back when it goes out of scope. */
class RestoreStreamState
{
public:
  RestoreStreamState(std::ostream& iostream)
    : stream(iostream),
      savedFlags(iostream.flags()),
      savedPrecision(iostream.precision())
  {
  }

  ~RestoreStreamState()
  {
    stream.precision(savedPrecision);
    stream.flags(savedFlags);
  }

private:
  std::ostream& stream;             // stream whose state gets restored
  std::ios::fmtflags savedFlags;    // format flags at construction time
  std::streamsize savedPrecision;   // precision at construction time
};
/*! Quotient a/b that yields 0 instead of dividing by a zero denominator. */
double ratio(double a, double b) {
  return (b == 0.0) ? 0.0 : a/b;
}

/*! The ratio a/b expressed in percent (0 when b is zero). */
double percent(double a, double b)
{
  const double fraction = ratio(a,b);
  return 100.0*fraction;
}

/*! Integer convenience overload: converts both counts to double first. */
double ratio(size_t a, size_t b)
{
  const double num = static_cast<double>(a);
  const double den = static_cast<double>(b);
  return ratio(num,den);
}

/*! Integer convenience overload: converts both counts to double first. */
double percent(size_t a, size_t b)
{
  const double num = static_cast<double>(a);
  const double den = static_cast<double>(b);
  return percent(num,den);
}
void BVHStatistics::NodeStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const
{
RestoreStreamState iostate(cout);
cout << std::setw(7) << numNodes << " ";
cout << std::setw(7) << std::setprecision(3) << sah();
cout << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% ";
cout << std::setw(8) << std::setprecision(2) << bytes()/1E6 << " MB ";
cout << std::setw(7) << std::setprecision(2) << percent(numBytes,numBytes) << "% ";
cout << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% ";
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numNodes) << " ";
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numChildrenUsed) << " ";
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " ";
cout << std::setw(7) << std::setprecision(2) << ratio(numChildrenUsed,numNodes) << " ";
cout << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% ";
cout << std::endl;
}
/*! Writes one table row for this leaf category to the stream and ends
 *  it with std::endl.
 *
 *  \param cout          stream to write to; its flags and precision are
 *                       restored when the function returns
 *  \param totalSAH      SAH of the whole BVH, used for the SAH share
 *  \param totalBytes    size of the whole BVH, used for the byte share
 *  \param numPrimitives primitive count used for the bytes-per-primitive column
 *  \param blocks        when true the row is normalized by numBlocks,
 *                       otherwise by numLeaves */
void BVHStatistics::LeafStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks) const
{
  RestoreStreamState iostate(cout);

  const size_t count     = blocks ? numBlocks : numLeaves;
  const double leafCost  = sah();
  const size_t leafBytes = bytes();

  /* leaf (or block) count, SAH and share of the total SAH */
  cout << std::setw(7) << count << " "
       << std::setw(7) << std::setprecision(3) << leafCost
       << std::setw(7) << std::setprecision(2) << percent(leafCost,totalSAH) << "% ";

  /* size in MB, used share of the leaf bytes and share of the total size */
  cout << std::setw(8) << std::setprecision(2) << double(leafBytes)/1E6 << " MB "
       << std::setw(7) << std::setprecision(2) << percent(numBytesUsed,numBytesTotal) << "% "
       << std::setw(7) << std::setprecision(2) << percent(leafBytes,totalBytes) << "% ";

  /* bytes per leaf (or block), per used primitive and per primitive */
  cout << std::setw(8) << std::setprecision(2) << ratio(leafBytes,count) << " "
       << std::setw(8) << std::setprecision(2) << ratio(leafBytes,numPrimsUsed) << " "
       << std::setw(8) << std::setprecision(2) << ratio(leafBytes,numPrimitives) << " ";

  /* used primitives per leaf (or block) and fill rate in percent */
  cout << std::setw(7) << std::setprecision(2) << ratio(numPrimsUsed,count) << " "
       << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% "
       << std::endl;
}
/*! Prints the human readable statistics report: the primitive counters,
 *  a table header, a "total" row and one row per node/leaf category.
 *  Floating point values are printed in fixed notation. */
void BVHStatistics::print (std::ostream& cout) const
{
/* restores the flags and precision of the stream on return */
RestoreStreamState iostate(cout);
cout.setf(std::ios::fixed, std::ios::floatfield);
cout.fill(' ');
/* totals over the internal nodes and the three leaf categories */
double totalSAH = internalNode.nodeSAH + quadLeaf.leafSAH + proceduralLeaf.leafSAH + instanceLeaf.leafSAH;
size_t totalBytes = internalNode.bytes() + quadLeaf.bytes() + proceduralLeaf.bytes() + instanceLeaf.bytes();
size_t totalNodes = internalNode.numNodes + quadLeaf.numLeaves + proceduralLeaf.numLeaves + instanceLeaf.numLeaves;
size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed;
/* primitive counters */
cout << std::endl;
cout << "BVH statistics:" << std::endl;
cout << "---------------" << std::endl;
cout << " numScenePrimitives = " << numScenePrimitives << std::endl;
cout << " numBuildPrimitives = " << numBuildPrimitives << std::endl;
cout << " numBuildPrimitivesPostSplit = " << numBuildPrimitivesPostSplit << std::endl;
/* post-split build primitives relative to the build primitives */
cout << " primRefSplits = " << std::setprecision(2) << percent(numBuildPrimitivesPostSplit,numBuildPrimitives) << "%" << std::endl;
cout << " numBVHPrimitives = " << totalPrimitives << std::endl;
/* primitives stored in the BVH relative to the scene primitives */
cout << " spatialSplits = " << std::setprecision(2) << percent(totalPrimitives,numScenePrimitives) << "%" << std::endl;
cout << std::endl;
/* table header */
cout << " #nodes SAH total bytes used total b/node b/child b/prim #child fill" << std::endl;
cout << "----------------------------------------------------------------------------------------------------------------------" << std::endl;
/* "total" row: the share columns are constant 100%, and the columns
   between the shares and b/prim are left blank */
cout << " total : ";
cout << std::setw(7) << totalNodes << " ";
cout << std::setw(7) << std::setprecision(3) << totalSAH;
cout << " 100.00% ";
cout << std::setw(8) << std::setprecision(2) << totalBytes/1E6 << " MB ";
cout << " 100.00% ";
cout << " 100.00% ";
cout << " ";
cout << " ";
cout << std::setw(8) << std::setprecision(2) << ratio(totalBytes,totalPrimitives) << std::endl;
/* "leaves" row sums up all three leaf categories */
LeafStat leaf = quadLeaf + proceduralLeaf + instanceLeaf;
cout << " internalNode : "; internalNode .print(cout,totalSAH,totalBytes,totalPrimitives);
cout << " leaves : "; leaf .print(cout,totalSAH,totalBytes,totalPrimitives);
cout << " quadLeaf : "; quadLeaf .print(cout,totalSAH,totalBytes,totalPrimitives);
cout << " proceduralLeaf : "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives);
/* same procedural statistics again, but normalized by block count (blocks = true) */
cout << " proceduralBlock: "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives,true);
cout << " instanceLeaf : "; instanceLeaf .print(cout,totalSAH,totalBytes,totalPrimitives);
}
/*! Dumps the raw counters as "name = value" lines, one per counter,
 *  using whatever number formatting the stream currently has. */
void BVHStatistics::print_raw(std::ostream& cout) const
{
  RestoreStreamState iostate(cout);

  /* writes a single "label value" line */
  auto emit = [&cout] (const char* label, const auto& value) {
    cout << label << value << std::endl;
  };

  const size_t primsInBVH = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed;
  emit("bvh_spatial_split_factor = ", percent(primsInBVH,numBuildPrimitives));

  /* internal nodes */
  emit("bvh_internal_sah = ", internalNode.nodeSAH);
  emit("bvh_internal_num = ", internalNode.numNodes);
  emit("bvh_internal_num_children_used = ", internalNode.numChildrenUsed);
  emit("bvh_internal_num_children_total = ", internalNode.numChildrenTotal);
  emit("bvh_internal_num_bytes = ", internalNode.bytes());

  /* quad leaves */
  emit("bvh_quad_leaf_sah = ", quadLeaf.leafSAH);
  emit("bvh_quad_leaf_num = ", quadLeaf.numLeaves);
  emit("bvh_quad_leaf_num_prims_used = ", quadLeaf.numPrimsUsed);
  emit("bvh_quad_leaf_num_prims_total = ", quadLeaf.numPrimsTotal);
  emit("bvh_quad_leaf_num_bytes_used = ", quadLeaf.numBytesUsed);
  emit("bvh_quad_leaf_num_bytes_total = ", quadLeaf.numBytesTotal);

  /* procedural leaves */
  emit("bvh_procedural_leaf_sah = ", proceduralLeaf.leafSAH);
  emit("bvh_procedural_leaf_num = ", proceduralLeaf.numLeaves);
  emit("bvh_procedural_leaf_num_prims_used = ", proceduralLeaf.numPrimsUsed);
  emit("bvh_procedural_leaf_num_prims_total = ", proceduralLeaf.numPrimsTotal);
  emit("bvh_procedural_leaf_num_bytes_used = ", proceduralLeaf.numBytesUsed);
  emit("bvh_procedural_leaf_num_bytes_total = ", proceduralLeaf.numBytesTotal);

  /* instance leaves */
  emit("bvh_instance_leaf_sah = ", instanceLeaf.leafSAH);
  emit("bvh_instance_leaf_num = ", instanceLeaf.numLeaves);
  emit("bvh_instance_leaf_num_prims_used = ", instanceLeaf.numPrimsUsed);
  emit("bvh_instance_leaf_num_prims_total = ", instanceLeaf.numPrimsTotal);
  emit("bvh_instance_leaf_num_bytes_used = ", instanceLeaf.numBytesUsed);
  emit("bvh_instance_leaf_num_bytes_total = ", instanceLeaf.numBytesTotal);
}
}

View file

@ -0,0 +1,118 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if defined(ZE_RAYTRACING)
#include "sys/platform.h"
#else
#include "../../../common/sys/platform.h"
#endif
namespace embree
{
struct BVHStatistics
{
/*! Counters gathered over a set of BVH nodes. Two records can be
 *  combined with operator+, which adds them member by member. */
struct NodeStat
{
  double nodeSAH;           //!< SAH value reported by sah()
  size_t numNodes;          //!< node count reported by size()
  size_t numChildrenUsed;   //!< numerator of the fill rate
  size_t numChildrenTotal;  //!< denominator of the fill rate
  size_t numBytes;          //!< byte count reported by bytes()

  /*! Creates a record from the passed counters; everything defaults to zero. */
  NodeStat ( double nodeSAH = 0,
             size_t numNodes = 0,
             size_t numChildrenUsed = 0,
             size_t numChildrenTotal = 0,
             size_t numBytes = 0)
    : nodeSAH(nodeSAH), numNodes(numNodes),
      numChildrenUsed(numChildrenUsed), numChildrenTotal(numChildrenTotal),
      numBytes(numBytes)
  {
  }

  double sah() const {
    return nodeSAH;
  }

  size_t bytes() const {
    return numBytes;
  }

  size_t size() const {
    return numNodes;
  }

  double fillRateNom () const {
    return double(numChildrenUsed);
  }

  double fillRateDen () const {
    return double(numChildrenTotal);
  }

  /*! Used children divided by total children; 0 when there are no children at all. */
  double fillRate () const
  {
    const double den = fillRateDen();
    if (den == 0.0) return 0.0;
    return fillRateNom()/den;
  }

  /*! Member-wise sum of two records. */
  friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b)
  {
    NodeStat sum(a);
    sum.nodeSAH          = a.nodeSAH + b.nodeSAH;
    sum.numNodes         = a.numNodes+b.numNodes;
    sum.numChildrenUsed  = a.numChildrenUsed+b.numChildrenUsed;
    sum.numChildrenTotal = a.numChildrenTotal+b.numChildrenTotal;
    sum.numBytes         = a.numBytes+b.numBytes;
    return sum;
  }

  /*! Prints one table row for this record (defined in the implementation file). */
  void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const;
};
/*! Counters gathered over a set of BVH leaves. Two records can be
 *  combined with operator+, which adds them member by member. */
struct LeafStat
{
  double leafSAH;        //!< SAH of the leaves only
  size_t numLeaves;      //!< Number of leaf nodes.
  size_t numBlocks;      //!< Number of blocks referenced
  size_t numPrimsUsed;   //!< Number of active primitives
  size_t numPrimsTotal;  //!< Number of active and inactive primitives
  size_t numBytesUsed;   //!< Number of used bytes
  size_t numBytesTotal;  //!< Number of total bytes of leaves.

  /*! Creates a record from the passed counters; everything defaults to zero. */
  LeafStat(double leafSAH = 0.0f,
           size_t numLeaves = 0,
           size_t numBlocks = 0,
           size_t numPrimsUsed = 0,
           size_t numPrimsTotal = 0,
           size_t numBytesUsed = 0,
           size_t numBytesTotal = 0)
    : leafSAH(leafSAH), numLeaves(numLeaves), numBlocks(numBlocks),
      numPrimsUsed(numPrimsUsed), numPrimsTotal(numPrimsTotal),
      numBytesUsed(numBytesUsed), numBytesTotal(numBytesTotal)
  {
  }

  double sah() const {
    return leafSAH;
  }

  /*! Size of the leaves: the total bytes, not only the used ones. */
  size_t bytes() const {
    return numBytesTotal;
  }

  size_t size() const {
    return numLeaves;
  }

  double fillRateNom () const {
    return double(numPrimsUsed);
  }

  double fillRateDen () const {
    return double(numPrimsTotal);
  }

  /*! Used primitives divided by total primitives; 0 when there are no primitives at all. */
  double fillRate () const
  {
    const double den = fillRateDen();
    if (den == 0.0) return 0.0;
    return fillRateNom()/den;
  }

  /*! Member-wise sum of two records. */
  friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
  {
    LeafStat sum(a);
    sum.leafSAH       = a.leafSAH + b.leafSAH;
    sum.numLeaves     = a.numLeaves+b.numLeaves;
    sum.numBlocks     = a.numBlocks+b.numBlocks;
    sum.numPrimsUsed  = a.numPrimsUsed+b.numPrimsUsed;
    sum.numPrimsTotal = a.numPrimsTotal+b.numPrimsTotal;
    sum.numBytesUsed  = a.numBytesUsed+b.numBytesUsed;
    sum.numBytesTotal = a.numBytesTotal+b.numBytesTotal;
    return sum;
  }

  /*! Prints one table row for this record (defined in the implementation file). */
  void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks = false) const;
};
/*! Creates empty statistics: the three primitive counters start at zero
 *  and the node/leaf records are default constructed. */
BVHStatistics ()
: numScenePrimitives(0), numBuildPrimitives(0), numBuildPrimitivesPostSplit(0) {}
/*! Prints the formatted statistics table. */
void print (std::ostream& cout) const;
/*! Prints the raw counters as "name = value" lines. */
void print_raw(std::ostream& cout) const;
/* primitive counters; print() reports them and derives the
   primRefSplits and spatialSplits percentages from them */
size_t numScenePrimitives;
size_t numBuildPrimitives;
size_t numBuildPrimitivesPostSplit;
/* per-category records, one for the internal nodes and one per leaf type */
NodeStat internalNode;
LeafStat quadLeaf;
LeafStat proceduralLeaf;
LeafStat instanceLeaf;
};
}

View file

@ -0,0 +1,266 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if defined(ZE_RAYTRACING_RT_SIMULATION)
#include "rtcore.h"
#endif
#if defined(EMBREE_SYCL_RT_VALIDATION_API)
# include "rttrace_validation.h"
#else
#include <cstdint>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#pragma clang diagnostic ignored "-W#pragma-messages"
#include <sycl/sycl.hpp>
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreturn-type-c-linkage"
// Ray flags. Every enumerator except intel_ray_flags_none occupies its own
// bit, so flags can be combined with bitwise OR.
enum intel_ray_flags_t
{
intel_ray_flags_none = 0x00,
intel_ray_flags_force_opaque = 0x01, // forces geometry to be opaque (no anyhit shader invocation)
intel_ray_flags_force_non_opaque = 0x02, // forces geometry to be non-opaque (invoke anyhit shader)
intel_ray_flags_accept_first_hit_and_end_search = 0x04, // terminates traversal on the first hit found (shadow rays)
intel_ray_flags_skip_closest_hit_shader = 0x08, // skip execution of the closest hit shader
intel_ray_flags_cull_back_facing_triangles = 0x10, // back facing triangles do not produce a hit
intel_ray_flags_cull_front_facing_triangles = 0x20, // front facing triangles do not produce a hit
intel_ray_flags_cull_opaque = 0x40, // opaque geometry does not produce a hit
intel_ray_flags_cull_non_opaque = 0x80, // non-opaque geometry does not produce a hit
intel_ray_flags_skip_triangles = 0x100, // treat all triangle intersections as misses.
intel_ray_flags_skip_procedural_primitives = 0x200, // skip execution of intersection shaders
};
// Selects which of the two hits of a ray query an accessor refers to.
enum intel_hit_type_t
{
intel_hit_type_committed_hit = 0, // the committed hit
intel_hit_type_potential_hit = 1, // the potential (not yet committed) hit
};
// Feature bits returned by intel_get_raytracing_ext_flag().
enum intel_raytracing_ext_flag_t
{
intel_raytracing_ext_flag_ray_query = 1 << 0, // true if ray queries are supported
};
// opaque types: both handles are pointers to structs that are only forward
// declared here. The ray query handle carries the opencl_private attribute,
// the acceleration structure handle the opencl_global attribute.
typedef __attribute__((opencl_private)) struct intel_ray_query_opaque_t* intel_ray_query_t;
typedef __attribute__((opencl_global )) struct intel_raytracing_acceleration_structure_opaque_t* intel_raytracing_acceleration_structure_t;
// Two-component float vector of the ray tracing API, convertible from and
// to sycl::float2.
struct intel_float2
{
  float x, y;

  // leaves both components uninitialized
  intel_float2() {}

  intel_float2(float x, float y)
    : x(x), y(y) {}

  // conversion from the SYCL vector type
  intel_float2(sycl::float2 v)
    : x(v.x()), y(v.y()) {}

  // conversion to the SYCL vector type; const-qualified so that it is
  // also usable on const objects and const references
  operator sycl::float2() const {
    return sycl::float2(x,y);
  }
};
// Three-component float vector of the ray tracing API, convertible from and
// to sycl::float3.
struct intel_float3
{
  float x, y, z;

  // leaves all components uninitialized
  intel_float3() {}

  intel_float3(float x, float y, float z)
    : x(x), y(y), z(z) {}

  // conversion from the SYCL vector type
  intel_float3(sycl::float3 v)
    : x(v.x()), y(v.y()), z(v.z()) {}

  // conversion to the SYCL vector type; const-qualified so that it is
  // also usable on const objects and const references
  operator sycl::float3() const {
    return sycl::float3(x,y,z);
  }
};
// Four intel_float3 vectors; used as return type of the world_to_object /
// object_to_world accessors.
// NOTE(review): presumably vx, vy, vz are the linear part and p the
// translation of an affine transform - confirm.
struct intel_float4x3 {
intel_float3 vx, vy, vz, p;
};
// Description of a ray as passed to intel_ray_query_init and
// intel_ray_query_forward_ray.
struct intel_ray_desc_t
{
intel_float3 origin; // ray origin
intel_float3 direction; // ray direction
float tmin; // start of the ray interval
float tmax; // end of the ray interval
unsigned int mask; // ray mask
intel_ray_flags_t flags; // ray flags, see intel_ray_flags_t
};
// if traversal returns one can test if a triangle or procedural is hit
// (the value is returned by intel_get_hit_candidate)
enum intel_candidate_type_t
{
intel_candidate_type_triangle,
intel_candidate_type_procedural
};
#ifdef __SYCL_DEVICE_ONLY__
// Device side declarations of the ray query functions. They are only
// declared here (extern "C", SYCL_EXTERNAL); no definition is part of this header.

// check supported ray tracing features
SYCL_EXTERNAL extern "C" intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag();
// initializes a ray query
SYCL_EXTERNAL extern "C" intel_ray_query_t intel_ray_query_init(
intel_ray_desc_t ray,
intel_raytracing_acceleration_structure_t accel
);
// setup for instance traversal using a transformed ray and bottom-level AS
SYCL_EXTERNAL extern "C" void intel_ray_query_forward_ray(
intel_ray_query_t query,
intel_ray_desc_t ray,
intel_raytracing_acceleration_structure_t accel
);
// commit the potential hit
SYCL_EXTERNAL extern "C" void intel_ray_query_commit_potential_hit(
intel_ray_query_t query
);
// commit the potential hit and override hit distance and UVs
SYCL_EXTERNAL extern "C" void intel_ray_query_commit_potential_hit_override(
intel_ray_query_t query,
float override_hit_distance,
intel_float2 override_uv
);
// start traversal of a ray query
SYCL_EXTERNAL extern "C" void intel_ray_query_start_traversal( intel_ray_query_t query );
// Synchronize ray query execution. If a ray was dispatched,
// this must be called prior to calling any of the accessors below.
SYCL_EXTERNAL extern "C" void intel_ray_query_sync( intel_ray_query_t query );
// Signal that a ray query will not be used further. This is the moral equivalent of a delete.
// This function does an implicit sync.
SYCL_EXTERNAL extern "C" void intel_ray_query_abandon( intel_ray_query_t query );
// read hit information during shader execution
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_bvh_level( intel_ray_query_t query, intel_hit_type_t hit_type );
SYCL_EXTERNAL extern "C" float intel_get_hit_distance( intel_ray_query_t query, intel_hit_type_t hit_type );
SYCL_EXTERNAL extern "C" intel_float2 intel_get_hit_barycentrics( intel_ray_query_t query, intel_hit_type_t hit_type );
SYCL_EXTERNAL extern "C" bool intel_get_hit_front_face( intel_ray_query_t query, intel_hit_type_t hit_type );
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_geometry_id(intel_ray_query_t query, intel_hit_type_t hit_type );
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type );
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ); // fast path for quad leaves
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ); // fast path for procedural leaves
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_instance_id( intel_ray_query_t query, intel_hit_type_t hit_type );
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_instance_user_id( intel_ray_query_t query, intel_hit_type_t hit_type );
SYCL_EXTERNAL extern "C" intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t query, intel_hit_type_t hit_type );
SYCL_EXTERNAL extern "C" intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t query, intel_hit_type_t hit_type );
// fetch triangle vertices for a hit
SYCL_EXTERNAL extern "C" void intel_get_hit_triangle_vertices( intel_ray_query_t query, intel_float3 vertices_out[3], intel_hit_type_t hit_type );
// Read ray-data. This is used to read transformed rays produced by HW instancing pipeline
// during any-hit or intersection shader execution.
SYCL_EXTERNAL extern "C" intel_float3 intel_get_ray_origin( intel_ray_query_t query, unsigned int bvh_level );
SYCL_EXTERNAL extern "C" intel_float3 intel_get_ray_direction( intel_ray_query_t query, unsigned int bvh_level );
SYCL_EXTERNAL extern "C" float intel_get_ray_tmin( intel_ray_query_t query, unsigned int bvh_level );
SYCL_EXTERNAL extern "C" intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t query, unsigned int bvh_level );
SYCL_EXTERNAL extern "C" unsigned int intel_get_ray_mask( intel_ray_query_t query, unsigned int bvh_level );
SYCL_EXTERNAL extern "C" intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t query, intel_hit_type_t hit_type );
// test whether traversal has terminated. If false, the ray has reached
// a procedural leaf or a non-opaque triangle leaf, and requires shader processing
SYCL_EXTERNAL extern "C" bool intel_is_traversal_done( intel_ray_query_t query );
// if traversal is done one can test for the presence of a committed hit to either invoke miss or closest hit shader
SYCL_EXTERNAL extern "C" bool intel_has_committed_hit( intel_ray_query_t query );
#else
// Stub used when __SYCL_DEVICE_ONLY__ is not defined: always reports the
// ray query feature bit.
inline intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag() {
return intel_raytracing_ext_flag_ray_query;
}
// Stub used when __SYCL_DEVICE_ONLY__ is not defined: ignores its arguments
// and returns a null ray query handle. Uses the nullptr keyword rather than
// the NULL macro, which would depend on an indirectly included header.
inline intel_ray_query_t intel_ray_query_init(
  intel_ray_desc_t ray,
  intel_raytracing_acceleration_structure_t accel
) { return nullptr; }
// Stubs used when __SYCL_DEVICE_ONLY__ is not defined. They mirror the
// device declarations above, ignore their arguments, and return
// fixed values (0, false, zero vectors, or the first enumerator).

// setup for instance traversal using a transformed ray and bottom-level AS
inline void intel_ray_query_forward_ray(
intel_ray_query_t query,
intel_ray_desc_t ray,
intel_raytracing_acceleration_structure_t accel
) {}
// commit the potential hit
inline void intel_ray_query_commit_potential_hit(
intel_ray_query_t query
) {}
// commit the potential hit and override hit distance and UVs
inline void intel_ray_query_commit_potential_hit_override(
intel_ray_query_t query,
float override_hit_distance,
intel_float2 override_uv
) {}
// start traversal of a ray query
inline void intel_ray_query_start_traversal( intel_ray_query_t query ) {}
// Synchronize ray query execution. If a ray was dispatched,
// this must be called prior to calling any of the accessors below.
inline void intel_ray_query_sync( intel_ray_query_t query ) {}
// Signal that a ray query will not be used further. This is the moral equivalent of a delete.
// This function does an implicit sync.
inline void intel_ray_query_abandon( intel_ray_query_t query ) {}
// read hit information during shader execution
inline unsigned int intel_get_hit_bvh_level( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
inline float intel_get_hit_distance( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0.0f; }
inline intel_float2 intel_get_hit_barycentrics( intel_ray_query_t query, intel_hit_type_t hit_type ) { return { 0,0 }; }
inline bool intel_get_hit_front_face( intel_ray_query_t query, intel_hit_type_t hit_type ) { return false; }
inline unsigned int intel_get_hit_geometry_id(intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
inline unsigned int intel_get_hit_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
inline unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } // fast path for quad leaves
inline unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } // fast path for procedural leaves
inline unsigned int intel_get_hit_instance_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
inline unsigned int intel_get_hit_instance_user_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
inline intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t query, intel_hit_type_t hit_type ) { return { {0,0,0}, {0,0,0}, {0,0,0}, {0,0,0} }; }
inline intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t query, intel_hit_type_t hit_type ) { return { {0,0,0}, {0,0,0}, {0,0,0}, {0,0,0} }; }
// fetch triangle vertices for a hit
inline void intel_get_hit_triangle_vertices( intel_ray_query_t query, intel_float3 vertices_out[3], intel_hit_type_t hit_type ) {}
// Read ray-data. This is used to read transformed rays produced by HW instancing pipeline
// during any-hit or intersection shader execution.
inline intel_float3 intel_get_ray_origin( intel_ray_query_t query, unsigned int bvh_level ) { return { 0,0,0 }; }
inline intel_float3 intel_get_ray_direction( intel_ray_query_t query, unsigned int bvh_level ) { return { 0,0,0 }; }
inline float intel_get_ray_tmin( intel_ray_query_t query, unsigned int bvh_level ) { return 0.0f; }
inline intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t query, unsigned int bvh_level ) { return intel_ray_flags_none; }
inline unsigned int intel_get_ray_mask( intel_ray_query_t query, unsigned int bvh_level ) { return 0; }
inline intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t query, intel_hit_type_t hit_type ) { return intel_candidate_type_triangle; }
// test whether traversal has terminated. If false, the ray has reached
// a procedural leaf or a non-opaque triangle leaf, and requires shader processing
inline bool intel_is_traversal_done( intel_ray_query_t query ) { return false; }
// if traversal is done one can test for the presence of a committed hit to either invoke miss or closest hit shader
inline bool intel_has_committed_hit( intel_ray_query_t query ) { return false; }
#endif
#pragma clang diagnostic pop
#endif

View file

@ -0,0 +1,293 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
# define MemRay MemRayV1
# define MemHit MemHitV1
# define QuadLeaf QuadLeafV1
# define InstanceLeaf InstanceLeafV1
#include <cstdint>
// Control value of a trace ray dispatch; passed as 'traceRayCtrl' to
// intel_dispatch_trace_ray_query. The comments state what gets loaded or
// initialized for each value.
enum TraceRayCtrl
{
TRACE_RAY_INITIAL = 0, // Initializes hit and initializes traversal state
TRACE_RAY_INSTANCE = 1, // Loads committed hit and initializes traversal state
TRACE_RAY_COMMIT = 2, // Loads potential hit and loads traversal state
TRACE_RAY_CONTINUE = 3, // Loads committed hit and loads traversal state
TRACE_RAY_DONE = 256, // for internal use only
};
// Opaque handles: pointers to structs that are only forward declared here.
typedef __attribute__((opencl_global)) struct rtglobals_opaque_t* rtglobals_t;
typedef __attribute__((opencl_private)) struct rtfence_opaque_t* rtfence_t;
// Low level dispatch functions. For device compilation (or when
// EMBREE_SYCL_RT_SIMULATION is defined) they are only declared; otherwise
// the inline stubs below are used, which return nullptr or do nothing.
#if defined(__SYCL_DEVICE_ONLY__) || defined(EMBREE_SYCL_RT_SIMULATION)
SYCL_EXTERNAL extern "C" __attribute__((opencl_global)) void* intel_get_implicit_dispatch_globals();
SYCL_EXTERNAL extern "C" void* intel_get_rt_stack(rtglobals_t rt_dispatch_globals);
SYCL_EXTERNAL extern "C" void* intel_get_thread_btd_stack(rtglobals_t rt_dispatch_globals);
SYCL_EXTERNAL extern "C" void* intel_get_global_btd_stack(rtglobals_t rt_dispatch_globals);
// dispatches a trace ray operation for the given BVH level; 'traceRayCtrl'
// takes a TraceRayCtrl value. The returned fence is consumed by intel_rt_sync.
SYCL_EXTERNAL extern "C" rtfence_t intel_dispatch_trace_ray_query(rtglobals_t rt_dispatch_globals, unsigned int bvh_level, unsigned int traceRayCtrl);
SYCL_EXTERNAL extern "C" void intel_rt_sync(rtfence_t fence);
#else
inline void* intel_get_implicit_dispatch_globals() { return nullptr; }
inline void* intel_get_rt_stack(rtglobals_t rt_dispatch_globals) { return nullptr; }
inline void* intel_get_thread_btd_stack(rtglobals_t rt_dispatch_globals) { return nullptr; }
inline void* intel_get_global_btd_stack(rtglobals_t rt_dispatch_globals) { return nullptr; }
inline rtfence_t intel_dispatch_trace_ray_query(rtglobals_t rt_dispatch_globals, unsigned int bvh_level, unsigned int traceRayCtrl) { return nullptr; }
inline void intel_rt_sync(rtfence_t fence) {}
#endif
// Type tag of a BVH node. Note that NODE_TYPE_MIXED and NODE_TYPE_INTERNAL
// share the value 0x0. The largest value is 0x7, so a tag fits into the
// 3 bit leafType field of MemHitV1.
enum NodeType
{
NODE_TYPE_MIXED = 0x0, // identifies a mixed internal node where each child can have a different type
NODE_TYPE_INTERNAL = 0x0, // internal BVH node with 6 children
NODE_TYPE_INSTANCE = 0x1, // instance leaf
NODE_TYPE_PROCEDURAL = 0x3, // procedural leaf
NODE_TYPE_QUAD = 0x4, // quad leaf
NODE_TYPE_INVALID = 0x7 // indicates invalid node
};
// In-memory ray record: 8 floats (32 bytes) followed by four 64 bit
// bitfield groups (32 bytes). The struct is packed and 32 byte aligned.
struct __attribute__ ((packed,aligned(32))) MemRayV1
{
// Fills the record from a ray description and the address of the root
// node. Origin, direction, interval, flags and mask are copied from 'ray';
// all shader record fields, the instance leaf pointer and both padding
// fields are set to zero.
// Note: rootNodePtr is a 48 bit field, rayFlags a 16 bit field and rayMask
// an 8 bit field, so only the low bits of the assigned values are stored.
void init(intel_ray_desc_t ray, uint64_t rootNodePtr_i)
{
org[0] = ray.origin.x;
org[1] = ray.origin.y;
org[2] = ray.origin.z;
dir[0] = ray.direction.x;
dir[1] = ray.direction.y;
dir[2] = ray.direction.z;
tnear = ray.tmin;
tfar = ray.tmax;
rootNodePtr = rootNodePtr_i;
rayFlags = ray.flags;
hitGroupSRBasePtr = 0;
hitGroupSRStride = 0;
missSRPtr = 0;
pad0 = 0;
shaderIndexMultiplier = 0;
instLeafPtr = 0;
rayMask = ray.mask;
pad1 = 0;
}
// 32 B
float org[3]; // ray origin
float dir[3]; // ray direction
float tnear; // start of the ray interval (ray.tmin)
float tfar; // end of the ray interval (ray.tmax)
// 32 B
struct { // FIXME: removing these anonymous structs triggers IGC bug
uint64_t rootNodePtr : 48; // root node to start traversal at
uint64_t rayFlags : 16; // ray flags (see RayFlag structure)
};
struct {
uint64_t hitGroupSRBasePtr : 48; // base of hit group shader record array (16-bytes alignment)
uint64_t hitGroupSRStride : 16; // stride of hit group shader record array (16-bytes alignment)
};
struct {
uint64_t missSRPtr : 48; // pointer to miss shader record to invoke on a miss (8-bytes alignment)
uint64_t pad0 : 8; // padding byte (has to be zero)
uint64_t shaderIndexMultiplier : 8; // shader index multiplier
};
struct {
uint64_t instLeafPtr : 48; // the pointer to instance leaf in case we traverse an instance (64-bytes alignment)
uint64_t rayMask : 8; // ray mask used for ray masking
uint64_t pad1 : 8; // padding byte (has to be zero)
};
};
// In-memory hit record: three floats, one 32 bit word of bitfields
// (also accessible as 'data') and two 64 bit bitfield groups.
// The struct is packed and 32 byte aligned.
struct __attribute__ ((packed,aligned(32))) MemHitV1
{
// accessors for the hit distance and the barycentric coordinates
inline float getT() const {
return ft;
}
inline void setT(float t) {
ft = t;
}
inline float getU() const {
return fu;
}
inline void setU(float u) {
fu = u;
}
inline float getV() const {
return fv;
}
inline void setV(float v) {
fv = v;
}
// The leaf pointers are stored in units of 64 bytes; the getters scale
// them back to a byte address.
inline void* getPrimLeafPtr() {
return sycl::global_ptr<void>((void*)(uint64_t(primLeafPtr)*64)).get();
}
inline void* getInstanceLeafPtr() {
return sycl::global_ptr<void>((void*)(uint64_t(instLeafPtr)*64)).get();
}
public:
float ft; // hit distance of current hit (or initial traversal distance)
float fu,fv; // barycentric hit coordinates
union {
struct {
uint32_t primIndexDelta : 16; // prim index delta for compressed meshlets and quads
uint32_t valid : 1; // set if there is a hit
uint32_t leafType : 3; // type of node primLeafPtr is pointing to
uint32_t primLeafIndex : 4; // index of the hit primitive inside the leaf
uint32_t bvhLevel : 3; // the instancing level at which the hit occurred
uint32_t frontFace : 1; // whether we hit the front-facing side of a triangle (also used to pass opaque flag when calling intersection shaders)
uint32_t done : 1; // used in sync mode to indicate that traversal is done
uint32_t pad0 : 3; // unused bits
};
uint32_t data; // all of the bitfields above as one 32 bit word
};
struct { // FIXME: removing these anonymous structs triggers IGC bug
uint64_t primLeafPtr : 42; // pointer to BVH leaf node (multiple of 64 bytes)
uint64_t hitGroupRecPtr0 : 22; // LSB of hit group record of the hit triangle (multiple of 16 bytes)
};
struct {
uint64_t instLeafPtr : 42; // pointer to BVH instance leaf node (in multiple of 64 bytes)
uint64_t hitGroupRecPtr1 : 22; // MSB of hit group record of the hit triangle (multiple of 16 bytes)
};
// Resets the distance, the barycentrics and the 32 bit bitfield word to
// zero and then sets the 'done' and 'valid' bits from the arguments.
// The two 64 bit pointer words are not modified.
// NOTE(review): the commented-out vector store below wrote INFINITY into ft
// and zeroed the whole record, whereas the active code sets ft to 0 and
// leaves the pointer words untouched - confirm the difference is intended.
void clear(bool _done, bool _valid) {
//*(sycl::int8*) this = sycl::int8(0x7F800000 /* INFINITY */, 0, 0, (_done ? 0x10000000 : 0) | (_valid ? 0x10000), 0, 0, 0, 0);
ft = fu = fv = 0.0f;
data = 0;
done = _done ? 1 : 0;
valid = _valid ? 1 : 0;
}
};
// Ray tracing stack of a ray query: two hit records, two ray records
// (indexed by BVH level in the ray query code) and 64 bytes of traversal
// stack. The struct is packed and 64 byte aligned.
struct __attribute__ ((packed,aligned(64))) RTStack
{
// the two hits can be accessed by name or through the hit[] array
union {
struct {
struct MemHit committedHit; // stores committed hit
struct MemHit potentialHit; // stores potential hit that is passed to any hit shader
};
struct MemHit hit[2]; // committedHit, potentialHit
};
struct MemRay ray[2]; // one ray per BVH level
char travStack[32*2]; // traversal stack storage; contents are not interpreted in this header
};
// Header at the start of an acceleration structure. The packed fields add
// up to 128 bytes (8 + 24 + 32 + 4 + 52 + 8).
// NOTE(review): the ray query code computes the root node address as the
// HWAccel address plus QBVH6_rootNodeOffset (128), which suggests the root
// node directly follows this header - confirm.
struct __attribute__ ((packed)) HWAccel
{
uint64_t reserved;
float bounds[2][3]; // bounding box of the BVH
uint32_t reserved0[8];
uint32_t numTimeSegments;
uint32_t reserved1[13];
uint64_t dispatchGlobalsPtr; // read by intel_ray_query_init when EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS is defined
};
// Leaf header shared by the primitive leaf types: two 32 bit bitfield
// words, packed and 8 byte aligned.
struct __attribute__ ((packed,aligned(8))) PrimLeafDesc
{
struct {
uint32_t shaderIndex : 24; // shader index used for shader record calculations
uint32_t geomMask : 8; // geometry mask used for ray masking
};
struct {
uint32_t geomIndex : 29; // the geometry index specifies the n'th geometry of the scene
uint32_t type : 1; // enable/disable culling for procedurals and instances
uint32_t geomFlags : 2; // geometry flags of this geometry
};
};
// Quad leaf: leaf header, the primitive index of the first triangle, a
// 32 bit word describing the second triangle, and four vertices.
// The struct is packed and 64 byte aligned.
struct __attribute__ ((packed,aligned(64))) QuadLeafV1
{
struct PrimLeafDesc leafDesc; // leaf header identifying the geometry
unsigned int primIndex0; // primitive index of the first triangle
struct {
uint32_t primIndex1Delta : 16; // delta encoded primitive index of second triangle
uint32_t j0 : 2; // specifies first vertex of second triangle
uint32_t j1 : 2; // specifies second vertex of second triangle
uint32_t j2 : 2; // specifies third vertex of second triangle
uint32_t last : 1; // true if the second triangle is the last triangle in a leaf list
uint32_t pad : 9; // unused bits
};
float v[4][3]; // four vertices with three floats each
};
// Procedural leaf that stores up to N primitive indices. The struct is
// packed and 64 byte aligned.
struct __attribute__ ((packed,aligned(64))) ProceduralLeaf
{
static const constexpr uint32_t N = 13; // capacity of the leaf
struct PrimLeafDesc leafDesc; // leaf header identifying the geometry
struct {
uint32_t numPrimitives : 4; // number of stored primitives
uint32_t pad : 32-4-N; // unused bits that fill the word up to 32 bits
uint32_t last : N; // bit vector with a last bit per primitive
};
uint32_t _primIndex[N]; // primitive indices of all primitives stored inside the leaf
};
// Instance leaf made of two parts: part0 holds the data accessed by the
// hardware during traversal, part1 the data accessed during shading.
// The struct is packed and 64 byte aligned.
struct __attribute__ ((packed,aligned(64))) InstanceLeafV1
{
/* first 64 bytes accessed during traversal by hardware */
struct Part0
{
public:
struct {
uint32_t shaderIndex : 24; // shader index used to calculate instancing shader in case of software instancing
uint32_t geomMask : 8; // geometry mask used for ray masking
};
struct {
uint32_t instanceContributionToHitGroupIndex : 24;
uint32_t pad0 : 5;
/* the following two entries are only used for procedural instances */
uint32_t type : 1; // enables/disables opaque culling
uint32_t geomFlags : 2; // unused for instances
};
struct {
uint64_t startNodePtr : 48; // start node where to continue traversal of the instanced object
uint64_t instFlags : 8; // flags for the instance (see InstanceFlags)
uint64_t pad1 : 8; // unused bits
};
float world2obj_vx[3]; // 1st column of World2Obj transform
float world2obj_vy[3]; // 2nd column of World2Obj transform
float world2obj_vz[3]; // 3rd column of World2Obj transform
float obj2world_p[3]; // translation of Obj2World transform (on purpose in first 64 bytes)
} part0;
/* second 64 bytes accessed during shading */
struct Part1
{
struct {
uint64_t bvhPtr : 48; // pointer to BVH the start node belongs to
uint64_t pad : 16; // unused bits
};
uint32_t instanceID; // user defined value per DXR spec
uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene)
float obj2world_vx[3]; // 1st column of Obj2World transform
float obj2world_vy[3]; // 2nd column of Obj2World transform
float obj2world_vz[3]; // 3rd column of Obj2World transform
float world2obj_p[3]; // translation of World2Obj transform
} part1;
};

View file

@ -0,0 +1,287 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "rttrace_validation.h"
#define sizeof_QBVH6_InternalNode6 64
#define QBVH6_rootNodeOffset 128
/*struct rayquery_impl_t {
rtfence_t fence;
rtglobals_t dispatchGlobalsPtr;
struct RTStack* rtStack;
TraceRayCtrl ctrl;
unsigned int bvh_level;
};*/
// Intentionally empty function.
// NOTE(review): presumably a symbol callers reference to force this
// translation unit to be linked - confirm against the call sites.
void use_rthwif_production()
{
}
// Reports the supported ray tracing features: always the ray query bit.
SYCL_EXTERNAL intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag()
{
return intel_raytracing_ext_flag_ray_query;
}
// Initializes a ray query for the passed ray and acceleration structure:
// fetches the dispatch globals and the ray tracing stack, writes the ray
// for BVH level 0, resets both hit records, and returns a query in the
// TRACE_RAY_INITIAL state.
SYCL_EXTERNAL intel_ray_query_t intel_ray_query_init(intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel_i )
{
unsigned int bvh_level = 0;
//intel_raytracing_acceleration_structure_t* accel_i = sycl::global_ptr<intel_raytracing_acceleration_structure_t>(_accel_i).get();
HWAccel* accel = (HWAccel*)accel_i;
/* the dispatch globals are either stored in the acceleration structure header or provided implicitly */
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
rtglobals_t dispatchGlobalsPtr = (rtglobals_t) accel->dispatchGlobalsPtr;
#else
rtglobals_t dispatchGlobalsPtr = (rtglobals_t) intel_get_implicit_dispatch_globals();
#endif
struct RTStack* __restrict rtStack = sycl::global_ptr<RTStack>((struct RTStack*)intel_get_rt_stack( (rtglobals_t)dispatchGlobalsPtr )).get();
/* init ray; the root node is located QBVH6_rootNodeOffset bytes behind the start of the acceleration structure */
rtStack->ray[bvh_level].init(ray,(uint64_t)accel + QBVH6_rootNodeOffset);
/* committed hit: infinite distance, all bitfields (including valid and done) cleared */
rtStack->committedHit.setT(INFINITY);
rtStack->committedHit.setU(0.0f);
rtStack->committedHit.setV(0.0f);
rtStack->committedHit.data = 0;
/* potential hit: infinite distance; 'data' is cleared first because it
   aliases the bitfields, then done and valid are set */
rtStack->potentialHit.setT(INFINITY);
rtStack->potentialHit.setU(0.0f);
rtStack->potentialHit.setV(0.0f);
rtStack->potentialHit.data = 0;
rtStack->potentialHit.done = 1;
rtStack->potentialHit.valid = 1;
/* query: no fence yet, dispatch globals, stack, initial control value, BVH level 0 */
return { nullptr, (void*) dispatchGlobalsPtr, rtStack, TRACE_RAY_INITIAL, bvh_level };
}
/* Stores the given ray one BVH level below the query's current level and
   switches the query to TRACE_RAY_INSTANCE for the passed acceleration structure. */
SYCL_EXTERNAL void intel_ray_query_forward_ray( intel_ray_query_t& query, intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel_i)
{
  HWAccel* hw_accel = (HWAccel*)accel_i;
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();

  /* init ray on the next level */
  unsigned int next_level = query.bvh_level+1;
  const uint64_t root_address = (uint64_t)hw_accel + QBVH6_rootNodeOffset;
  stack->ray[next_level].init(ray,root_address);

  query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_INSTANCE, next_level };
}
/* Commits the potential hit. If the ray of the current BVH level has the
   accept-first-hit flag set, the potential hit is copied into the committed
   hit and the query becomes TRACE_RAY_DONE; otherwise the query is set to
   TRACE_RAY_COMMIT. */
SYCL_EXTERNAL void intel_ray_query_commit_potential_hit( intel_ray_query_t& query )
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();

  unsigned int level = query.bvh_level;
  unsigned int ray_flags = stack->ray[level].rayFlags;
  const bool accept_first_hit = (ray_flags & intel_ray_flags_accept_first_hit_and_end_search) != 0;

  if (!accept_first_hit) {
    stack->potentialHit.valid = 1; // FIXME: is this required?
    query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_COMMIT, level };
    return;
  }

  /* first hit terminates the search: commit right here */
  stack->committedHit = stack->potentialHit;
  stack->committedHit.valid = 1;
  query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_DONE, level };
}
/* Overwrites hit distance and UVs of the potential hit and then commits it
   through intel_ray_query_commit_potential_hit. */
SYCL_EXTERNAL void intel_ray_query_commit_potential_hit_override( intel_ray_query_t& query, float override_hit_distance, intel_float2 override_uv )
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();

  auto& potential = stack->potentialHit;
  potential.setT(override_hit_distance);
  potential.setU(override_uv.x);
  potential.setV(override_uv.y);

  intel_ray_query_commit_potential_hit(query);
}
/* Marks the potential hit as done and valid and, unless the query is already
   TRACE_RAY_DONE, dispatches the ray query and stores the returned fence in
   the query. */
SYCL_EXTERNAL void intel_ray_query_start_traversal( intel_ray_query_t& query )
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  stack->potentialHit.done = 1;
  stack->potentialHit.valid = 1;

  if (query.ctrl != TRACE_RAY_DONE)
  {
    rtglobals_t globals = (rtglobals_t) query.opaque1;
    rtfence_t fence = intel_dispatch_trace_ray_query(globals,query.bvh_level,query.ctrl);
    query = { (void*) fence, query.opaque1, query.opaque2, TRACE_RAY_INITIAL, 0 };
  }
}
/* Waits on the fence stored in the query, then sets the query to
   TRACE_RAY_CONTINUE at the BVH level recorded in the potential hit. */
SYCL_EXTERNAL void intel_ray_query_sync( intel_ray_query_t& query )
{
  intel_rt_sync((rtfence_t)query.opaque0);

  /* continue is default behaviour */
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  unsigned int hit_level = stack->potentialHit.bvhLevel;
  query = { query.opaque0, query.opaque1, query.opaque2, TRACE_RAY_CONTINUE, hit_level };
}
/* Second name for intel_ray_query_sync: waits on the query's fence and sets
   the query to TRACE_RAY_CONTINUE at the BVH level of the potential hit. */
SYCL_EXTERNAL void intel_sync_ray_query( intel_ray_query_t& query )
{
  intel_ray_query_sync(query);
}
/* Synchronizes the query and then clears all of its fields. */
SYCL_EXTERNAL void intel_ray_query_abandon( intel_ray_query_t& query )
{
  intel_ray_query_sync(query);

  const intel_ray_query_t cleared = { nullptr, nullptr, nullptr, TRACE_RAY_INITIAL, 0 };
  query = cleared;
}
/* Returns the bvhLevel field of the selected hit record. */
SYCL_EXTERNAL unsigned int intel_get_hit_bvh_level( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  return hit.bvhLevel;
}
/* Returns the hit distance (getT) of the selected hit record. */
SYCL_EXTERNAL float intel_get_hit_distance( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  return hit.getT();
}
/* Returns the (u,v) coordinates of the selected hit record. */
SYCL_EXTERNAL intel_float2 intel_get_hit_barycentrics( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  return { hit.getU(), hit.getV() };
}
/* Returns the frontFace field of the selected hit record. */
SYCL_EXTERNAL bool intel_get_hit_front_face( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  return hit.frontFace;
}
/* Returns the geomIndex stored in the primitive leaf referenced by the
   selected hit record. */
SYCL_EXTERNAL unsigned int intel_get_hit_geometry_id(intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  struct PrimLeafDesc* __restrict leaf_desc = (struct PrimLeafDesc*)hit.getPrimLeafPtr();
  return leaf_desc->geomIndex;
}
/* Returns the primitive index of the selected hit. Quad leaves store a base
   index plus a per-hit delta, all other leaves are read as procedural leaves
   with a per-slot index array. */
SYCL_EXTERNAL unsigned int intel_get_hit_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  void* __restrict leaf_ptr = hit.getPrimLeafPtr();

  if (hit.leafType != NODE_TYPE_QUAD)
    return ((ProceduralLeaf*)leaf_ptr)->_primIndex[hit.primLeafIndex];

  return ((QuadLeaf*)leaf_ptr)->primIndex0 + hit.primIndexDelta;
}
/* Primitive index for a hit that is known to reference a quad leaf: base
   index of the leaf plus the delta stored in the hit (no leaf type check). */
SYCL_EXTERNAL unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  QuadLeaf* __restrict quad = (QuadLeaf*) hit.getPrimLeafPtr();
  return quad->primIndex0 + hit.primIndexDelta;
}
/* Primitive index for a hit that is known to reference a procedural leaf:
   entry primLeafIndex of the leaf's index array (no leaf type check). */
SYCL_EXTERNAL unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  ProceduralLeaf* __restrict procedural = (ProceduralLeaf*) hit.getPrimLeafPtr();
  return procedural->_primIndex[hit.primLeafIndex];
}
/* Returns the instanceIndex of the instance leaf referenced by the selected
   hit, or (unsigned int)-1 if the hit has no instance leaf. */
SYCL_EXTERNAL unsigned int intel_get_hit_instance_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  InstanceLeaf* __restrict instance = (InstanceLeaf*) hit.getInstanceLeafPtr();
  if (instance != nullptr)
    return instance->part1.instanceIndex;
  return (unsigned int)-1;
}
/* Returns the instanceID of the instance leaf referenced by the selected
   hit, or (unsigned int)-1 if the hit has no instance leaf. */
SYCL_EXTERNAL unsigned int intel_get_hit_instance_user_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  InstanceLeaf* __restrict instance = (InstanceLeaf*) hit.getInstanceLeafPtr();
  if (instance != nullptr)
    return instance->part1.instanceID;
  return (unsigned int)-1;
}
/* Returns the world-to-object transform of the instance leaf referenced by
   the selected hit, or the identity transform if there is no instance leaf.
   The three column vectors are read from part0, the translation from part1. */
SYCL_EXTERNAL intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  InstanceLeaf* __restrict instance = (InstanceLeaf*) hit.getInstanceLeafPtr();
  if (instance == nullptr)
    return { { 1,0,0 }, { 0,1,0 }, { 0,0,1 }, { 0,0,0 } };

  const auto& vx = instance->part0.world2obj_vx;
  const auto& vy = instance->part0.world2obj_vy;
  const auto& vz = instance->part0.world2obj_vz;
  const auto& p  = instance->part1.world2obj_p;
  return {
    { vx[0], vx[1], vx[2] },
    { vy[0], vy[1], vy[2] },
    { vz[0], vz[1], vz[2] },
    { p [0], p [1], p [2] }
  };
}
/* Returns the object-to-world transform of the instance leaf referenced by
   the selected hit, or the identity transform if there is no instance leaf.
   The three column vectors are read from part1, the translation from part0. */
SYCL_EXTERNAL intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  InstanceLeaf* __restrict instance = (InstanceLeaf*) hit.getInstanceLeafPtr();
  if (instance == nullptr)
    return { { 1,0,0 }, { 0,1,0 }, { 0,0,1 }, { 0,0,0 } };

  const auto& vx = instance->part1.obj2world_vx;
  const auto& vy = instance->part1.obj2world_vy;
  const auto& vz = instance->part1.obj2world_vz;
  const auto& p  = instance->part0.obj2world_p;
  return {
    { vx[0], vx[1], vx[2] },
    { vy[0], vy[1], vy[2] },
    { vz[0], vz[1], vz[2] },
    { p [0], p [1], p [2] }
  };
}
/* Writes the three vertices of the hit triangle to verts_out. For
   primLeafIndex 0 the leaf vertices 0,1,2 are used, otherwise the vertex
   slots are selected by the leaf's j0/j1/j2 fields. */
SYCL_EXTERNAL void intel_get_hit_triangle_vertices( intel_ray_query_t& query, intel_float3 verts_out[3], intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  const QuadLeaf* __restrict quad = (const QuadLeaf*) hit.getPrimLeafPtr();

  /* vertex slots of the selected triangle */
  unsigned int slot[3];
  slot[0] = 0; slot[1] = 1; slot[2] = 2;
  if (hit.primLeafIndex != 0)
  {
    slot[0] = quad->j0;
    slot[1] = quad->j1;
    slot[2] = quad->j2;
  }

  for (unsigned int k=0; k<3; k++)
  {
    const auto& vtx = quad->v[slot[k]];
    verts_out[k] = { vtx[0], vtx[1], vtx[2] };
  }
}
/* Returns the origin of the ray stored at the given BVH level of the RT stack. */
SYCL_EXTERNAL intel_float3 intel_get_ray_origin( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  const auto& origin = stack->ray[bvh_level].org;
  return { origin[0], origin[1], origin[2] };
}
/* Returns the direction of the ray stored at the given BVH level of the RT stack. */
SYCL_EXTERNAL intel_float3 intel_get_ray_direction( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  const auto& direction = stack->ray[bvh_level].dir;
  return { direction[0], direction[1], direction[2] };
}
/* Returns the tnear value of the ray stored at the given BVH level. */
SYCL_EXTERNAL float intel_get_ray_tmin( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  MemRay& level_ray = stack->ray[bvh_level];
  return level_ray.tnear;
}
/* Returns the ray flags of the ray stored at the given BVH level. */
SYCL_EXTERNAL intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  MemRay& level_ray = stack->ray[bvh_level];
  return (intel_ray_flags_t) level_ray.rayFlags;
}
/* Returns the ray mask of the ray stored at the given BVH level. */
SYCL_EXTERNAL unsigned int intel_get_ray_mask( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  MemRay& level_ray = stack->ray[bvh_level];
  return level_ray.rayMask;
}
/* Returns the done flag of the potential hit record. */
SYCL_EXTERNAL bool intel_is_traversal_done( intel_ray_query_t& query )
{
  MemHit& potential = query.hit(intel_hit_type_potential_hit);
  return potential.done;
}
/* Classifies the selected hit: quad leaves are reported as triangle
   candidates, every other leaf type as procedural candidate. */
SYCL_EXTERNAL intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t& query, intel_hit_type_t hit_type)
{
  MemHit& hit = query.hit(hit_type);
  if (hit.leafType == NODE_TYPE_QUAD)
    return intel_candidate_type_triangle;
  return intel_candidate_type_procedural;
}
/* Returns the valid flag of the committed hit record. */
SYCL_EXTERNAL bool intel_has_committed_hit( intel_ray_query_t& query )
{
  MemHit& committed = query.hit(intel_hit_type_committed_hit);
  return committed.valid;
}

View file

@ -0,0 +1,180 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <cstdint>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#pragma clang diagnostic ignored "-W#pragma-messages"
#include <sycl/sycl.hpp>
#pragma clang diagnostic pop
/* Bit flags controlling traversal of a ray; every enumerator is a distinct
   bit, so values can be combined with bitwise OR. */
enum intel_ray_flags_t
{
intel_ray_flags_none = 0x00,
intel_ray_flags_force_opaque = 0x01, // forces geometry to be opaque (no anyhit shader invocation)
intel_ray_flags_force_non_opaque = 0x02, // forces geometry to be non-opaque (invoke anyhit shader)
intel_ray_flags_accept_first_hit_and_end_search = 0x04, // terminates traversal on the first hit found (shadow rays)
intel_ray_flags_skip_closest_hit_shader = 0x08, // skip execution of the closest hit shader
intel_ray_flags_cull_back_facing_triangles = 0x10, // back facing triangles do not produce a hit
intel_ray_flags_cull_front_facing_triangles = 0x20, // front facing triangles do not produce a hit
intel_ray_flags_cull_opaque = 0x40, // opaque geometry does not produce a hit
intel_ray_flags_cull_non_opaque = 0x80, // non-opaque geometry does not produce a hit
intel_ray_flags_skip_triangles = 0x100, // treat all triangle intersections as misses.
intel_ray_flags_skip_procedural_primitives = 0x200, // skip execution of intersection shaders
};
/* Selects one of the two hit records of a ray query; the value is used as
   index into the hit array of the RT stack (see intel_ray_query_t::hit). */
enum intel_hit_type_t
{
intel_hit_type_committed_hit = 0,
intel_hit_type_potential_hit = 1,
};
/* Feature bits returned by intel_get_raytracing_ext_flag. */
enum intel_raytracing_ext_flag_t
{
intel_raytracing_ext_flag_ray_query = 1 << 0, // true if ray queries are supported
};
/* Two component float vector that converts from and to sycl::float2. */
struct intel_float2
{
  float x, y;

  /* default constructor leaves x and y uninitialized */
  intel_float2() {}

  intel_float2(float x, float y)
    : x(x), y(y) {}

  intel_float2(sycl::float2 v)
    : x(v.x()), y(v.y()) {}

  /* const qualified so that const objects can be converted as well */
  operator sycl::float2() const {
    return sycl::float2(x,y);
  }
};
/* Three component float vector that converts from and to sycl::float3. */
struct intel_float3
{
  float x, y, z;

  /* default constructor leaves x, y and z uninitialized */
  intel_float3() {}

  intel_float3(float x, float y, float z)
    : x(x), y(y), z(z) {}

  intel_float3(sycl::float3 v)
    : x(v.x()), y(v.y()), z(v.z()) {}

  /* const qualified so that const objects can be converted as well */
  operator sycl::float3() const {
    return sycl::float3(x,y,z);
  }
};
/* Affine transformation made of four intel_float3 members; the accessors
   intel_get_hit_world_to_object / intel_get_hit_object_to_world fill vx, vy,
   vz with the three column vectors and p with the translation. */
struct intel_float4x3 {
intel_float3 vx, vy, vz, p;
};
/* Description of a ray as passed to intel_ray_query_init and
   intel_ray_query_forward_ray. */
struct intel_ray_desc_t
{
intel_float3 origin; // ray origin
intel_float3 direction; // ray direction
float tmin; // start of the ray interval
float tmax; // end of the ray interval
unsigned int mask; // ray mask
intel_ray_flags_t flags; // combination of intel_ray_flags_t bits
};
#include "rttrace_internal.h"
// opaque types
/* State of a ray query. The validation implementation stores the fence of
   the last dispatch in opaque0, the dispatch globals pointer in opaque1 and
   the RTStack pointer in opaque2. */
struct intel_ray_query_t
{
  void* opaque0;
  void* opaque1;
  void* opaque2;
  uint32_t ctrl;
  uint32_t bvh_level;

  /* returns the hit record of the requested type from the RT stack */
  MemHit& hit(intel_hit_type_t ty) {
    return ((struct RTStack*) opaque2)->hit[ty];
  }
};
/* Handle to an acceleration structure: pointer to a struct that is only
   forward declared here, annotated with the opencl_global attribute. */
typedef __attribute__((opencl_global )) struct intel_raytracing_acceleration_structure_opaque_t* intel_raytracing_acceleration_structure_t;
// check supported ray tracing features
SYCL_EXTERNAL intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag();
// initializes a ray query
SYCL_EXTERNAL intel_ray_query_t intel_ray_query_init(
intel_ray_desc_t ray,
intel_raytracing_acceleration_structure_t accel
);
// setup for instance traversal using a transformed ray and bottom-level AS
SYCL_EXTERNAL void intel_ray_query_forward_ray(
intel_ray_query_t& query,
intel_ray_desc_t ray,
intel_raytracing_acceleration_structure_t accel
);
// commit the potential hit
SYCL_EXTERNAL void intel_ray_query_commit_potential_hit(
intel_ray_query_t& query
);
// commit the potential hit and override hit distance and UVs
SYCL_EXTERNAL void intel_ray_query_commit_potential_hit_override(
intel_ray_query_t& query,
float override_hit_distance,
intel_float2 override_uv
);
// start traversal of a ray query
SYCL_EXTERNAL void intel_ray_query_start_traversal( intel_ray_query_t& query );
// synchronize ray query execution. If a ray was dispatched,
// this must be called prior to calling any of the accessors below.
SYCL_EXTERNAL void intel_ray_query_sync( intel_ray_query_t& query );
// signal that a ray query will not be used further. This is the moral equivalent of a delete.
// This function does an implicit sync.
SYCL_EXTERNAL void intel_ray_query_abandon( intel_ray_query_t& query );
// read hit information during shader execution
SYCL_EXTERNAL unsigned int intel_get_hit_bvh_level( intel_ray_query_t& query, intel_hit_type_t hit_type );
SYCL_EXTERNAL float intel_get_hit_distance( intel_ray_query_t& query, intel_hit_type_t hit_type );
SYCL_EXTERNAL intel_float2 intel_get_hit_barycentrics( intel_ray_query_t& query, intel_hit_type_t hit_type );
SYCL_EXTERNAL bool intel_get_hit_front_face( intel_ray_query_t& query, intel_hit_type_t hit_type );
SYCL_EXTERNAL unsigned int intel_get_hit_geometry_id(intel_ray_query_t& query, intel_hit_type_t hit_type );
SYCL_EXTERNAL unsigned int intel_get_hit_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type );
SYCL_EXTERNAL unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); // fast path for quad leaves
SYCL_EXTERNAL unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); // fast path for procedural leaves
// instance accessors return (unsigned int)-1 respectively the identity transform when no instance was hit
SYCL_EXTERNAL unsigned int intel_get_hit_instance_id( intel_ray_query_t& query, intel_hit_type_t hit_type );
SYCL_EXTERNAL unsigned int intel_get_hit_instance_user_id( intel_ray_query_t& query, intel_hit_type_t hit_type );
SYCL_EXTERNAL intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t& query, intel_hit_type_t hit_type );
SYCL_EXTERNAL intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t& query, intel_hit_type_t hit_type );
// fetch triangle vertices for a hit
SYCL_EXTERNAL void intel_get_hit_triangle_vertices( intel_ray_query_t& query, intel_float3 vertices_out[3], intel_hit_type_t hit_type );
// Read ray-data. This is used to read transformed rays produced by HW instancing pipeline
// during any-hit or intersection shader execution.
SYCL_EXTERNAL intel_float3 intel_get_ray_origin( intel_ray_query_t& query, unsigned int bvh_level );
SYCL_EXTERNAL intel_float3 intel_get_ray_direction( intel_ray_query_t& query, unsigned int bvh_level );
SYCL_EXTERNAL float intel_get_ray_tmin( intel_ray_query_t& query, unsigned int bvh_level );
SYCL_EXTERNAL intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t& query, unsigned int bvh_level );
SYCL_EXTERNAL unsigned int intel_get_ray_mask( intel_ray_query_t& query, unsigned int bvh_level );
// once traversal returns, one can test whether a triangle or a procedural primitive was hit
enum intel_candidate_type_t
{
intel_candidate_type_triangle, // hit references a quad (triangle) leaf
intel_candidate_type_procedural // hit references any other leaf type
};
// returns the candidate type (triangle or procedural) of the selected hit
SYCL_EXTERNAL intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t& query, intel_hit_type_t hit_type );
// test whether traversal has terminated. If false, the ray has reached
// a procedural leaf or a non-opaque triangle leaf, and requires shader processing
SYCL_EXTERNAL bool intel_is_traversal_done( intel_ray_query_t& query );
// if traversal is done, one can test for the presence of a committed hit to invoke either the miss or the closest hit shader
SYCL_EXTERNAL bool intel_has_committed_hit( intel_ray_query_t& query );

View file

@ -0,0 +1,89 @@
## Copyright 2009-2022 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
#PROJECT(rthwif_testing)
#CMAKE_MINIMUM_REQUIRED(VERSION 3.1.0)
# All targets of this directory are built as C++17.
set(CMAKE_CXX_STANDARD 17)

if (NOT WIN32)
  # generate position independent code suitable for shared libraries
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
endif()

# Declare the options only when the including project did not define them already.
if (NOT DEFINED EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
  option(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON)
endif()
if (NOT DEFINED EMBREE_SYCL_RT_VALIDATION_API)
  option(EMBREE_SYCL_RT_VALIDATION_API "Use rt_validation API instead of IGC provided rt_production API" OFF)
endif()

# Forward the configuration to the sources. Explicitly allocated dispatch
# globals are only enabled together with the validation API.
if (EMBREE_SYCL_RT_VALIDATION_API)
  add_definitions("-DEMBREE_SYCL_RT_VALIDATION_API")
  if (NOT EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
    add_definitions("-DEMBREE_SYCL_ALLOC_DISPATCH_GLOBALS")
  endif()
endif()

# Link the rtcore library into the test executables when simulation is requested.
if (EMBREE_SYCL_RT_SIMULATION)
  set(RT_SIM_LIBRARY rtcore)
endif()
# Cornell box example rendered through the ray query API.
add_executable(embree_rthwif_cornell_box rthwif_cornell_box.cpp)
target_link_libraries(embree_rthwif_cornell_box sys simd ${TBB_TARGET} ${RT_SIM_LIBRARY} ze_wrapper)
# COMPILE_FLAGS and LINK_FLAGS are plain strings, not lists: APPEND_STRING
# (with a leading space) keeps them well-formed even if flags are already
# present, whereas APPEND would join the values with a ';'.
set_property(TARGET embree_rthwif_cornell_box APPEND_STRING PROPERTY COMPILE_FLAGS " -fsycl -fsycl-targets=spir64 -DEMBREE_SYCL_SUPPORT")
set_property(TARGET embree_rthwif_cornell_box APPEND_STRING PROPERTY LINK_FLAGS " -fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"")
install(TARGETS embree_rthwif_cornell_box DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
SIGN_TARGET(embree_rthwif_cornell_box)
# Test executable that is driven by the ADD_EMBREE_TEST_ECS registrations in this file.
add_executable(embree_rthwif_test rthwif_test.cpp)
target_link_libraries(embree_rthwif_test sys simd ${TBB_TARGET} ${RT_SIM_LIBRARY} ze_wrapper)
# COMPILE_FLAGS and LINK_FLAGS are plain strings, not lists: APPEND_STRING
# (with a leading space) keeps them well-formed even if flags are already
# present, whereas APPEND would join the values with a ';'.
set_property(TARGET embree_rthwif_test APPEND_STRING PROPERTY COMPILE_FLAGS " -fsycl -fsycl-targets=spir64 -DEMBREE_SYCL_SUPPORT")
set_property(TARGET embree_rthwif_test APPEND_STRING PROPERTY LINK_FLAGS " -fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"")
install(TARGETS embree_rthwif_test DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
SIGN_TARGET(embree_rthwif_test)
# Register the image comparison test unless the validation API is used with
# explicitly allocated dispatch globals. The value of
# EMBREE_SYCL_RT_VALIDATION_API is tested here, because the variable is always
# defined at this point (it is declared as an option earlier in this file), so
# a "NOT DEFINED" test could never be true.
if (NOT EMBREE_SYCL_RT_VALIDATION_API OR EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
  # prefer the reference image of the model directory when one is configured
  if (DEFINED EMBREE_MODEL_DIR)
    set(CORNELL_BOX_REFERENCE "${EMBREE_MODEL_DIR}/reference/cornell_box_reference.tga")
  else()
    set(CORNELL_BOX_REFERENCE "${CMAKE_CURRENT_SOURCE_DIR}/cornell_box_reference.tga")
  endif()
  add_test(NAME rthwif_cornell_box
    COMMAND embree_rthwif_cornell_box --compare "${CORNELL_BOX_REFERENCE}"
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
endif()
# Registers the embree_rthwif_test runs. Names, arguments and registration
# order are the same as in a flat list of calls; the loops only factor out
# the repeating parts.

# geometry kinds exercised by the builder tests
set(rthwif_builder_geometries triangles procedurals instances mixed)

# builder tests using the expected build mode
foreach(rthwif_geom ${rthwif_builder_geometries})
  ADD_EMBREE_TEST_ECS(rthwif_test_builder_${rthwif_geom}_expected embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_${rthwif_geom} --build_mode_expected)
endforeach()

# builder benchmarks
foreach(rthwif_geom triangles procedurals)
  ADD_EMBREE_TEST_ECS(rthwif_test_benchmark_${rthwif_geom} embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --benchmark_${rthwif_geom})
endforeach()

# builder tests using the worst case build mode
foreach(rthwif_geom ${rthwif_builder_geometries})
  ADD_EMBREE_TEST_ECS(rthwif_test_builder_${rthwif_geom}_worst_case embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_${rthwif_geom} --build_mode_worst_case)
endforeach()

# hit tests: every case runs without instancing, with HW instancing and with SW instancing
set(rthwif_hit_cases
  triangles_committed_hit
  triangles_potential_hit
  triangles_anyhit_shader_commit
  triangles_anyhit_shader_reject
  procedurals_committed_hit)

foreach(rthwif_inst no hw sw)
  # tests without instancing carry no prefix in their name
  if ("${rthwif_inst}" STREQUAL "no")
    set(rthwif_prefix "")
  else()
    set(rthwif_prefix "${rthwif_inst}instancing_")
  endif()
  foreach(rthwif_case ${rthwif_hit_cases})
    # the command line flag uses dashes where the test name uses underscores
    string(REPLACE "_" "-" rthwif_case_flag "${rthwif_case}")
    ADD_EMBREE_TEST_ECS(rthwif_test_${rthwif_prefix}${rthwif_case} embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --${rthwif_inst}-instancing --${rthwif_case_flag})
  endforeach()
endforeach()

Binary file not shown.

After

Width:  |  Height:  |  Size: 768 KiB

View file

@ -0,0 +1,630 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <CL/sycl.hpp>
#include "tbb/tbb.h"
#include "../rttrace/rttrace.h"
#include <level_zero/ze_wrapper.h>
#include <vector>
#include <iostream>
#include <fstream>
/* File level state. NOTE(review): the users of these variables are outside
   of this excerpt; the names suggest the dispatch globals buffer and the
   image resolution in pixels - confirm. */
void* dispatchGlobalsPtr = nullptr;
static uint32_t global_width = 512;
static uint32_t global_height = 512;
/* Prints the message of every SYCL exception contained in the list to
   std::cout; exceptions of other types propagate to the caller. */
void exception_handler(sycl::exception_list exceptions)
{
  for (std::exception_ptr const& pending : exceptions)
  {
    try {
      std::rethrow_exception(pending);
    }
    catch (sycl::exception const& err) {
      std::cout << "Caught asynchronous SYCL exception: " << err.what() << std::endl;
    }
  }
}
/* Writes a single byte to the stream. */
inline void fwrite_uchar (unsigned char v, std::fstream& file)
{
  file.write(reinterpret_cast<const char*>(&v), sizeof(unsigned char));
}
/* Writes the in-memory bytes of an unsigned short to the stream (host byte order). */
inline void fwrite_ushort(unsigned short v, std::fstream& file)
{
  file.write(reinterpret_cast<const char*>(&v), sizeof(unsigned short));
}
/* Writes width*height pixels to fileName as a TGA file with 24 bits per
   pixel. Bits 0-7, 8-15 and 16-23 of every pixel are written in that order,
   bits 24-31 are dropped. On failure an error message is printed and the
   exception is rethrown. */
void storeTga(uint32_t* pixels, uint32_t width, uint32_t height, const std::string& fileName) try
{
  std::fstream out;
  out.exceptions (std::fstream::failbit | std::fstream::badbit);
  out.open (fileName.c_str(), std::fstream::out | std::fstream::binary);

  /* 18 byte header */
  fwrite_uchar(0x00, out);
  fwrite_uchar(0x00, out);
  fwrite_uchar(0x02, out);
  fwrite_ushort(0x0000, out);
  fwrite_ushort(0x0000, out);
  fwrite_uchar(0x00, out);
  fwrite_ushort(0x0000, out);
  fwrite_ushort(0x0000, out);
  fwrite_ushort((unsigned short)width , out);
  fwrite_ushort((unsigned short)height, out);
  fwrite_uchar(0x18, out);
  fwrite_uchar(0x20, out);

  /* pixel data, row by row */
  const size_t num_pixels = (size_t)height * (size_t)width;
  for (size_t i=0; i<num_pixels; i++)
  {
    const uint32_t color = pixels[i];
    for (unsigned int shift=0; shift<24; shift+=8)
      fwrite_uchar((unsigned char)((color>>shift)&0xFF), out);
  }
}
catch (std::exception const& e) {
  std::cout << "Error: Cannot write file " << fileName << std::endl;
  throw;
}
/* Reads the entire file into a byte vector. On failure an error message is
   printed and the exception is rethrown. */
std::vector<unsigned char> readFile(const std::string& fileName) try
{
  std::fstream in;
  in.exceptions (std::fstream::failbit | std::fstream::badbit);
  in.open (fileName.c_str(), std::fstream::in | std::fstream::binary);

  /* the file size is the stream position at the end of the file */
  in.seekg (0, std::ios::end);
  std::streampos num_bytes = in.tellg();
  in.seekg (0, std::ios::beg);

  std::vector<unsigned char> bytes(num_bytes);
  in.read ((char*)bytes.data(), num_bytes);
  in.close();
  return bytes;
}
catch (std::exception const& e) {
  std::cout << "Error: Cannot read file " << fileName << std::endl;
  throw;
}
/* Compares two files byte by byte and returns an error score: a byte that
   differs by 1 adds 1, by 2 adds 4 and by 3 or more adds 100. A score of 0
   means that the files are identical.
   Files of different size cannot match, for these the largest possible score
   is returned (a return value of 0 would report them as identical).
   Exceptions thrown by readFile propagate to the caller. */
size_t compareTga(const std::string& fileNameA, const std::string& fileNameB)
{
  const std::vector<unsigned char> dataA = readFile(fileNameA);
  const std::vector<unsigned char> dataB = readFile(fileNameB);

  if (dataA.size() != dataB.size())
    return ~size_t(0);

  size_t diff = 0;
  for (size_t i=0; i<dataA.size(); i++)
  {
    const int delta = std::abs((int)dataA[i] - (int)dataB[i]);
    if      (delta == 1) diff += 1;
    else if (delta == 2) diff += 4;
    else if (delta >= 3) diff += 100;
  }
  return diff;
}
/* Properly allocates an acceleration structure buffer using ze_raytracing_mem_alloc_ext_desc_t property. */
void* alloc_accel_buffer(size_t bytes, sycl::device device, sycl::context context)
{
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);
ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
ze_result_t err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
if (err != ZE_RESULT_SUCCESS)
throw std::runtime_error("zeDeviceGetProperties failed");
ze_raytracing_mem_alloc_ext_desc_t rt_desc;
rt_desc.stype = ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC;
rt_desc.pNext = nullptr;
rt_desc.flags = 0;
ze_device_mem_alloc_desc_t device_desc;
device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
device_desc.pNext = &rt_desc;
device_desc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED;
device_desc.ordinal = 0;
ze_host_mem_alloc_desc_t host_desc;
host_desc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
host_desc.pNext = nullptr;
host_desc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED;
void* ptr = nullptr;
ze_result_t result = ZeWrapper::zeMemAllocShared(hContext,&device_desc,&host_desc,bytes,rtasProp.rtasBufferAlignment,hDevice,&ptr);
if (result != ZE_RESULT_SUCCESS)
throw std::runtime_error("acceleration buffer allocation failed");
return ptr;
}
void free_accel_buffer(void* ptr, sycl::context context)
{
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
ze_result_t result = ZeWrapper::zeMemFree(hContext,ptr);
if (result != ZE_RESULT_SUCCESS)
throw std::runtime_error("acceleration buffer free failed");
}
/* dispatch globals allocation is for debugging only */
/* Control flags stored in DispatchGlobals::flags. */
enum Flags : uint32_t {
FLAGS_NONE,
DEPTH_TEST_LESS_EQUAL = 1 << 0 // when set we use <= for depth test, otherwise <
};
/* Header that allocDispatchGlobals writes to the start of the dispatch
   globals buffer. NOTE(review): the field order and the bit field widths
   presumably have to match the layout expected by the hardware - do not
   reorder without confirming. */
struct DispatchGlobals
{
uint64_t rtMemBasePtr; // base address of the allocated stack memory
uint64_t callStackHandlerKSP; // this is the KSP of the continuation handler that is invoked by BTD when the read KSP is 0
uint32_t asyncStackSize; // async-RT stack size in 64 byte blocks
uint32_t numDSSRTStacks : 16; // number of stacks per DSS
uint32_t syncRayQueryCount : 4; // number of ray queries in the sync-RT stack: 0-15 mapped to: 1-16
unsigned _reserved_mbz : 12; // reserved bits, written as zero by allocDispatchGlobals
uint32_t maxBVHLevels; // the maximal number of supported instancing levels (0->8, 1->1, 2->2, ...)
Flags flags; // per context control flags
};
/* Allocates and initializes the dispatch globals buffer.
 *
 * The buffer is obtained from alloc_accel_buffer(), cleared to zero and then
 * a DispatchGlobals header is written at its start. Its size is 128 bytes
 * plus 2^17 ray tracing stacks, each rounded up to a multiple of 64 bytes.
 *
 * device, context: forwarded unchanged to alloc_accel_buffer()
 * returns: pointer to the start of the buffer
 */
void* allocDispatchGlobals(sycl::device device, sycl::context context)
{
  /* size of one stack: 64 bytes plus 64+32 bytes per BVH level, rounded up to 64 bytes */
  const size_t bvhLevels = 2; //RTC_MAX_INSTANCE_LEVEL_COUNT+1;
  const size_t unalignedStackBytes = 64 + bvhLevels*(64+32);
  const size_t stackBytes = (unalignedStackBytes + 63) & ~size_t(63);

  /* total size of the buffer */
  const size_t stackCount = size_t(1) << 17; // this is sufficiently large also for PVC
  const size_t totalBytes = 128 + stackCount*stackBytes;

  void* const buffer = alloc_accel_buffer(totalBytes,device,context);
  memset(buffer, 0, totalBytes);

  /* fill in the header located at the start of the buffer */
  DispatchGlobals* const globals = static_cast<DispatchGlobals*>(buffer);
  globals->rtMemBasePtr = (uint64_t) buffer + totalBytes; // address just past the end of the buffer
  globals->callStackHandlerKSP = 0;
  globals->asyncStackSize = 0;
  globals->numDSSRTStacks = 0;
  globals->syncRayQueryCount = 0;
  globals->_reserved_mbz = 0;
  globals->maxBVHLevels = (uint32_t) bvhLevels;
  globals->flags = DEPTH_TEST_LESS_EQUAL;

  return buffer;
}
/* vertex indices for cornell_box model
 *
 * 34 triangles referencing vertex indices 0..67 of the `vertices` array.
 * Triangles come in pairs { 4k, 4k+1, 4k+2 } and { 4k, 4k+2, 4k+3 }, i.e.
 * every pair splits one quad made of four consecutive vertices. */
ze_rtas_triangle_indices_uint32_exp_t indices[] = {
{ 0, 1, 2 },
{ 0, 2, 3 },
{ 4, 5, 6 },
{ 4, 6, 7 },
{ 8, 9, 10 },
{ 8, 10, 11 },
{ 12, 13, 14 },
{ 12, 14, 15 },
{ 16, 17, 18 },
{ 16, 18, 19 },
{ 20, 21, 22 },
{ 20, 22, 23 },
{ 24, 25, 26 },
{ 24, 26, 27 },
{ 28, 29, 30 },
{ 28, 30, 31 },
{ 32, 33, 34 },
{ 32, 34, 35 },
{ 36, 37, 38 },
{ 36, 38, 39 },
{ 40, 41, 42 },
{ 40, 42, 43 },
{ 44, 45, 46 },
{ 44, 46, 47 },
{ 48, 49, 50 },
{ 48, 50, 51 },
{ 52, 53, 54 },
{ 52, 54, 55 },
{ 56, 57, 58 },
{ 56, 58, 59 },
{ 60, 61, 62 },
{ 60, 62, 63 },
{ 64, 65, 66 },
{ 64, 66, 67 }
};
/* vertex positions for cornell_box model
 *
 * 68 positions, consumed four at a time: each group of four consecutive
 * entries holds the corners of one quad that the `indices` array splits
 * into two triangles. Corner positions shared by several quads are stored
 * once per quad rather than being reused. */
ze_rtas_float3_exp_t vertices[] = {
// quad 0: vertices 0-3
{ 552.8, 0, 0 },
{ 0, 0, 0 },
{ 0, 0, 559.2 },
{ 549.6, 0, 559.2 },
// quad 1: vertices 4-7
{ 290, 0, 114 },
{ 240, 0, 272 },
{ 82, 0, 225 },
{ 130, 0, 65 },
// quad 2: vertices 8-11
{ 472, 0, 406 },
{ 314, 0, 456 },
{ 265, 0, 296 },
{ 423, 0, 247 },
// quad 3: vertices 12-15
{ 556, 548.8, 0 },
{ 556, 548.8, 559.2 },
{ 0, 548.8, 559.2 },
{ 0, 548.8, 0 },
// quad 4: vertices 16-19
{ 549.6, 0, 559.2 },
{ 0, 0, 559.2 },
{ 0, 548.8, 559.2 },
{ 556, 548.8, 559.2 },
// quad 5: vertices 20-23
{ 0, 0, 559.2 },
{ 0, 0, 0 },
{ 0, 548.8, 0 },
{ 0, 548.8, 559.2 },
// quad 6: vertices 24-27
{ 552.8, 0, 0 },
{ 549.6, 0, 559.2 },
{ 556, 548.8, 559.2 },
{ 556, 548.8, 0 },
// quad 7: vertices 28-31
{ 130, 165, 65 },
{ 82, 165, 225 },
{ 240, 165, 272 },
{ 290, 165, 114 },
// quad 8: vertices 32-35
{ 290, 0, 114 },
{ 290, 165, 114 },
{ 240, 165, 272 },
{ 240, 0, 272 },
// quad 9: vertices 36-39
{ 130, 0, 65 },
{ 130, 165, 65 },
{ 290, 165, 114 },
{ 290, 0, 114 },
// quad 10: vertices 40-43
{ 82, 0, 225 },
{ 82, 165, 225 },
{ 130, 165, 65 },
{ 130, 0, 65 },
// quad 11: vertices 44-47
{ 240, 0, 272 },
{ 240, 165, 272 },
{ 82, 165, 225 },
{ 82, 0, 225 },
// quad 12: vertices 48-51
{ 423, 330, 247 },
{ 265, 330, 296 },
{ 314, 330, 456 },
{ 472, 330, 406 },
// quad 13: vertices 52-55
{ 423, 0, 247 },
{ 423, 330, 247 },
{ 472, 330, 406 },
{ 472, 0, 406 },
// quad 14: vertices 56-59
{ 472, 0, 406 },
{ 472, 330, 406 },
{ 314, 330, 456 },
{ 314, 0, 456 },
// quad 15: vertices 60-63
{ 314, 0, 456 },
{ 314, 330, 456 },
{ 265, 330, 296 },
{ 265, 0, 296 },
// quad 16: vertices 64-67
{ 265, 0, 296 },
{ 265, 330, 296 },
{ 423, 330, 247 },
{ 423, 0, 247 },
};
/* builds acceleration structure
 *
 * Builds a ray tracing acceleration structure over the global
 * `indices`/`vertices` triangle mesh through the Level Zero RTAS builder
 * entry points exposed by ZeWrapper. The build is started as a deferred
 * parallel operation and completed by joining it from TBB worker threads.
 *
 * device, context: SYCL device and context; the native Level Zero driver and
 *                  device handles are derived from `device`, and both are
 *                  forwarded to alloc_accel_buffer() for the result buffer
 * returns: buffer obtained from alloc_accel_buffer() that holds the built
 *          acceleration structure; release it with free_accel_buffer()
 * throws:  std::runtime_error as soon as any Level Zero call fails
 */
void* build_rtas(sycl::device device, sycl::context context)
{
  /* get L0 handles */
  ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device.get_platform());
  ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);

  /* create rtas builder object */
  ze_rtas_builder_exp_desc_t builderDesc = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC };
  ze_rtas_builder_exp_handle_t hBuilder = nullptr;
  ze_result_t err = ZeWrapper::zeRTASBuilderCreateExp(hDriver, &builderDesc, &hBuilder);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("ze_rtas_builder creation failed");

  /* create geometry descriptor for single triangle mesh */
  ze_rtas_builder_triangles_geometry_info_exp_t mesh = {};
  mesh.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES;
  mesh.geometryFlags = 0;
  mesh.geometryMask = 0xFF;
  mesh.triangleFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32;
  mesh.triangleCount = sizeof(indices)/sizeof(ze_rtas_triangle_indices_uint32_exp_t);
  mesh.triangleStride = sizeof(ze_rtas_triangle_indices_uint32_exp_t);
  mesh.pTriangleBuffer = indices;
  mesh.vertexFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
  mesh.vertexCount = sizeof(vertices)/sizeof(ze_rtas_float3_exp_t);
  mesh.vertexStride = sizeof(ze_rtas_float3_exp_t);
  mesh.pVertexBuffer = vertices;

  /* fill geometry descriptor array with pointer to single geometry descriptor */
  std::vector<ze_rtas_builder_geometry_info_exp_t*> descs;
  descs.push_back((ze_rtas_builder_geometry_info_exp_t*)&mesh);

  /* get acceleration structure format for this device */
  ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
  ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
  err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("zeDeviceGetProperties failed");

  /* create parallel operation for parallel build */
  ze_rtas_parallel_operation_exp_handle_t hParallelOperation = nullptr;
  err = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("zeRTASParallelOperationCreateExp failed");

  /* create descriptor of build operation */
  size_t accelBufferBytesOut = 0;
  ze_rtas_aabb_exp_t bounds;
  ze_rtas_builder_build_op_exp_desc_t buildOp = {};
  buildOp.stype = ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC;
  buildOp.pNext = nullptr;
  buildOp.rtasFormat = rtasProp.rtasFormat;
  buildOp.buildQuality = ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_MEDIUM;
  buildOp.buildFlags = 0;
  buildOp.ppGeometries = (const ze_rtas_builder_geometry_info_exp_t **) descs.data();
  buildOp.numGeometries = descs.size();

  /* just for debugging purposes */
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
  ze_rtas_builder_build_op_debug_exp_desc_t buildOpDebug = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC };
  buildOpDebug.dispatchGlobalsPtr = dispatchGlobalsPtr;
  buildOp.pNext = &buildOpDebug;
#endif

  /* query required buffer sizes */
  ze_rtas_builder_exp_properties_t buildProps = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES };
  err = ZeWrapper::zeRTASBuilderGetBuildPropertiesExp(hBuilder,&buildOp,&buildProps);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("zeRTASBuilderGetBuildPropertiesExp failed");

  /* allocate scratch buffer; std::vector value-initializes its elements,
   * so the buffer already starts out zeroed */
  std::vector<char> scratchBuffer(buildProps.scratchBufferSizeBytes);

  /* allocate acceleration structure buffer */
  size_t accelBytes = buildProps.rtasBufferSizeBytesMaxRequired;
  void* accel = alloc_accel_buffer(accelBytes,device,context);
  memset(accel,0,accelBytes); // optional

  /* start the acceleration structure build; because a parallel operation is
   * attached, the only result accepted here is the "deferred" status and the
   * work is carried out by the joins below */
  err = ZeWrapper::zeRTASBuilderBuildExp(hBuilder,&buildOp,
                                         scratchBuffer.data(),scratchBuffer.size(),
                                         accel, accelBytes,
                                         hParallelOperation,
                                         nullptr, &bounds, &accelBufferBytesOut);
  if (err != ZE_RESULT_EXP_RTAS_BUILD_DEFERRED)
    throw std::runtime_error("zeRTASBuilderBuildExp failed");

  /* after the build is started one can query number of threads to use for the build */
  ze_rtas_parallel_operation_exp_properties_t prop = { ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES };
  err = ZeWrapper::zeRTASParallelOperationGetPropertiesExp(hParallelOperation,&prop);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("zeRTASParallelOperationGetPropertiesExp failed");

  /* build in parallel using maximal number of build threads; every index
   * stores its result in its own slot, so the workers never write to a
   * shared variable concurrently and a failure reported by one join cannot
   * be overwritten by the success of another */
  std::vector<ze_result_t> joinResults(prop.maxConcurrency, ZE_RESULT_SUCCESS);
  tbb::parallel_for(0u, prop.maxConcurrency, 1u, [&](uint32_t i) {
    joinResults[i] = ZeWrapper::zeRTASParallelOperationJoinExp(hParallelOperation);
  });
  for (ze_result_t joinResult : joinResults) {
    if (joinResult != ZE_RESULT_SUCCESS)
      throw std::runtime_error("zeRTASParallelOperationJoinExp failed");
  }

  /* destroy parallel operation again */
  err = ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("zeRTASParallelOperationDestroyExp failed");

  /* destroy rtas builder again */
  err = ZeWrapper::zeRTASBuilderDestroyExp(hBuilder);
  if (err != ZE_RESULT_SUCCESS)
    throw std::runtime_error("zeRTASBuilderDestroyExp failed");

  return accel;
}
/* render using simple UV shading */
void render(unsigned int x, unsigned int y, void* bvh, unsigned int* pixels, unsigned int width, unsigned int height)
{
/* write zero image if ray tracing extension is not supported */
intel_raytracing_ext_flag_t flags = intel_get_raytracing_ext_flag();
if (!(flags & intel_raytracing_ext_flag_ray_query)) {
pixels[y*width+x] = 0;
return;
}
/* fixed camera */
sycl::float3 vx(-1.f, -0.f, -0.f);
sycl::float3 vy(-0.f, -1.f, -0.f);
sycl::float3 vz(32.f, 32.f, 95.6379f);
sycl::float3 p(278.f, 273.f, -800.f);
/* compute primary ray */
intel_ray_desc_t ray;
ray.origin = p;
ray.direction = float(x)*vx*64.0f/float(width) + float(y)*vy*64/float(height) + vz;
ray.tmin = 0.0f;
ray.tmax = INFINITY;
ray.mask = 0xFF;
ray.flags = intel_ray_flags_none;
/* trace ray */
intel_ray_query_t query = intel_ray_query_init(ray,(intel_raytracing_acceleration_structure_t)bvh);
intel_ray_query_start_traversal(query);
intel_ray_query_sync(query);
/* get UVs of hit point */
float u = 0, v = 0;
if (intel_has_committed_hit(query))
{
sycl::float2 uv = intel_get_hit_barycentrics( query, intel_hit_type_committed_hit );
u = uv.x();
v = uv.y();
}
/* write color to framebuffer */
sycl::float3 color(u,v,1.0f-u-v);
unsigned int r = (unsigned int) (255.0f * color.x());
unsigned int g = (unsigned int) (255.0f * color.y());
unsigned int b = (unsigned int) (255.0f * color.z());
pixels[y*width+x] = (b << 16) + (g << 8) + r;
}
/* Entry point of the cornell_box sample.
 *
 * Parses the command line, initializes ZeWrapper and its RTAS builder,
 * builds the acceleration structure, renders the image and stores it as
 * "cornell_box.tga". With --compare the stored image is compared against
 * the given reference file.
 *
 * Command line arguments:
 *   --compare <file>            compare the result against reference image <file>
 *   --internal-rtas-builder     select ZeWrapper::RTAS_BUILD_MODE::INTERNAL
 *   --level-zero-rtas-builder   select ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO
 *   --default-rtas-builder      select ZeWrapper::RTAS_BUILD_MODE::AUTO
 *   --size <width> <height>     image size, each value in [1,4096]
 *
 * Returns 0 on success (including a passed comparison) and 1 if ZeWrapper
 * cannot be initialized, the comparison fails or an exception is caught.
 */
int main(int argc, char* argv[]) try
{
  /* default RTAS build mode, can be overridden on the command line */
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
  ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO;
#else
  ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::INTERNAL;
#endif

  /* user can specify reference image to compare against */
  const char* reference_img = nullptr;

  for (int i=1; i<argc; i++)
  {
    if (strcmp(argv[i], "--compare") == 0) {
      if (++i >= argc) throw std::runtime_error("--compare: filename expected");
      reference_img = argv[i];
    }
    else if (strcmp(argv[i], "--internal-rtas-builder") == 0) {
      rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::INTERNAL;
    }
    else if (strcmp(argv[i], "--level-zero-rtas-builder") == 0) {
      rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO;
    }
    else if (strcmp(argv[i], "--default-rtas-builder") == 0) {
      rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::AUTO;
    }
    else if (strcmp(argv[i], "--size") == 0) {
      if (++i >= argc) throw std::runtime_error("--size: width expected");
      const int requested_width = atoi(argv[i]);
      if (++i >= argc) throw std::runtime_error("--size: height expected");
      const int requested_height = atoi(argv[i]);
      if (requested_width == 0) throw std::runtime_error("--size: width is zero");
      if (requested_height == 0) throw std::runtime_error("--size: height is zero");
      /* validate on the signed values so that negative input is rejected
       * whatever the signedness of global_width/global_height */
      if (requested_width < 0 || requested_width > 4096) throw std::runtime_error("--size: width too large");
      if (requested_height < 0 || requested_height > 4096) throw std::runtime_error("--size: height too large");
      global_width = requested_width;
      global_height = requested_height;
    }
    else {
      throw std::runtime_error("unknown command line argument");
    }
  }

  /* create SYCL objects */
  sycl::device device = sycl::device(sycl::gpu_selector_v);
  sycl::queue queue = sycl::queue(device,exception_handler);
  sycl::context context = queue.get_context();

  if (ZeWrapper::init() != ZE_RESULT_SUCCESS) {
    std::cerr << "ZeWrapper not successfully initialized" << std::endl;
    return 1;
  }

  ze_result_t result = ZE_RESULT_SUCCESS;
  sycl::platform platform = device.get_platform();
  ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);

  /* in AUTO mode use the RTAS extension only when the driver lists it */
  if (rtas_build_mode == ZeWrapper::RTAS_BUILD_MODE::AUTO)
  {
    /* first call queries the number of extensions, second call fetches them */
    uint32_t count = 0;
    std::vector<ze_driver_extension_properties_t> extensions;
    result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
    if (result != ZE_RESULT_SUCCESS)
      throw std::runtime_error("zeDriverGetExtensionProperties failed");

    extensions.resize(count);
    result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
    if (result != ZE_RESULT_SUCCESS)
      throw std::runtime_error("zeDriverGetExtensionProperties failed");

    bool ze_rtas_builder = false;
    for (uint32_t i=0; i<extensions.size(); i++)
    {
      if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
        ze_rtas_builder = true;
    }

    if (ze_rtas_builder)
      result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::RTAS_BUILD_MODE::AUTO);
    else
      result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::RTAS_BUILD_MODE::INTERNAL);
  }
  else
    result = ZeWrapper::initRTASBuilder(hDriver,rtas_build_mode);

  if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
    throw std::runtime_error("cannot load ZE_experimental_rtas_builder extension");
  if (result != ZE_RESULT_SUCCESS)
    throw std::runtime_error("cannot initialize ZE_experimental_rtas_builder extension");

  if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
    std::cout << "using internal RTAS builder" << std::endl;
  else
    std::cout << "using Level Zero RTAS builder" << std::endl;

#if defined(ZE_RAYTRACING_RT_SIMULATION)
  RTCore::Init();
  RTCore::SetXeVersion((RTCore::XeVersion)ZE_RAYTRACING_DEVICE);
#endif

#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
  dispatchGlobalsPtr = allocDispatchGlobals(device,context);
#endif

  /* build acceleration structure */
  void* bvh = build_rtas(device,context);

  /* creates framebuffer */
  const uint32_t width = global_width;
  const uint32_t height = global_height;
  unsigned int* pixels = (unsigned int*) sycl::aligned_alloc(64,width*height*sizeof(unsigned int),device,context,sycl::usm::alloc::shared);
  if (pixels == nullptr)
    throw std::runtime_error("framebuffer allocation failed");
  memset(pixels, 0, width*height*sizeof(uint32_t));

  /* renders image on device */
#if defined(ZE_RAYTRACING_RT_SIMULATION)
  tbb::parallel_for(tbb::blocked_range2d<uint32_t>(0,height,0,width),
    [&](const tbb::blocked_range2d<uint32_t>& r) {
      for (uint32_t y=r.rows().begin(); y<r.rows().end(); y++) {
        for (uint32_t x=r.cols().begin(); x<r.cols().end(); x++) {
          render(x,y,bvh,pixels,width,height);
        }
      }
    });
#else
  queue.submit([&](sycl::handler& cgh) {
    const sycl::range<2> range(width,height);
    cgh.parallel_for(range, [=](sycl::item<2> item) {
      const uint32_t x = item.get_id(0);
      const uint32_t y = item.get_id(1);
      render(x,y,bvh,pixels,width,height);
    });
  });
  queue.wait_and_throw();
#endif

  /* free acceleration structure again */
  free_accel_buffer(bvh,context);

#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
  free_accel_buffer(dispatchGlobalsPtr, context);
#endif

#if defined(ZE_RAYTRACING_RT_SIMULATION)
  RTCore::Cleanup();
#endif

  /* store image to disk, the framebuffer is not needed afterwards */
  storeTga(pixels,width,height,"cornell_box.tga");
  sycl::free(pixels,context);
  if (!reference_img) return 0;

  /* compare to the reference image passed with --compare */
  const size_t err = compareTga("cornell_box.tga", reference_img);
  std::cout << "difference to reference image is " << err << std::endl;
  const bool ok = err < 32;
  std::cout << "cornell_box ";
  if (ok) std::cout << "[PASSED]" << std::endl;
  else std::cout << "[FAILED]" << std::endl;

  return ok ? 0 : 1;
}
catch (const std::runtime_error& e) {
  std::cerr << "std::runtime_error: " << e.what() << std::endl;
  return 1;
}
catch (const std::exception& e) {
  /* any other standard exception, reported instead of terminating the process */
  std::cerr << "std::exception: " << e.what() << std::endl;
  return 1;
}

File diff suppressed because it is too large Load diff