Initial commit.
This commit is contained in:
commit
d3bb49b3f5
1073 changed files with 484757 additions and 0 deletions
182
Framework/external/embree/kernels/rthwif/CMakeLists.txt
vendored
Normal file
182
Framework/external/embree/kernels/rthwif/CMakeLists.txt
vendored
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
## Copyright 2009-2021 Intel Corporation
|
||||
## SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# This file includes the FetchContent module further down, which first shipped
# with CMake 3.11, so that is the real minimum. Requesting 3.1 additionally
# makes CMake >= 4.0 abort, as it no longer supports compatibility with
# versions older than 3.5.
cmake_minimum_required(VERSION 3.11)

project(ze_raytracing)

include(CTest)

# Version of this library; RTHWIF_VERSION is the dotted MAJOR.MINOR.PATCH form.
set(RTHWIF_VERSION_MAJOR 4)
set(RTHWIF_VERSION_MINOR 1)
set(RTHWIF_VERSION_PATCH 0)
set(RTHWIF_VERSION ${RTHWIF_VERSION_MAJOR}.${RTHWIF_VERSION_MINOR}.${RTHWIF_VERSION_PATCH})

set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
# Two build modes are distinguished by whether EMBREE_VERSION_MAJOR is already
# defined: if it is not, the library is configured as the standalone
# "ze_raytracing" project, otherwise as the in-tree "embree_rthwif" library.
if(NOT DEFINED EMBREE_VERSION_MAJOR)

  set(RTHWIF_STANDALONE ON)
  set(RTHWIF_NAME ze_raytracing)
  add_definitions("-DRTHWIF_STANDALONE")

  set(EMBREE_CMAKEEXPORT_DIR "cmake")

  option(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON)

  # standalone: shared library that links a statically built TBB
  set(EMBREE_RTHWIF_STATIC_LIB OFF)
  set(EMBREE_BUILDER_TBB_STATIC ON)

  # place all build artifacts directly into the binary directory
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")

  # lets include(package_ze_raytracing) find the module next to this file
  set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" ${CMAKE_MODULE_PATH})

  # NOTE(review): both configure_file calls write generated headers into the
  # source tree (kernels/ and include/embree4/).
  configure_file(
    "${PROJECT_SOURCE_DIR}/../../kernels/config.h.in"
    "${PROJECT_SOURCE_DIR}/../../kernels/config.h"
  )

  set(EMBREE_MAX_INSTANCE_LEVEL_COUNT 1)
  configure_file(
    "${PROJECT_SOURCE_DIR}/../../kernels/rtcore_config.h.in"
    "${PROJECT_SOURCE_DIR}/../../include/embree4/rtcore_config.h"
  )

  if(NOT WIN32)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
  endif()

  add_subdirectory(../../common/sys sys)
  add_subdirectory(../../common/simd simd)

  # treat the SYCL headers shipped next to the compiler as system headers
  get_filename_component(SYCL_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem \"${SYCL_COMPILER_DIR}/../include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../include/\"") # disable warning from SYCL header (FIXME: why required?)

  find_package(TBB 2020)

else()

  set(RTHWIF_NAME embree_rthwif)
  option(EMBREE_RTHWIF_STATIC_LIB "Build RTHWIF as a static library." ON)
  option(EMBREE_BUILDER_TBB_STATIC "Use a staticaly compiled TBB version for the Embree builder for GPU." OFF)

endif()
|
||||
|
||||
# The validation API needs explicitly allocated dispatch globals unless the
# implicit (L0 allocated) ones were requested.
if(EMBREE_SYCL_RT_VALIDATION_API AND NOT EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
  add_definitions("-DEMBREE_SYCL_ALLOC_DISPATCH_GLOBALS")
endif()

# disables "use of bitwise '&' with boolean operands" warning, both for the
# regular and the SYCL specific flag set
set(CMAKE_CXX_FLAGS      "${CMAKE_CXX_FLAGS} -Wno-bitwise-instead-of-logical")
set(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} -Wno-bitwise-instead-of-logical")
|
||||
|
||||
# Translate the EMBREE_RTHWIF_STATIC_LIB switch into the library type keyword
# handed to add_library() below.
if(NOT EMBREE_RTHWIF_STATIC_LIB)
  set(RTHWIF_LIB_TYPE SHARED)
else()
  set(RTHWIF_LIB_TYPE STATIC)
endif()
|
||||
|
||||
# TBB_TARGET names the target embree_rthwif links against. The default is the
# "tasking" target, which carries all TBB related information we need when
# TASKING_TBB is used.
set(TBB_TARGET tasking)

if(EMBREE_BUILDER_TBB_STATIC OR NOT TASKING_TBB)

  ####################################################################
  # fetch TBB and build static version of it
  set(TBB_TARGET tbb)
  add_definitions("-D_CRT_SECURE_NO_WARNINGS")

  # configuration consumed by the fetched TBB build
  option(TBB_STRICT "Treat compiler warnings as errors" OFF)
  option(TBB_TEST "Enable testing" OFF)
  option(TBBMALLOC_BUILD "Enable tbbmalloc build" OFF)
  set(TBB_DIR OFF)
  set(BUILD_SHARED_LIBS OFF)

  include(FetchContent)

  set(FETCHCONTENT_QUIET OFF)

  # allow setting the repository externally
  if(NOT EMBREE_RTHWIF_TBB_GIT_REPOSITORY)
    set(EMBREE_RTHWIF_TBB_GIT_REPOSITORY "https://github.com/oneapi-src/oneTBB.git")
  endif()

  FetchContent_Declare(
    tbb_static
    GIT_REPOSITORY ${EMBREE_RTHWIF_TBB_GIT_REPOSITORY}
    GIT_TAG        v2021.6.0
  )

  FetchContent_GetProperties(tbb_static)
  if(NOT tbb_static_POPULATED)
    FetchContent_Populate(tbb_static)
    # We want to build tbb_static to link it into embree_rthwif, but don't want to
    # install it as part of the Embree install targets.
    add_subdirectory(${tbb_static_SOURCE_DIR} ${tbb_static_BINARY_DIR} EXCLUDE_FROM_ALL)
  endif()

  # hide the FetchContent and TBB cache entries from the default GUI view
  mark_as_advanced(
    FETCHCONTENT_BASE_DIR
    FETCHCONTENT_FULLY_DISCONNECTED
    FETCHCONTENT_QUIET
    FETCHCONTENT_SOURCE_DIR_TBB_STATIC
    FETCHCONTENT_UPDATES_DISCONNECTED
    FETCHCONTENT_UPDATES_DISCONNECTED_TBB_STATIC
  )
  mark_as_advanced(
    TBB4PY_BUILD
    TBBMALLOC_BUILD
    TBB_BUILD
    TBB_CPF
    TBB_DISABLE_HWLOC_AUTOMATIC_SEARCH
    TBB_ENABLE_IPO
    TBB_EXAMPLES
    TBB_FIND_PACKAGE
    TBB_INSTALL_VARS
    TBB_NO_APPCONTAINER
    TBB_SANITIZE
    TBB_STRICT
    TBB_TEST
    TBB_TEST_SPEC
    TBB_VALGRIND_MEMCHECK
    TBB_WINDOWS_DRIVER
  )

  add_definitions(-DTASKING_TBB)
  ####################################################################
endif()
|
||||
|
||||
# Packaging is only set up for the standalone build: first the project's own
# package description, then CPack, which reads the CPACK_* variables set by it.
if(RTHWIF_STANDALONE)
  include(package_ze_raytracing)
  include(CPack)
endif()
|
||||
|
||||
# Optional static library with the ray tracing validation API.
if(EMBREE_SYCL_RT_VALIDATION_API)
  add_library(embree_rthwif_sycl STATIC rttrace/rttrace_validation.cpp)

  # Apply the SYCL flag set first. Setting COMPILE_FLAGS replaces the whole
  # property, so it must not run after anything was appended to it. The value
  # is quoted so that an empty flag string still yields a valid property/value
  # pair.
  set_target_properties(embree_rthwif_sycl PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS_SYCL}")

  # Previously this define was appended to COMPILE_FLAGS and then discarded by
  # the assignment above; attach it as a compile definition so it survives.
  target_compile_definitions(embree_rthwif_sycl PRIVATE EMBREE_SYCL_SUPPORT)

  install(TARGETS embree_rthwif_sycl EXPORT embree_rthwif_sycl-targets
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
  install(EXPORT embree_rthwif_sycl-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel)

  # consumed below as PUBLIC link dependency of embree_rthwif
  set(EMBREE_RTHWIF_SYCL embree_rthwif_sycl)
endif()
|
||||
|
||||
# Build the acceleration structure builder library unless
# EMBREE_SYCL_L0_RTAS_BUILDER is set.
if(NOT EMBREE_SYCL_L0_RTAS_BUILDER)

  add_library(embree_rthwif ${RTHWIF_LIB_TYPE}
    rtbuild/rtbuild.cpp
    rtbuild/qbvh6.cpp
    rtbuild/statistics.cpp)

  target_link_libraries(embree_rthwif PUBLIC ${EMBREE_RTHWIF_SYCL} PRIVATE ${TBB_TARGET} simd sys)

  # the file name on disk follows the build mode (ze_raytracing / embree_rthwif)
  set_target_properties(embree_rthwif PROPERTIES OUTPUT_NAME ${RTHWIF_NAME})

  if(EMBREE_RTHWIF_STATIC_LIB)
    target_compile_definitions(embree_rthwif PUBLIC EMBREE_RTHWIF_STATIC_LIB)
  endif()

  target_compile_definitions(embree_rthwif PUBLIC EMBREE_SYCL_SUPPORT)

  # install rules are skipped only when a static rthwif is combined with a
  # non-static Embree
  if(EMBREE_STATIC_LIB OR NOT EMBREE_RTHWIF_STATIC_LIB)
    install(TARGETS embree_rthwif EXPORT ${RTHWIF_NAME}-targets
      LIBRARY NAMELINK_SKIP DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib
      RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples
      ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel)
    install(EXPORT ${RTHWIF_NAME}-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel)
  endif()

endif()

add_subdirectory(testing)
|
||||
|
||||
|
||||
|
||||
60
Framework/external/embree/kernels/rthwif/package_ze_raytracing.cmake
vendored
Normal file
60
Framework/external/embree/kernels/rthwif/package_ze_raytracing.cmake
vendored
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
## Copyright 2009-2021 Intel Corporation
|
||||
## SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
include(GNUInstallDirs)

##############################################################
# Install Documentation
##############################################################

# License, changelog and third party notices all live two directories above
# the project source directory and are installed into "doc" as part of the
# lib component.
install(FILES
  "${PROJECT_SOURCE_DIR}/../../LICENSE.txt"
  "${PROJECT_SOURCE_DIR}/../../CHANGELOG.md"
  "${PROJECT_SOURCE_DIR}/../../third-party-programs.txt"
  "${PROJECT_SOURCE_DIR}/../../third-party-programs-TBB.txt"
  "${PROJECT_SOURCE_DIR}/../../third-party-programs-OIDN.txt"
  "${PROJECT_SOURCE_DIR}/../../third-party-programs-DPCPP.txt"
  "${PROJECT_SOURCE_DIR}/../../third-party-programs-oneAPI-DPCPP.txt"
  DESTINATION doc
  COMPONENT lib)
|
||||
|
||||
##############################################################
|
||||
# CPack specific stuff
|
||||
##############################################################
|
||||
|
||||
set(CPACK_PACKAGE_NAME "L0 Ray Tracing Build API")
set(CPACK_PACKAGE_FILE_NAME "ze_raytracing-${RTHWIF_VERSION}")
set(CPACK_STRIP_FILES TRUE)

# Use the RTHWIF version, matching the package file name above. The
# EMBREE_VERSION* variables used here before are not defined in the standalone
# build, which is the only configuration that includes this file, so the
# package version ended up empty.
set(CPACK_PACKAGE_VERSION_MAJOR ${RTHWIF_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${RTHWIF_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${RTHWIF_VERSION_PATCH})
set(CPACK_PACKAGE_VERSION ${RTHWIF_VERSION})
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Implements acceleration structure build for L0 ray tracing extension.")
set(CPACK_PACKAGE_VENDOR "Intel Corporation")
set(CPACK_PACKAGE_CONTACT embree_support@intel.com)
set(CPACK_MONOLITHIC_INSTALL 1)

# display names and descriptions of the install components used by this project
set(CPACK_COMPONENT_LIB_DISPLAY_NAME "Library")
set(CPACK_COMPONENT_LIB_DESCRIPTION "Library")

set(CPACK_COMPONENT_DEVEL_DISPLAY_NAME "Development")
set(CPACK_COMPONENT_DEVEL_DESCRIPTION "Development")

set(CPACK_COMPONENT_EXAMPLES_DISPLAY_NAME "Examples")
set(CPACK_COMPONENT_EXAMPLES_DESCRIPTION "Examples")
|
||||
|
||||
# Pick the archive generator and append a platform suffix to the package file
# name: ZIP on Windows and macOS, TGZ everywhere else.
if(WIN32)
  # Windows specific settings
  set(CPACK_GENERATOR ZIP)
  set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x64.windows")
elseif(APPLE)
  # MacOSX specific settings
  set(CPACK_GENERATOR ZIP)
  set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.macosx")
else()
  # Linux specific settings
  set(CPACK_GENERATOR TGZ)
  set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.linux")
endif()
|
||||
629
Framework/external/embree/kernels/rthwif/rtbuild/leaf.h
vendored
Normal file
629
Framework/external/embree/kernels/rthwif/rtbuild/leaf.h
vendored
Normal file
|
|
@ -0,0 +1,629 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(ZE_RAYTRACING)
|
||||
#include "sys/sysinfo.h"
|
||||
#include "sys/vector.h"
|
||||
#include "math/vec2.h"
|
||||
#include "math/vec3.h"
|
||||
#include "math/bbox.h"
|
||||
#include "math/affinespace.h"
|
||||
#else
|
||||
#include "../../../common/sys/sysinfo.h"
|
||||
#include "../../../common/sys/vector.h"
|
||||
#include "../../../common/math/vec2.h"
|
||||
#include "../../../common/math/vec3.h"
|
||||
#include "../../../common/math/bbox.h"
|
||||
#include "../../../common/math/lbbox.h"
|
||||
#include "../../../common/math/affinespace.h"
|
||||
#endif
|
||||
|
||||
#include "node_type.h"
|
||||
|
||||
#include <map>
|
||||
#include <bitset>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*
|
||||
|
||||
Internal representation for GeometryFlags.
|
||||
|
||||
*/
|
||||
|
||||
#undef OPAQUE // Windows defines OPAQUE in gdi.h

/* Flags attached to a geometry, stored as a 32 bit value. */
enum class GeometryFlags : uint32_t
{
  NONE   = 0x0,
  OPAQUE = 0x1
};

/* Tests whether the two flag values have at least one set bit in
 * common. Note that the result is a bool and not a GeometryFlags
 * value. */
inline bool operator& (GeometryFlags a, GeometryFlags b)
{
  const int common = int(a) & int(b);
  return common != 0;
}

/* Output operator for GeometryFlags. Writes "NONE" if no flag is
 * set, otherwise the name of each set flag followed by a space.
 * Prints nothing when compiled for a SYCL device. */
inline std::ostream& operator<<(std::ostream& cout, const GeometryFlags& gflags)
{
#if !defined(__SYCL_DEVICE_ONLY__)
  if (gflags == GeometryFlags::NONE) {
    cout << "NONE";
    return cout;
  }
  if (gflags & GeometryFlags::OPAQUE) {
    cout << "OPAQUE ";
  }
#endif
  return cout;
}
|
||||
|
||||
/*
|
||||
|
||||
This structure is a header for each leaf type. Only the
|
||||
InstanceLeaf has a slightly different header.
|
||||
|
||||
All primitives inside a leaf are of the same geometry, thus have
|
||||
the same geometry index (geomIndex), the same shader index
|
||||
(shaderIndex), the same geometry mask (geomMask), and the same
|
||||
geometry flags (geomFlags).
|
||||
|
||||
The shaderIndex is used to calculate the shader record to
|
||||
invoke. This is an extension to DXR where the geomIndex is used
|
||||
for that purpose. For DXR we can always set the shaderIndex to be
|
||||
equal to the geomIndex.
|
||||
|
||||
*/
|
||||
|
||||
struct PrimLeafDesc
|
||||
{
|
||||
static const uint32_t MAX_GEOM_INDEX = 0x3FFFFFFF;
|
||||
static const uint32_t MAX_SHADER_INDEX = 0xFFFFFF;
|
||||
|
||||
enum Type : uint32_t
|
||||
{
|
||||
TYPE_NONE = 0,
|
||||
|
||||
/* For a node type of NODE_TYPE_PROCEDURAL we support enabling
|
||||
* and disabling the opaque/non_opaque culling. */
|
||||
|
||||
TYPE_OPACITY_CULLING_ENABLED = 0,
|
||||
TYPE_OPACITY_CULLING_DISABLED = 1
|
||||
};
|
||||
|
||||
PrimLeafDesc() {}
|
||||
|
||||
PrimLeafDesc(uint32_t shaderIndex, uint32_t geomIndex, GeometryFlags gflags, uint32_t geomMask, Type type = TYPE_NONE)
|
||||
: shaderIndex(shaderIndex), geomMask(geomMask), geomIndex(geomIndex), type(type), geomFlags((uint32_t)gflags)
|
||||
{
|
||||
if (shaderIndex > MAX_SHADER_INDEX)
|
||||
throw std::runtime_error("too large shader ID");
|
||||
|
||||
if (geomIndex > MAX_GEOM_INDEX)
|
||||
throw std::runtime_error("too large geometry ID");
|
||||
}
|
||||
|
||||
/* compares two PrimLeafDesc's for equality */
|
||||
friend bool operator ==(const PrimLeafDesc& a, const PrimLeafDesc& b)
|
||||
{
|
||||
if (a.geomIndex != b.geomIndex) return false;
|
||||
assert(a.shaderIndex == b.shaderIndex);
|
||||
assert(a.geomMask == b.geomMask);
|
||||
assert(a.type == b.type);
|
||||
assert(a.geomFlags == b.geomFlags);
|
||||
return true;
|
||||
}
|
||||
|
||||
friend bool operator !=(const PrimLeafDesc& a, const PrimLeafDesc& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
void print(std::ostream& cout, uint32_t depth) const
|
||||
{
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
cout << tab(depth) << "PrimLeafDesc {" << std::endl;
|
||||
cout << tab(depth) << " shaderIndex = " << shaderIndex << std::endl;
|
||||
cout << tab(depth) << " geomMask = " << std::bitset<8>(geomMask) << std::endl;
|
||||
cout << tab(depth) << " geomFlags = " << getGeomFlags() << std::endl;
|
||||
cout << tab(depth) << " geomIndex = " << geomIndex << std::endl;
|
||||
cout << tab(depth) << "}";
|
||||
#endif
|
||||
}
|
||||
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const PrimLeafDesc& desc) {
|
||||
desc.print(cout,0); return cout;
|
||||
}
|
||||
|
||||
/* Checks if opaque culling is enabled. */
|
||||
bool opaqueCullingEnabled() const {
|
||||
return type == TYPE_OPACITY_CULLING_ENABLED;
|
||||
}
|
||||
|
||||
/* procedural instances store some valid shader index */
|
||||
bool isProceduralInstance() const {
|
||||
return shaderIndex != 0xFFFFFF;
|
||||
}
|
||||
|
||||
/* returns geometry flags */
|
||||
GeometryFlags getGeomFlags() const {
|
||||
return (GeometryFlags) geomFlags;
|
||||
}
|
||||
|
||||
public:
|
||||
uint32_t shaderIndex : 24; // shader index used for shader record calculations
|
||||
uint32_t geomMask : 8; // geometry mask used for ray masking
|
||||
|
||||
uint32_t geomIndex : 29; // the geometry index specifies the n'th geometry of the scene
|
||||
/*Type*/ uint32_t type : 1; // enable/disable culling for procedurals and instances
|
||||
/*GeometryFlags*/ uint32_t geomFlags : 2; // geometry flags of this geometry
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
The QuadLeaf structure stores a single quad. A quad is a triangle
|
||||
pair with a shared edge. The first triangle has vertices v0,v1,v2,
|
||||
while the second triangle has vertices v[j0],v[j1],v[j2], thus the
|
||||
second triangle uses local triangle indices.
|
||||
|
||||
*/
|
||||
|
||||
struct QuadLeaf
|
||||
{
|
||||
QuadLeaf() {}
|
||||
|
||||
QuadLeaf (Vec3f v0, Vec3f v1, Vec3f v2, Vec3f v3,
|
||||
uint8_t j0, uint8_t j1, uint8_t j2,
|
||||
uint32_t shaderIndex, uint32_t geomIndex, uint32_t primIndex0, uint32_t primIndex1,
|
||||
GeometryFlags gflags, uint32_t geomMask, bool last)
|
||||
|
||||
: leafDesc(shaderIndex,geomIndex,gflags,geomMask),
|
||||
primIndex0(primIndex0),
|
||||
primIndex1Delta(primIndex1-primIndex0), pad1(0),
|
||||
j0(j0),j1(j1),j2(j2),last(last),pad(0),
|
||||
v0(v0), v1(v1), v2(v2), v3(v3)
|
||||
{
|
||||
/* There are some constraints on the primitive indices. The
|
||||
* second primitive index always has to be the largest and the
|
||||
* distance between them can be at most 0xFFFF as we use 16 bits
|
||||
* to encode that difference. */
|
||||
assert(primIndex0 <= primIndex1 && primIndex1 - primIndex0 < 0xFFFF);
|
||||
}
|
||||
|
||||
/* returns the i'th vertex */
|
||||
__forceinline Vec3f vertex(size_t i) const {
|
||||
assert(i < 4); return (&v0)[i];
|
||||
}
|
||||
|
||||
/* Checks if the specified triange is the last inside a leaf
|
||||
* list. */
|
||||
bool isLast(uint32_t i = 1) const
|
||||
{
|
||||
assert(i<2);
|
||||
if (i == 0) return false; // the first triangle is never the last
|
||||
else return last; // the last bit tags the second triangle to be last
|
||||
}
|
||||
|
||||
/* Checks if the second triangle exists. */
|
||||
bool valid2() const {
|
||||
return !(j0 == 0 && j1 == 0 && j2 == 0);
|
||||
}
|
||||
|
||||
/* Calculates the number of stored triangles. */
|
||||
size_t size() const {
|
||||
return 1 + valid2();
|
||||
}
|
||||
|
||||
/* Calculates the effectively used bytes. If we store only one
|
||||
* triangle we waste the storage of one vertex. */
|
||||
size_t usedBytes() const
|
||||
{
|
||||
if (valid2()) return sizeof(QuadLeaf);
|
||||
else return sizeof(QuadLeaf)-sizeof(Vec3f);
|
||||
}
|
||||
|
||||
/* Calculates to delta to add to primIndex0 to get the primitive
|
||||
* index of the i'th triangle. */
|
||||
uint32_t primIndexDelta(uint32_t i) const
|
||||
{
|
||||
assert(i<2);
|
||||
return i*primIndex1Delta;
|
||||
}
|
||||
|
||||
/* Calculates the primitive index of the i'th triangle. */
|
||||
uint32_t primIndex(uint32_t i) const
|
||||
{
|
||||
assert(i<2);
|
||||
return primIndex0 + primIndexDelta(i);
|
||||
}
|
||||
|
||||
/* Quad mode is a special mode where the uv's over the quad are
|
||||
* defined over the entire range [0,1]x[0,1]. */
|
||||
bool quadMode() const {
|
||||
return primIndex1Delta == 0;
|
||||
}
|
||||
|
||||
/* Calculates the bounding box of this leaf. */
|
||||
BBox3f bounds() const
|
||||
{
|
||||
BBox3f b = empty;
|
||||
b.extend(v0);
|
||||
b.extend(v1);
|
||||
b.extend(v2);
|
||||
if (valid2())
|
||||
b.extend(v3);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* output of quad leaf */
|
||||
void print(std::ostream& cout, uint32_t depth) const
|
||||
{
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
cout << tab(depth) << "QuadLeaf {" << std::endl;
|
||||
cout << tab(depth) << " addr = " << this << std::endl;
|
||||
cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl;
|
||||
cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl;
|
||||
cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl;
|
||||
cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl;
|
||||
cout << tab(depth) << " triangle0 = { " << std::endl;
|
||||
cout << tab(depth) << " primIndex = " << primIndex(0) << std::endl;
|
||||
cout << tab(depth) << " v0 = " << v0 << std::endl;
|
||||
cout << tab(depth) << " v1 = " << v1 << std::endl;
|
||||
cout << tab(depth) << " v2 = " << v2 << std::endl;
|
||||
cout << tab(depth) << " }" << std::endl;
|
||||
if (valid2()) {
|
||||
cout << tab(depth) << " triangle1 = { " << std::endl;
|
||||
cout << tab(depth) << " primIndex = " << primIndex(1) << std::endl;
|
||||
cout << tab(depth) << " v0 = " << vertex(j0) << std::endl;
|
||||
cout << tab(depth) << " v1 = " << vertex(j1) << std::endl;
|
||||
cout << tab(depth) << " v2 = " << vertex(j2) << std::endl;
|
||||
cout << tab(depth) << " }" << std::endl;
|
||||
}
|
||||
cout << tab(depth) << "}";
|
||||
#endif
|
||||
}
|
||||
|
||||
/* output operator for QuadLeaf */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const QuadLeaf& leaf) {
|
||||
leaf.print(cout,0); return cout;
|
||||
}
|
||||
|
||||
public:
|
||||
PrimLeafDesc leafDesc; // the leaf header
|
||||
|
||||
uint32_t primIndex0; // primitive index of first triangle
|
||||
struct {
|
||||
uint32_t primIndex1Delta : 5; // delta encoded primitive index of second triangle
|
||||
uint32_t pad1 : 11; // MBZ
|
||||
uint32_t j0 : 2; // specifies first vertex of second triangle
|
||||
uint32_t j1 : 2; // specified second vertex of second triangle
|
||||
uint32_t j2 : 2; // specified third vertex of second triangle
|
||||
uint32_t last : 1; // true if the second triangle is the last triangle in a leaf list
|
||||
uint32_t pad : 9; // unused bits
|
||||
};
|
||||
|
||||
Vec3f v0; // first vertex of first triangle
|
||||
Vec3f v1; // second vertex of first triangle
|
||||
Vec3f v2; // third vertex of first triangle
|
||||
Vec3f v3; // forth vertex used for second triangle
|
||||
};
|
||||
|
||||
static_assert(sizeof(QuadLeaf) == 64, "QuadLeaf must be 64 bytes large");
|
||||
|
||||
/*
|
||||
|
||||
Internal instance flags definition.
|
||||
|
||||
*/
|
||||
|
||||
struct InstanceFlags
{
  enum Flags : uint8_t
  {
    NONE = 0x0,
    TRIANGLE_CULL_DISABLE = 0x1,           // disables culling of front and back facing triangles through ray flags
    TRIANGLE_FRONT_COUNTERCLOCKWISE = 0x2, // for mirroring transformations the instance can switch front and backface of triangles
    FORCE_OPAQUE = 0x4,                    // forces all primitives inside this instance to be opaque
    FORCE_NON_OPAQUE = 0x8                 // forces all primitives inside this instance to be non-opaque
  };

  /* Leaves the flags uninitialized. */
  InstanceFlags() {}

  /* Wraps an enum value. */
  InstanceFlags(Flags rflags)
  {
    flags = rflags;
  }

  /* Wraps a raw 8 bit value. */
  InstanceFlags(uint8_t rflags)
  {
    flags = (Flags)rflags;
  }

  /* Converts back to the enum representation. */
  operator Flags () const {
    return flags;
  }

  /* Output operator for InstanceFlags. Writes "NONE" if no flag is
   * set, otherwise the name of each set flag followed by a space.
   * Prints nothing when compiled for a SYCL device. */
  friend inline std::ostream& operator<<(std::ostream& cout, const InstanceFlags& iflags)
  {
#if !defined(__SYCL_DEVICE_ONLY__)
    if (iflags.flags == InstanceFlags::NONE) {
      cout << "NONE";
      return cout;
    }
    if (iflags.triangle_cull_disable) {
      cout << "TRIANGLE_CULL_DISABLE ";
    }
    if (iflags.triangle_front_counterclockwise) {
      cout << "TRIANGLE_FRONT_COUNTERCLOCKWISE ";
    }
    if (iflags.force_opaque) {
      cout << "FORCE_OPAQUE ";
    }
    if (iflags.force_non_opaque) {
      cout << "FORCE_NON_OPAQUE ";
    }
#endif
    return cout;
  }

public:
  /* the same storage is accessible either as the enum value or as
   * individual single bit members */
  union
  {
    Flags flags;
    struct
    {
      bool triangle_cull_disable : 1;
      bool triangle_front_counterclockwise : 1;
      bool force_opaque : 1;
      bool force_non_opaque : 1;
    };
  };
};

/* Combines two flag values by or-ing their integer values. */
inline InstanceFlags::Flags operator| (InstanceFlags::Flags a,InstanceFlags::Flags b)
{
  const int combined = int(a) | int(b);
  return (InstanceFlags::Flags)combined;
}
|
||||
|
||||
/*
|
||||
|
||||
The instance leaf represents an instance. It essentially stores
|
||||
transformation matrices (local to world as well as world to
|
||||
local) of the instance as well as a pointer to the start node
|
||||
of some BVH.
|
||||
|
||||
The instance leaf consists of two parts, part0 (first 64 bytes)
|
||||
and part1 (second 64 bytes). Part0 will only get accessed by
|
||||
hardware and stores the world to local transformation as well as
|
||||
the BVH node to start traversal. Part1 stores additional data
|
||||
that is only read by the shader, e.g. it stores the local to
|
||||
world transformation of the instance.
|
||||
|
||||
The layout of the first part of the InstanceLeaf is compatible
|
||||
with a ProceduralLeaf, thus we can use the same layout for
|
||||
software instancing if we want.
|
||||
|
||||
*/
|
||||
|
||||
struct InstanceLeaf
|
||||
{
|
||||
InstanceLeaf() {}
|
||||
|
||||
InstanceLeaf (AffineSpace3f obj2world, uint64_t startNodePtr, uint32_t instID, uint32_t instUserID, uint8_t instMask)
|
||||
{
|
||||
part0.shaderIndex = 0; //InstShaderRecordID;
|
||||
part0.geomMask = instMask;
|
||||
|
||||
part0.instanceContributionToHitGroupIndex = 0; //desc.InstanceContributionToHitGroupIndex;
|
||||
part0.pad0 = 0;
|
||||
part0.type = PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED;
|
||||
part0.geomFlags = (uint32_t) GeometryFlags::NONE;
|
||||
|
||||
part0.startNodePtr = startNodePtr;
|
||||
assert((startNodePtr >> 48) == 0);
|
||||
part0.instFlags = (InstanceFlags) 0;
|
||||
part0.pad1 = 0;
|
||||
|
||||
part1.instanceID = instUserID;
|
||||
part1.instanceIndex = instID;
|
||||
part1.bvhPtr = (uint64_t) 0;
|
||||
part1.pad = 0;
|
||||
|
||||
part1.obj2world_vx = obj2world.l.vx;
|
||||
part1.obj2world_vy = obj2world.l.vy;
|
||||
part1.obj2world_vz = obj2world.l.vz;
|
||||
part0.obj2world_p = obj2world.p;
|
||||
|
||||
const AffineSpace3f world2obj = rcp(obj2world);
|
||||
part0.world2obj_vx = world2obj.l.vx;
|
||||
part0.world2obj_vy = world2obj.l.vy;
|
||||
part0.world2obj_vz = world2obj.l.vz;
|
||||
part1.world2obj_p = world2obj.p;
|
||||
}
|
||||
|
||||
/* Returns the address of the start node pointer. We need this
|
||||
* address to calculate relocation tables when dumping the BVH to
|
||||
* disk. */
|
||||
const uint64_t startNodePtrAddr() const {
|
||||
return (uint64_t)((char*)&part0 + 8);
|
||||
}
|
||||
|
||||
/* Returns the address of the BVH that contains the start node. */
|
||||
const uint64_t bvhPtrAddr() const {
|
||||
return (uint64_t)&part1;
|
||||
}
|
||||
|
||||
/* returns the world to object space transformation matrix. */
|
||||
const AffineSpace3f World2Obj() const {
|
||||
return AffineSpace3f(part0.world2obj_vx,part0.world2obj_vy,part0.world2obj_vz,part1.world2obj_p);
|
||||
}
|
||||
|
||||
/* returns the object to world space transformation matrix. */
|
||||
const AffineSpace3f Obj2World() const {
|
||||
return AffineSpace3f(part1.obj2world_vx,part1.obj2world_vy,part1.obj2world_vz,part0.obj2world_p);
|
||||
}
|
||||
|
||||
/* output operator for instance leaf */
|
||||
void print (std::ostream& cout, uint32_t depth) const
|
||||
{
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
if (!part0.type) cout << tab(depth) << "InstanceLeaf {" << std::endl;
|
||||
else cout << tab(depth) << "ProceduralInstanceLeaf {" << std::endl;
|
||||
|
||||
cout << tab(depth) << " addr = " << this << std::endl;
|
||||
cout << tab(depth) << " shaderIndex = " << part0.shaderIndex << std::endl;
|
||||
cout << tab(depth) << " geomMask = " << std::bitset<8>(part0.geomMask) << std::endl;
|
||||
cout << tab(depth) << " geomIndex = " << part1.instanceIndex << std::endl;
|
||||
cout << tab(depth) << " instanceID = " << part1.instanceID << std::endl;
|
||||
cout << tab(depth) << " instFlags = " << InstanceFlags(part0.instFlags) << std::endl;
|
||||
cout << tab(depth) << " startNodePtr = " << (void*)(size_t)part0.startNodePtr << std::endl;
|
||||
cout << tab(depth) << " obj2world.vx = " << part1.obj2world_vx << std::endl;
|
||||
cout << tab(depth) << " obj2world.vy = " << part1.obj2world_vy << std::endl;
|
||||
cout << tab(depth) << " obj2world.vz = " << part1.obj2world_vz << std::endl;
|
||||
cout << tab(depth) << " obj2world.p = " << part0.obj2world_p << std::endl;
|
||||
cout << tab(depth) << " world2obj.vx = " << part0.world2obj_vx << std::endl;
|
||||
cout << tab(depth) << " world2obj.vy = " << part0.world2obj_vy << std::endl;
|
||||
cout << tab(depth) << " world2obj.vz = " << part0.world2obj_vz << std::endl;
|
||||
cout << tab(depth) << " world2obj.p = " << part1.world2obj_p << std::endl;
|
||||
cout << tab(depth) << " instanceContributionToHitGroupIndex = " << part0.instanceContributionToHitGroupIndex << std::endl;
|
||||
cout << tab(depth) << "}";
|
||||
#endif
|
||||
}
|
||||
|
||||
/* output operator for InstanceLeaf */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const InstanceLeaf& leaf) {
|
||||
leaf.print(cout,0); return cout;
|
||||
}
|
||||
|
||||
/* first 64 bytes accessed during traversal by hardware */
|
||||
struct Part0
|
||||
{
|
||||
/* Checks if opaque culling is enabled. */
|
||||
bool opaqueCullingEnabled() const {
|
||||
return type == PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED;
|
||||
}
|
||||
|
||||
public:
|
||||
uint32_t shaderIndex : 24; // shader index used to calculate instancing shader in case of software instancing
|
||||
uint32_t geomMask : 8; // geometry mask used for ray masking
|
||||
|
||||
uint32_t instanceContributionToHitGroupIndex : 24;
|
||||
uint32_t pad0 : 5;
|
||||
|
||||
/* the following two entries are only used for procedural instances */
|
||||
/*PrimLeafDesc::Type*/ uint32_t type : 1; // enables/disables opaque culling
|
||||
/*GeometryFlags*/ uint32_t geomFlags : 2; // unused for instances
|
||||
|
||||
uint64_t startNodePtr : 48; // start node where to continue traversal of the instanced object
|
||||
uint64_t instFlags : 8; // flags for the instance (see InstanceFlags)
|
||||
uint64_t pad1 : 8; // unused bits
|
||||
|
||||
Vec3f world2obj_vx; // 1st column of Worl2Obj transform
|
||||
Vec3f world2obj_vy; // 2nd column of Worl2Obj transform
|
||||
Vec3f world2obj_vz; // 3rd column of Worl2Obj transform
|
||||
Vec3f obj2world_p; // translation of Obj2World transform (on purpose in first 64 bytes)
|
||||
} part0;
|
||||
|
||||
/* second 64 bytes accessed during shading */
|
||||
struct Part1
|
||||
{
|
||||
uint64_t bvhPtr : 48; // pointer to BVH where start node belongs too
|
||||
uint64_t pad : 16; // unused bits
|
||||
|
||||
uint32_t instanceID; // user defined value per DXR spec
|
||||
uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene)
|
||||
|
||||
Vec3f obj2world_vx; // 1st column of Obj2World transform
|
||||
Vec3f obj2world_vy; // 2nd column of Obj2World transform
|
||||
Vec3f obj2world_vz; // 3rd column of Obj2World transform
|
||||
Vec3f world2obj_p; // translation of World2Obj transform
|
||||
} part1;
|
||||
};
|
||||
|
||||
static_assert(sizeof(InstanceLeaf) == 128, "InstanceLeaf must be 128 bytes large");
|
||||
|
||||
|
||||
/*
|
||||
Leaf type for procedural geometry. This leaf only contains the
|
||||
leaf header (which identifies the geometry) and a list of
|
||||
primitive indices.
|
||||
|
||||
The BVH will typically reference only some of the primitives
|
||||
stored inside this leaf. The range is specified by a start
|
||||
primitive and the last primitive is tagged with a bit.
|
||||
|
||||
*/
|
||||
|
||||
struct ProceduralLeaf
|
||||
{
|
||||
static const uint32_t N = 13;
|
||||
|
||||
/* Creates an empty procedural leaf. */
|
||||
ProceduralLeaf ()
|
||||
: leafDesc(PrimLeafDesc::MAX_SHADER_INDEX,PrimLeafDesc::MAX_GEOM_INDEX,GeometryFlags::NONE,0), numPrimitives(0), pad(0), last(0)
|
||||
{
|
||||
for (auto& id : _primIndex) id = 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
/* Creates a procedural leaf with one primitive. More primitives
|
||||
* of the same geometry can get added later using the add
|
||||
* function. */
|
||||
|
||||
ProceduralLeaf (PrimLeafDesc leafDesc, uint32_t primIndex, bool last)
|
||||
: leafDesc(leafDesc), numPrimitives(1), pad(0), last(last ? 0xFFFFFFFF : 0xFFFFFFFE)
|
||||
{
|
||||
for (auto& id : _primIndex) id = 0xFFFFFFFF;
|
||||
_primIndex[0] = primIndex;
|
||||
}
|
||||
|
||||
/* returns the number of primitives stored inside this leaf */
|
||||
uint32_t size() const {
|
||||
return numPrimitives;
|
||||
}
|
||||
|
||||
/* Calculates the effectively used bytes. */
|
||||
size_t usedBytes() const
|
||||
{
|
||||
/*if (leafDesc.isProceduralInstance())
|
||||
return sizeof(InstanceLeaf);
|
||||
else*/
|
||||
return sizeof(PrimLeafDesc)+4+4*numPrimitives;
|
||||
}
|
||||
|
||||
/* if possible adds a new primitive to this leaf */
|
||||
bool add(PrimLeafDesc leafDesc_in, uint32_t primIndex_in, bool last_in)
|
||||
{
|
||||
assert(primIndex_in != 0xFFFFFFFF);
|
||||
if (numPrimitives >= N) return false;
|
||||
if (!numPrimitives) leafDesc = leafDesc_in;
|
||||
if (leafDesc != leafDesc_in) return false;
|
||||
_primIndex[numPrimitives] = primIndex_in;
|
||||
if (last_in) last |= 1 << numPrimitives;
|
||||
else last &= ~(1 << numPrimitives);
|
||||
numPrimitives++;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* returns the primitive index of the i'th primitive */
|
||||
uint32_t primIndex(uint32_t i) const
|
||||
{
|
||||
assert(i < N);
|
||||
return _primIndex[i];
|
||||
}
|
||||
|
||||
/* checks if the i'th primitive is the last in a leaf list */
|
||||
bool isLast(uint32_t i) const {
|
||||
if (i >= N) return true; // just to make some verify tests happy
|
||||
else return (last >> i) & 1;
|
||||
}
|
||||
|
||||
/* output operator for procedural leaf */
|
||||
void print (std::ostream& cout, uint32_t i, uint32_t depth) const
|
||||
{
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
cout << tab(depth) << "ProceduralLeaf {" << std::endl;
|
||||
cout << tab(depth) << " addr = " << this << std::endl;
|
||||
cout << tab(depth) << " slot = " << i << std::endl;
|
||||
if (i < N) {
|
||||
cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl;
|
||||
cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl;
|
||||
cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl;
|
||||
cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl;
|
||||
cout << tab(depth) << " primIndex = " << primIndex(i) << std::endl;
|
||||
} else {
|
||||
cout << tab(depth) << " INVALID" << std::endl;
|
||||
}
|
||||
cout << tab(depth) << "}";
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
PrimLeafDesc leafDesc; // leaf header identifying the geometry
|
||||
uint32_t numPrimitives : 4; // number of stored primitives
|
||||
uint32_t pad : 32-4-N;
|
||||
uint32_t last : N; // bit vector with a last bit per primitive
|
||||
uint32_t _primIndex[N]; // primitive indices of all primitives stored inside the leaf
|
||||
};
|
||||
|
||||
static_assert(sizeof(ProceduralLeaf) == 64, "ProceduralLeaf must be 64 bytes large");
|
||||
}
|
||||
56
Framework/external/embree/kernels/rthwif/rtbuild/node_type.h
vendored
Normal file
56
Framework/external/embree/kernels/rthwif/rtbuild/node_type.h
vendored
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* The type of a node. */
|
||||
enum NodeType : uint8_t
{
  NODE_TYPE_MIXED      = 0,  // mixed internal node: every child may have its own type (shares the value of INTERNAL)
  NODE_TYPE_INTERNAL   = 0,  // internal BVH node with 6 children
  NODE_TYPE_INSTANCE   = 1,  // instance leaf
  NODE_TYPE_PROCEDURAL = 3,  // procedural leaf
  NODE_TYPE_QUAD       = 4,  // quad leaf
  NODE_TYPE_INVALID    = 7   // marks an invalid node
};
|
||||
|
||||
/* output operator for NodeType */
|
||||
inline std::ostream& operator<<(std::ostream& _cout, const NodeType& _type)
|
||||
{
|
||||
#if !defined(__RTRT_GSIM)
|
||||
switch (_type)
|
||||
{
|
||||
case NODE_TYPE_INTERNAL: _cout << "INTERNAL"; break;
|
||||
case NODE_TYPE_INSTANCE: _cout << "INSTANCE"; break;
|
||||
case NODE_TYPE_PROCEDURAL: _cout << "PROCEDURAL"; break;
|
||||
case NODE_TYPE_QUAD: _cout << "QUAD"; break;
|
||||
case NODE_TYPE_INVALID: _cout << "INVALID"; break;
|
||||
default: _cout << "INVALID NODE TYPE"; break;
|
||||
}
|
||||
#endif
|
||||
return _cout;
|
||||
};
|
||||
|
||||
/*
|
||||
Sub-type definition for each NodeType
|
||||
*/
|
||||
|
||||
enum SubType : uint8_t
{
  SUB_TYPE_NONE       = 0x0,

  /* used together with NODE_TYPE_INTERNAL */
  SUB_TYPE_INTERNAL6  = 0x0,  // Xe+: internal node with 6 children

  /* used together with NODE_TYPE_QUAD */
  SUB_TYPE_QUAD       = 0x0,  // Xe+: standard quad leaf (64 bytes)

  /* used together with NODE_TYPE_PROCEDURAL */
  SUB_TYPE_PROCEDURAL = 0x0,  // Xe+: standard procedural leaf
};
|
||||
}
|
||||
265
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6.cpp
vendored
Normal file
265
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6.cpp
vendored
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "qbvh6.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename InternalNode>
|
||||
void computeInternalNodeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area)
|
||||
{
|
||||
InternalNode* inner = node.innerNode<InternalNode>();
|
||||
|
||||
size_t size = 0;
|
||||
for (uint32_t i = 0; i < InternalNode::NUM_CHILDREN; i++)
|
||||
{
|
||||
if (inner->valid(i))
|
||||
{
|
||||
size++;
|
||||
computeStatistics(stats, inner->child(i), time_range, area(inner->bounds(i)), root_bounds_area, InternalNode::NUM_CHILDREN);
|
||||
}
|
||||
}
|
||||
|
||||
/* update BVH statistics */
|
||||
stats.internalNode.numNodes++;
|
||||
stats.internalNode.numChildrenUsed += size;
|
||||
stats.internalNode.numChildrenTotal += InternalNode::NUM_CHILDREN;
|
||||
stats.internalNode.nodeSAH += time_range.size() * node_bounds_area / root_bounds_area;
|
||||
stats.internalNode.numBytes += sizeof(InternalNode);
|
||||
}
|
||||
|
||||
/* Accumulates statistics for the subtree (or leaf list) referenced by
 * 'node' into 'stats'. The numChildren argument is not used here. */
void computeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area, uint32_t numChildren)
{
  if (node.type == NODE_TYPE_INSTANCE)
  {
    /* an instance leaf always holds exactly one primitive */
    auto& acc = stats.instanceLeaf;
    acc.numLeaves++;
    acc.numPrimsUsed++;
    acc.numPrimsTotal++;
    acc.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;
    acc.numBytesUsed += sizeof(InstanceLeaf);
    acc.numBytesTotal += sizeof(InstanceLeaf);
  }
  else if (node.type == NODE_TYPE_QUAD)
  {
    auto& acc = stats.quadLeaf;
    acc.numLeaves++;

    /* walk the consecutive quad blocks until the one tagged as last */
    for (bool done = false; !done; )
    {
      QuadLeaf* const quad = node.leafNodeQuad();
      done = quad->isLast();
      node.node += sizeof(QuadLeaf);

      acc.numPrimsUsed += quad->size();
      acc.numPrimsTotal += 2;
      acc.numBytesUsed += quad->usedBytes();
      acc.numBytesTotal += sizeof(QuadLeaf);
      acc.leafSAH += quad->size() * time_range.size() * node_bounds_area / root_bounds_area;
    }
  }
  else if (node.type == NODE_TYPE_PROCEDURAL)
  {
    /* The dedicated handling of procedural instances is disabled in
     * the original code; every procedural leaf is walked as a list
     * of ProceduralLeaf blocks. */
    auto& acc = stats.proceduralLeaf;
    acc.numLeaves++;

    uint32_t slot = node.cur_prim;
    for (bool done = false; !done; )
    {
      ProceduralLeaf* const leaf = node.leafNodeProcedural();
      done = leaf->isLast(slot);

      /* a block is accounted for when its first slot is visited */
      if (slot == 0)
      {
        acc.numBlocks++;
        acc.numBytesUsed += leaf->usedBytes();
        acc.numBytesTotal += sizeof(ProceduralLeaf);
      }

      acc.numPrimsUsed++;
      acc.numPrimsTotal++;
      acc.leafSAH += time_range.size() * node_bounds_area / root_bounds_area;

      /* advance to the next slot, moving on to the next block if needed */
      slot++;
      if (slot >= leaf->size())
      {
        slot = 0;
        node.node += sizeof(ProceduralLeaf);
      }
    }
  }
  else if (node.type == NODE_TYPE_INTERNAL)
  {
    computeInternalNodeStatistics<QBVH6::InternalNode6>(stats, node, time_range, node_bounds_area, root_bounds_area);
  }
  else
  {
    assert(false);
  }
}
|
||||
|
||||
/* Gathers statistics over the whole BVH, starting at the root with the
 * time range [0,1]. An empty BVH yields default-constructed statistics. */
BVHStatistics QBVH6::computeStatistics() const
{
  BVHStatistics stats;

  if (!empty())
  {
    const float rootArea = area(bounds);
    embree::computeStatistics(stats,root(),BBox1f(0,1),rootArea,rootArea,6);
  }

  return stats;
}
|
||||
|
||||
template<typename QInternalNode>
|
||||
void QBVH6::printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren)
|
||||
{
|
||||
QInternalNode* inner = node.innerNode<QInternalNode>();
|
||||
inner->print(cout, depth, false);
|
||||
std::cout << std::endl;
|
||||
|
||||
for (uint32_t i = 0; i < QInternalNode::NUM_CHILDREN; i++)
|
||||
{
|
||||
if (inner->valid(i))
|
||||
print(cout, inner->child(i), depth + 1, QInternalNode::NUM_CHILDREN);
|
||||
}
|
||||
|
||||
cout << tab(depth) << "}" << std::endl;
|
||||
}
|
||||
|
||||
void QBVH6::print( std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren)
|
||||
{
|
||||
switch (node.type)
|
||||
{
|
||||
case NODE_TYPE_INSTANCE: {
|
||||
node.leafNodeInstance()->print(cout,depth);
|
||||
cout << std::endl;
|
||||
break;
|
||||
}
|
||||
case NODE_TYPE_QUAD:
|
||||
{
|
||||
std::cout << tab(depth) << "List {" << std::endl;
|
||||
|
||||
bool last = false;
|
||||
|
||||
do
|
||||
{
|
||||
QuadLeaf* quad = node.leafNodeQuad();
|
||||
node.node += sizeof(QuadLeaf);
|
||||
last = quad->isLast();
|
||||
|
||||
quad->print(cout,depth+1);
|
||||
std::cout << std::endl;
|
||||
|
||||
} while (!last);
|
||||
|
||||
std::cout << tab(depth) << "}" << std::endl;
|
||||
break;
|
||||
}
|
||||
case NODE_TYPE_PROCEDURAL:
|
||||
{
|
||||
/*if (!node.leafNodeProcedural()->leafDesc.opaqueCullingEnabled())
|
||||
{
|
||||
InstanceLeaf* leaf = (InstanceLeaf*) node.node;
|
||||
leaf->print(cout,depth+1);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
else*/
|
||||
{
|
||||
std::cout << tab(depth) << "List {" << std::endl;
|
||||
|
||||
bool last = false;
|
||||
uint32_t currPrim = node.cur_prim;
|
||||
|
||||
do
|
||||
{
|
||||
ProceduralLeaf* leaf = node.leafNodeProcedural();
|
||||
last = leaf->isLast(currPrim);
|
||||
|
||||
uint32_t primsInBlock = leaf->size();
|
||||
|
||||
leaf->print(cout,currPrim,depth+1);
|
||||
std::cout << std::endl;
|
||||
|
||||
if (++currPrim >= primsInBlock) {
|
||||
currPrim = 0;
|
||||
node.node += sizeof(ProceduralLeaf);
|
||||
}
|
||||
|
||||
} while (!last);
|
||||
|
||||
std::cout << tab(depth) << "}" << std::endl;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case NODE_TYPE_INTERNAL:
|
||||
{
|
||||
printInternalNodeStatistics<QBVH6::InternalNode6>(cout, node, depth, numChildren);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "{ INVALID_NODE }" << std::endl;
|
||||
//assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned* getBackPointersData(const QBVH6* base) { // FIXME: should be member function
|
||||
return (unsigned*)(((const char*)base) + 64 * base->backPointerDataStart);
|
||||
}
|
||||
|
||||
unsigned getNumBackpointers(const QBVH6* base) { // FIXME: should be member function
|
||||
return ((base->backPointerDataEnd - base->backPointerDataStart) * 64) / sizeof(unsigned);
|
||||
}
|
||||
|
||||
uint64_t getBackpointerChildOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function
|
||||
return 64 * uint64_t(base->nodeDataStart + idx);
|
||||
}
|
||||
|
||||
uint64_t getParentFromBackpointerOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function
|
||||
return 64 * uint64_t(base->nodeDataStart + (getBackPointersData(base)[idx] >> 6));
|
||||
}
|
||||
|
||||
void QBVH6::print ( std::ostream& cout ) const
|
||||
{
|
||||
|
||||
cout << "QBVH @ "<< this <<" header: {\n";
|
||||
cout << " rootNodeOffset = " << rootNodeOffset << std::endl;
|
||||
cout << " bounds = " << bounds << std::endl;
|
||||
cout << " nodeDataStart = " << nodeDataStart << std::endl;
|
||||
cout << " nodeDataCur = " << nodeDataCur << std::endl;
|
||||
cout << " leafDataStart = " << leafDataCur << std::endl;
|
||||
cout << " leafDataCur = " << leafDataCur << std::endl;
|
||||
cout << " proceduralDataStart = " << proceduralDataStart << std::endl;
|
||||
cout << " proceduralDataCur = " << proceduralDataCur << std::endl;
|
||||
cout << " backPointerDataStart = " << backPointerDataStart << std::endl;
|
||||
cout << " backPointerDataEnd = " << backPointerDataEnd << std::endl;
|
||||
cout << " numPrims = " << numPrims << std::endl;
|
||||
cout << "}" << std::endl;
|
||||
|
||||
if (empty()) return;
|
||||
|
||||
print(cout,root(),0,6);
|
||||
|
||||
if (hasBackPointers())
|
||||
{
|
||||
cout << "backpointers: {\n";
|
||||
for (unsigned bp = 0; bp < getNumBackpointers(this); ++bp) {
|
||||
cout << " node @ offset " << (void*)getBackpointerChildOffset(this, bp) << " parent = " << (void*)getParentFromBackpointerOffset(this, bp) << ", num children = " << ((getBackPointersData(this)[bp] >> 3) & 0x7) << "\n";
|
||||
}
|
||||
cout << "}\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
230
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6.h
vendored
Normal file
230
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6.h
vendored
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "qnode.h"
|
||||
#include "statistics.h"
|
||||
#include "rtbuild.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*
|
||||
|
||||
The QBVH6 structure defines the bounding volume hierarchy (BVH)
|
||||
that is used by the hardware. It is a BVH with 6-wide branching
|
||||
factor, and quantized bounding boxes. At the leaf level quads
|
||||
(QuadLeaf type), procedural geometries (ProceduralLeaf
|
||||
type), and instances (InstanceLeaf type) can get referenced.
|
||||
|
||||
*/
|
||||
|
||||
/* Rounds 'offset' (in bytes) up to the next multiple of 128 bytes and
 * returns the result expressed in 64 byte blocks. */
inline constexpr size_t roundOffsetTo128(size_t offset)
{
  const size_t chunks128 = (offset + 127) / 128;
  return 2 * chunks128;
}
|
||||
|
||||
struct QBVH6
|
||||
{
|
||||
typedef NodeRef Node;
|
||||
typedef InternalNode<InternalNode6Data> InternalNode6;
|
||||
|
||||
static constexpr uint64_t rootNodeOffset = 128;
|
||||
|
||||
static_assert(sizeof(InternalNode6) == 64, "InternalNode6 must be 64 bytes large");
|
||||
|
||||
/* structure used to initialize the memory allocator inside the BVH */
|
||||
struct SizeEstimate
|
||||
{
|
||||
SizeEstimate ()
|
||||
: nodeBytes(0), leafBytes(0), proceduralBytes(0) {}
|
||||
|
||||
SizeEstimate (size_t nodeBytes, size_t leafBytes, size_t proceduralBytes)
|
||||
: nodeBytes(nodeBytes), leafBytes(leafBytes), proceduralBytes(proceduralBytes) {}
|
||||
|
||||
size_t bytes() const {
|
||||
return sizeof(QBVH6) + nodeBytes + leafBytes + proceduralBytes;
|
||||
}
|
||||
|
||||
friend bool operator<= (SizeEstimate a, SizeEstimate b)
|
||||
{
|
||||
if (a.nodeBytes > b.nodeBytes) return false;
|
||||
if (a.leafBytes > b.leafBytes) return false;
|
||||
if (a.proceduralBytes > b.proceduralBytes) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
friend SizeEstimate operator+ (const SizeEstimate& a, const SizeEstimate& b)
|
||||
{
|
||||
return SizeEstimate(a.nodeBytes + b.nodeBytes,
|
||||
a.leafBytes + b.leafBytes,
|
||||
a.proceduralBytes + b.proceduralBytes);
|
||||
}
|
||||
|
||||
/* output operator */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const SizeEstimate& estimate)
|
||||
{
|
||||
cout << "SizeEstimate {" << std::endl;
|
||||
cout << " nodeBytes = " << estimate.nodeBytes << ", " << std::endl;
|
||||
cout << " leafBytes = " << estimate.leafBytes << ", " << std::endl;
|
||||
cout << " proceduralBytes = " << estimate.proceduralBytes << ", " << std::endl;
|
||||
return cout << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
size_t nodeBytes; // bytes required to store internal nodes
|
||||
size_t leafBytes; // bytes required to store leaf nodes
|
||||
size_t proceduralBytes; // bytes required to store procedural leaf nodes
|
||||
};
|
||||
|
||||
/* Initializes a QBVH6 node with its provided size. The memory for
|
||||
* the QBVH6 structure is overallocated and the allocation size is
|
||||
* provided to the constructor, such that the allocator of the BVH
|
||||
* can get initialized properly. */
|
||||
|
||||
QBVH6(SizeEstimate size)
|
||||
: nodeDataStart((uint32_t)roundOffsetTo128(sizeof(QBVH6))), nodeDataCur(nodeDataStart),
|
||||
leafDataStart(nodeDataCur + (uint32_t)(size.nodeBytes / 64)), leafDataCur(leafDataStart),
|
||||
proceduralDataStart(leafDataCur + (uint32_t)(size.leafBytes / 64)), proceduralDataCur(proceduralDataStart),
|
||||
backPointerDataStart(proceduralDataCur + (uint32_t)(size.proceduralBytes/64)), backPointerDataEnd(backPointerDataStart)
|
||||
{
|
||||
assert(size.nodeBytes % 64 == 0);
|
||||
assert(size.leafBytes % 64 == 0);
|
||||
assert(size.proceduralBytes % 64 == 0);
|
||||
assert(size.bytes() <= (64LL << 32));
|
||||
|
||||
bounds = embree::empty;
|
||||
}
|
||||
|
||||
/* Returns the root node of the BVH */
|
||||
Node root() const {
|
||||
return Node(rootNodeOffset,(uint64_t)this);
|
||||
}
|
||||
|
||||
/* sets root not offset to point to this specified node */
|
||||
void setRootNodeOffset(Node node) {
|
||||
assert(node.cur_prim == 0);
|
||||
uint64_t MAYBE_UNUSED rootNodeOffset1 = (uint64_t)node - (uint64_t)this;
|
||||
assert(rootNodeOffset == rootNodeOffset1);
|
||||
}
|
||||
|
||||
/* check if BVH is empty */
|
||||
bool empty() const {
|
||||
return root().type == NODE_TYPE_INVALID;
|
||||
}
|
||||
|
||||
/* pretty printing */
|
||||
template<typename QInternalNode>
|
||||
static void printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren = 6);
|
||||
static void print(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren=6);
|
||||
void print(std::ostream& cout = std::cout) const;
|
||||
|
||||
/* output operator */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const QBVH6& qbvh) {
|
||||
qbvh.print(cout); return cout;
|
||||
}
|
||||
|
||||
/* calculates BVH statistics */
|
||||
BVHStatistics computeStatistics() const;
|
||||
|
||||
/*
|
||||
This section implements a simple allocator for BVH data. The
|
||||
BVH data is separated into two section, a section where nodes
|
||||
and leaves in mixed mode are allocated, and a section where
|
||||
only leaves are allocate in fat-leaf mode.
|
||||
|
||||
*/
|
||||
public:
|
||||
|
||||
/* allocate data in the node memory section */
|
||||
char* allocNode(size_t bytes)
|
||||
{
|
||||
assert(bytes % 64 == 0);
|
||||
uint32_t blocks = (uint32_t)bytes / 64;
|
||||
assert(nodeDataCur + blocks <= leafDataStart);
|
||||
char* ptr = (char*)this + 64 * (size_t)nodeDataCur;
|
||||
nodeDataCur += blocks;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* allocate memory in the leaf memory section */
|
||||
char* allocLeaf(size_t bytes)
|
||||
{
|
||||
assert(bytes % 64 == 0);
|
||||
uint32_t blocks = (uint32_t)bytes / 64;
|
||||
assert(leafDataCur + blocks <= proceduralDataStart);
|
||||
char* ptr = (char*)this + 64 * (size_t)leafDataCur;
|
||||
leafDataCur += blocks;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* allocate memory in procedural leaf memory section */
|
||||
char* allocProceduralLeaf(size_t bytes)
|
||||
{
|
||||
assert(bytes % 64 == 0);
|
||||
uint32_t blocks = (uint32_t)bytes / 64;
|
||||
assert(proceduralDataCur + blocks <= backPointerDataStart);
|
||||
char* ptr = (char*)this + 64 * (size_t)proceduralDataCur;
|
||||
proceduralDataCur += blocks;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* returns pointer to node address */
|
||||
char* nodePtr(size_t ofs) {
|
||||
return (char*)this + 64 * size_t(nodeDataStart) + ofs;
|
||||
}
|
||||
/* returns pointer to address for next leaf allocation */
|
||||
char* leafPtr() {
|
||||
return (char*)this + 64 * (size_t)leafDataCur;
|
||||
}
|
||||
|
||||
/* returns the total number of bytes of the BVH */
|
||||
size_t getTotalBytes() const {
|
||||
return 64 * (size_t)backPointerDataEnd;
|
||||
}
|
||||
|
||||
/* returns number of bytes available for node allocations */
|
||||
size_t getFreeNodeBytes() const {
|
||||
return 64 * (size_t)(leafDataStart - nodeDataCur);
|
||||
}
|
||||
|
||||
/* returns number of bytes available for leaf allocations */
|
||||
size_t getFreeLeafBytes() const {
|
||||
return 64 * (size_t)(proceduralDataStart - leafDataCur);
|
||||
}
|
||||
|
||||
/* returns number of bytes available for procedural leaf allocations */
|
||||
size_t getFreeProceduralLeafBytes() const {
|
||||
return 64 * (size_t)(backPointerDataStart - proceduralDataCur);
|
||||
}
|
||||
|
||||
/* returns the bytes used by allocations */
|
||||
size_t getUsedBytes() const {
|
||||
return getTotalBytes() - getFreeNodeBytes() - getFreeLeafBytes() - getFreeProceduralLeafBytes();
|
||||
}
|
||||
|
||||
bool hasBackPointers() const {
|
||||
return backPointerDataStart < backPointerDataEnd;
|
||||
}
|
||||
|
||||
public:
|
||||
ze_raytracing_accel_format_internal_t rtas_format = ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1;
|
||||
uint32_t reserved1;
|
||||
BBox3f bounds; // bounding box of the BVH
|
||||
|
||||
uint32_t nodeDataStart; // first 64 byte block of node data
|
||||
uint32_t nodeDataCur; // next free 64 byte block for node allocations
|
||||
uint32_t leafDataStart; // first 64 byte block of leaf data
|
||||
uint32_t leafDataCur; // next free 64 byte block for leaf allocations
|
||||
uint32_t proceduralDataStart; // first 64 byte block for procedural leaf data
|
||||
uint32_t proceduralDataCur; // next free 64 byte block for procedural leaf allocations
|
||||
uint32_t backPointerDataStart; // first 64 byte block for back pointers
|
||||
uint32_t backPointerDataEnd; // end of back pointer array
|
||||
uint32_t numTimeSegments = 1;
|
||||
uint32_t numPrims = 0; // number of primitives in this BVH
|
||||
uint32_t reserved[12];
|
||||
uint64_t dispatchGlobalsPtr;
|
||||
};
|
||||
|
||||
static_assert(sizeof(QBVH6) == 128, "QBVH6 must be 128 bytes large");
|
||||
}
|
||||
|
||||
1340
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6_builder_sah.h
vendored
Normal file
1340
Framework/external/embree/kernels/rthwif/rtbuild/qbvh6_builder_sah.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
508
Framework/external/embree/kernels/rthwif/rtbuild/qnode.h
vendored
Normal file
508
Framework/external/embree/kernels/rthwif/rtbuild/qnode.h
vendored
Normal file
|
|
@ -0,0 +1,508 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
|
||||
#include "leaf.h"
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32)
|
||||
inline float embree_frexp(float value, int* exp)
{
  // Using the Intel(R) oneAPI DPC++/C++ Compiler with -no-intel-libs results
  // in an unresolved external symbol "__imp_frexp" error, therefore we
  // provide the manual implementation referenced at
  // https://en.cppreference.com/w/c/numeric/math/frexp in this case.
  //
  // Splits 'value' into a fraction and a power of two such that
  // value == fraction * 2^(*exp); the fraction is returned.
  static_assert(FLT_RADIX == 2, "custom implementation of frexp only works for base 2 floating point representations");

  // Zero, infinities and NaN are returned unchanged with a zero exponent.
  // For non-finite input logb() yields inf/NaN, and converting that to
  // int is undefined behavior, so these values never reach the cast.
  // The range test is false for NaN and for both infinities.
  const bool finite = (value >= -FLT_MAX) && (value <= FLT_MAX);
  if (value == 0 || !finite) {
    *exp = 0;
    return value;
  }

  *exp = (int)(1 + logb(value));
  return scalbn(value, -(*exp));
}
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* The NodeRef structure references a node of the BVH. It stores the
|
||||
* pointer to that node as well as the node's type. If a leaf node
|
||||
* is referenced the current primitive to intersect is also
|
||||
* stored. */
|
||||
|
||||
struct NodeRef
|
||||
{
|
||||
NodeRef ()
|
||||
: node(nullptr), type(NODE_TYPE_INVALID), cur_prim(0) {}
|
||||
|
||||
NodeRef (void* node, NodeType type, uint8_t cur_prim)
|
||||
: node((char*)node), type(type), cur_prim(cur_prim)
|
||||
{
|
||||
assert(cur_prim < 16);
|
||||
}
|
||||
|
||||
/* decode from 64 bit encoding used in MemRay and Instances */
|
||||
NodeRef (uint64_t nodePtr, uint64_t offset = 0)
|
||||
{
|
||||
node = (char*) (nodePtr & ~(uint64_t)0xF) + offset;
|
||||
//type = NODE_TYPE_INTERNAL; // we can only reference internal nodes inside ray and instances
|
||||
type = (NodeType) (nodePtr & 0xF);
|
||||
cur_prim = 0;
|
||||
}
|
||||
|
||||
/* 64 bit encoding used in MemRay and Instances */
|
||||
operator uint64_t() const
|
||||
{
|
||||
//assert(type == NODE_TYPE_INTERNAL);
|
||||
assert(((uint64_t)node & 0xF) == 0);
|
||||
assert(cur_prim == 0);
|
||||
return (uint64_t)node + (uint64_t) type;
|
||||
}
|
||||
|
||||
/* returns the internal node that is referenced */
|
||||
template<typename InternalNode>
|
||||
InternalNode* innerNode() const {
|
||||
assert(type == NODE_TYPE_INTERNAL);
|
||||
return (InternalNode*)node;
|
||||
}
|
||||
|
||||
/* returns the instance leaf node that is referenced */
|
||||
InstanceLeaf* leafNodeInstance() const {
|
||||
assert(type == NODE_TYPE_INSTANCE);
|
||||
return (InstanceLeaf*)node;
|
||||
}
|
||||
|
||||
/* returns the quad leaf node that is referenced */
|
||||
QuadLeaf* leafNodeQuad() const {
|
||||
assert(type == NODE_TYPE_QUAD);
|
||||
return (QuadLeaf*)node;
|
||||
}
|
||||
|
||||
/* returns the procedural leaf node that is referenced */
|
||||
ProceduralLeaf* leafNodeProcedural() const {
|
||||
assert(type == NODE_TYPE_PROCEDURAL);
|
||||
return (ProceduralLeaf*)node;
|
||||
}
|
||||
|
||||
friend bool operator ==(const NodeRef& a, const NodeRef& b) {
|
||||
return (a.node == b.node) && (a.type == b.type) && (a.cur_prim == b.cur_prim);
|
||||
}
|
||||
|
||||
friend bool operator !=(const NodeRef& a, const NodeRef& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
#if !defined(__RTRT_GSIM)
|
||||
friend inline std::ostream& operator<<(std::ostream& _cout, const NodeRef& node) {
|
||||
return _cout << "NodeRef { " << (void*)node.node << ", " << node.type << ", " << (int)node.cur_prim << " }";
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
char* node; // pointer to the referenced node
|
||||
NodeType type; // type of the node referenced
|
||||
uint8_t cur_prim : 4; // current primitive referenced in the leaf
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
The internal nodes of the BVH store references to 6 children and
|
||||
quantized bounds for each of these children.
|
||||
|
||||
All children are stored consecutively in memory at a location
|
||||
referred to by the childOffset. To calculate the relative
|
||||
location of the i'th child the size (as encoded in blockIncr) of
|
||||
all the children with index smaller than i has to get added to
|
||||
that childOffset. The calculated offset specifies the signed
|
||||
number of 64 bytes blocks relative to the node address to reach
|
||||
the child.
|
||||
|
||||
If the nodeType is INTERNAL we are in mixed mode and the type of
|
||||
each child is encoded inside the startPrim member. Otherwise we
|
||||
are in fat leaf mode and each child has the same type 'nodeType'
|
||||
and startPrim identifies the primitive where the leaf
|
||||
starts. The leaf spans all primitives from this start primitive
|
||||
to the end primitive which is marked as 'last'.
|
||||
|
||||
The bounding boxes of the children are quantized into a regular
|
||||
3D grid. The world space position of the origin of that grid is
|
||||
stored at full precision in the lower member, while the step
|
||||
size is encoded in the exp_x, exp_y, and exp_z members as power
|
||||
of 2. Thus grid coordinates together with their exponent
|
||||
(xi,exp_x), (yi,exp_y), (zi,exp_z) correspond to the mantissa
|
||||
and exponent of a floating point number representation without
|
||||
leading zero. Thus the world space position of the bounding
|
||||
planes can get calculated as follows:
|
||||
|
||||
x = lower.x + pow(2,exp_x) * 0.xi
|
||||
y = lower.y + pow(2,exp_y) * 0.yi
|
||||
z = lower.z + pow(2,exp_z) * 0.zi
|
||||
|
||||
As the stored grid coordinates for child bounds are only
|
||||
unsigned 8-bit values, ray/box intersections can get performed
|
||||
with reduced precision.
|
||||
|
||||
The node also stores a mask used for ray filtering. Only rays
|
||||
with (node.nodeMask & ray.rayMask) != 0 are traversed, all
|
||||
others are culled.
|
||||
|
||||
*/
|
||||
|
||||
struct InternalNode6Data
|
||||
{
|
||||
static constexpr uint32_t NUM_CHILDREN = 6;
|
||||
|
||||
Vec3f lower; // world space origin of quantization grid
|
||||
int32_t childOffset; // offset to all children in 64B multiples
|
||||
|
||||
NodeType nodeType; // the type of the node
|
||||
uint8_t pad; // unused byte
|
||||
|
||||
int8_t exp_x; // 2^exp_x is the size of the grid in x dimension
|
||||
int8_t exp_y; // 2^exp_y is the size of the grid in y dimension
|
||||
int8_t exp_z; // 2^exp_z is the size of the grid in z dimension
|
||||
uint8_t nodeMask; // mask used for ray filtering
|
||||
|
||||
struct ChildData
|
||||
{
|
||||
uint8_t blockIncr : 2; // size of child in 64 byte blocks
|
||||
uint8_t startPrim : 4; // start primitive in fat leaf mode or child type in mixed mode
|
||||
uint8_t pad : 2; // unused bits
|
||||
} childData[NUM_CHILDREN];
|
||||
|
||||
uint8_t lower_x[NUM_CHILDREN]; // the quantized lower bounds in x-dimension
|
||||
uint8_t upper_x[NUM_CHILDREN]; // the quantized upper bounds in x-dimension
|
||||
uint8_t lower_y[NUM_CHILDREN]; // the quantized lower bounds in y-dimension
|
||||
uint8_t upper_y[NUM_CHILDREN]; // the quantized upper bounds in y-dimension
|
||||
uint8_t lower_z[NUM_CHILDREN]; // the quantized lower bounds in z-dimension
|
||||
uint8_t upper_z[NUM_CHILDREN]; // the quantized upper bounds in z-dimension
|
||||
};
|
||||
|
||||
static_assert(sizeof(InternalNode6Data) == 64, "InternalNode6Data must be 64 bytes large");
|
||||
|
||||
template<typename InternalNodeData>
|
||||
struct InternalNodeCommon : public InternalNodeData
|
||||
{
|
||||
using InternalNodeData::NUM_CHILDREN;
|
||||
|
||||
InternalNodeCommon() {
|
||||
}
|
||||
|
||||
InternalNodeCommon(NodeType type)
|
||||
{
|
||||
this->nodeType = type;
|
||||
this->childOffset = 0;
|
||||
this->nodeMask = 0xFF;
|
||||
|
||||
for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++)
|
||||
this->childData[i] = { 0, 0, 0 };
|
||||
|
||||
this->lower = Vec3f(0.0f);
|
||||
this->exp_x = 0;
|
||||
this->exp_y = 0;
|
||||
this->exp_z = 0;
|
||||
|
||||
/* set all child bounds to invalid */
|
||||
for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++) {
|
||||
this->lower_x[i] = this->lower_y[i] = this->lower_z[i] = 0x80;
|
||||
this->upper_x[i] = this->upper_y[i] = this->upper_z[i] = 0x00;
|
||||
}
|
||||
}
|
||||
|
||||
/* this function slightly enlarges bounds in order to make traversal watertight */
|
||||
static const BBox3f conservativeBox(const BBox3f box, float ulps = 1.0f) {
|
||||
const float err = ulps*std::numeric_limits<float>::epsilon() * std::max(reduce_max(abs(box.lower)), reduce_max(abs(box.upper)));
|
||||
return enlarge(box, Vec3f(err));
|
||||
}
|
||||
|
||||
/* this function quantizes the provided bounds */
|
||||
const BBox3f quantize_bounds(BBox3f fbounds, Vec3f base) const
|
||||
{
|
||||
const Vec3f lower = fbounds.lower-base;
|
||||
const Vec3f upper = fbounds.upper-base;
|
||||
float qlower_x = ldexpf(lower.x, -this->exp_x + 8);
|
||||
float qlower_y = ldexpf(lower.y, -this->exp_y + 8);
|
||||
float qlower_z = ldexpf(lower.z, -this->exp_z + 8);
|
||||
float qupper_x = ldexpf(upper.x, -this->exp_x + 8);
|
||||
float qupper_y = ldexpf(upper.y, -this->exp_y + 8);
|
||||
float qupper_z = ldexpf(upper.z, -this->exp_z + 8);
|
||||
assert(qlower_x >= 0.0f && qlower_x <= 255.0f);
|
||||
assert(qlower_y >= 0.0f && qlower_y <= 255.0f);
|
||||
assert(qlower_z >= 0.0f && qlower_z <= 255.0f);
|
||||
assert(qupper_x >= 0.0f && qupper_x <= 255.0f);
|
||||
assert(qupper_y >= 0.0f && qupper_y <= 255.0f);
|
||||
assert(qupper_z >= 0.0f && qupper_z <= 255.0f);
|
||||
qlower_x = min(max(floorf(qlower_x),0.0f),255.0f);
|
||||
qlower_y = min(max(floorf(qlower_y),0.0f),255.0f);
|
||||
qlower_z = min(max(floorf(qlower_z),0.0f),255.0f);
|
||||
qupper_x = min(max(ceilf(qupper_x),0.0f),255.0f);
|
||||
qupper_y = min(max(ceilf(qupper_y),0.0f),255.0f);
|
||||
qupper_z = min(max(ceilf(qupper_z),0.0f),255.0f);
|
||||
BBox3f qbounds(Vec3f(qlower_x, qlower_y, qlower_z), Vec3f(qupper_x, qupper_y, qupper_z));
|
||||
|
||||
/* verify that quantized bounds are conservative */
|
||||
BBox3f dbounds = dequantize_bounds(qbounds, base);
|
||||
dbounds.lower.x -= 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8));
|
||||
dbounds.lower.y -= 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8));
|
||||
dbounds.lower.z -= 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8));
|
||||
dbounds.upper.x += 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8));
|
||||
dbounds.upper.y += 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8));
|
||||
dbounds.upper.z += 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8));
|
||||
assert(subset(fbounds, dbounds));
|
||||
|
||||
return qbounds;
|
||||
}
|
||||
|
||||
/* this function de-quantizes the provided bounds */
|
||||
const BBox3f dequantize_bounds(const BBox3f& qbounds, Vec3f base) const
|
||||
{
|
||||
const float dlower_x = base.x + ldexpf(qbounds.lower.x, this->exp_x - 8);
|
||||
const float dlower_y = base.y + ldexpf(qbounds.lower.y, this->exp_y - 8);
|
||||
const float dlower_z = base.z + ldexpf(qbounds.lower.z, this->exp_z - 8);
|
||||
const float dupper_x = base.x + ldexpf(qbounds.upper.x, this->exp_x - 8);
|
||||
const float dupper_y = base.y + ldexpf(qbounds.upper.y, this->exp_y - 8);
|
||||
const float dupper_z = base.z + ldexpf(qbounds.upper.z, this->exp_z - 8);
|
||||
return BBox3f(Vec3f(dlower_x, dlower_y, dlower_z), Vec3f(dupper_x, dupper_y, dupper_z));
|
||||
}
|
||||
|
||||
/* Determines if a child is valid. We have only to look at the
|
||||
* topmost bit of lower_x and upper_x to determine if child is
|
||||
* valid */
|
||||
bool valid(int i) const {
|
||||
return !(this->lower_x[i] & 0x80) || (this->upper_x[i] & 0x80);
|
||||
}
|
||||
|
||||
/* Determines if the node is in fat leaf mode. */
|
||||
bool isFatLeaf() const {
|
||||
return this->nodeType != NODE_TYPE_MIXED;
|
||||
}
|
||||
|
||||
/* Sets the offset to the child memory. */
|
||||
void setChildOffset(void* childDataPtr)
|
||||
{
|
||||
int64_t childDataOffset = childDataPtr ? (char*)childDataPtr - (char*)this : 0;
|
||||
assert(childDataOffset % 64 == 0);
|
||||
assert((int64_t)(int32_t)(childDataOffset / 64) == (childDataOffset / 64));
|
||||
this->childOffset = (int32_t)(childDataOffset / 64);
|
||||
}
|
||||
|
||||
/* Sets the type, size, and current primitive of a child */
|
||||
void setChildType(uint32_t child, NodeType childType, uint32_t block_delta, uint32_t cur_prim)
|
||||
{
|
||||
// there is no need to store block_delta for last child
|
||||
if (child == NUM_CHILDREN-1) block_delta = 0;
|
||||
|
||||
assert(block_delta < 4);
|
||||
assert(cur_prim < 16);
|
||||
|
||||
if (isFatLeaf())
|
||||
{
|
||||
assert(this->nodeType == childType);
|
||||
this->childData[child].startPrim = cur_prim;
|
||||
this->childData[child].blockIncr = block_delta;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(cur_prim == 0);
|
||||
this->childData[child].startPrim = childType;
|
||||
this->childData[child].blockIncr = block_delta;
|
||||
}
|
||||
}
|
||||
|
||||
void invalidateChild(uint32_t childID)
|
||||
{
|
||||
/* set child bounds to invalid */
|
||||
this->lower_x[childID] = this->lower_y[childID] = this->lower_z[childID] = 0x80;
|
||||
this->upper_x[childID] = this->upper_y[childID] = this->upper_z[childID] = 0x00;
|
||||
}
|
||||
|
||||
/* Sets child bounds */
|
||||
void setChildBounds(uint32_t childID, const BBox3f& fbounds)
|
||||
{
|
||||
assert(fbounds.lower.x <= fbounds.upper.x);
|
||||
assert(fbounds.lower.y <= fbounds.upper.y);
|
||||
assert(fbounds.lower.z <= fbounds.upper.z);
|
||||
const BBox3f qbounds = quantize_bounds(conservativeBox(fbounds), this->lower);
|
||||
this->lower_x[childID] = (uint8_t)qbounds.lower.x;
|
||||
this->lower_y[childID] = (uint8_t)qbounds.lower.y;
|
||||
this->lower_z[childID] = (uint8_t)qbounds.lower.z;
|
||||
this->upper_x[childID] = (uint8_t)qbounds.upper.x;
|
||||
this->upper_y[childID] = (uint8_t)qbounds.upper.y;
|
||||
this->upper_z[childID] = (uint8_t)qbounds.upper.z;
|
||||
assert(valid(childID));
|
||||
}
|
||||
|
||||
/* Sets an entire child, including bounds, type, size, and referenced primitive. */
|
||||
void setChild(uint32_t childID, const BBox3f& fbounds, NodeType type, uint32_t block_delta, uint32_t cur_prim = 0)
|
||||
{
|
||||
setChildType(childID, type, block_delta, cur_prim);
|
||||
setChildBounds(childID, fbounds);
|
||||
}
|
||||
|
||||
/* Calculates the byte offset to the child. The offset is
|
||||
* relative to the address this node. */
|
||||
int64_t getChildOffset(uint32_t childID) const
|
||||
{
|
||||
int64_t ofs = this->childOffset;
|
||||
for (uint32_t j = 0; j < childID; j++)
|
||||
ofs += this->childData[j].blockIncr;
|
||||
return 64 * ofs;
|
||||
}
|
||||
|
||||
/* Returns the type of the child. In fat leaf mode the type is
|
||||
* shared between all children, otherwise a per-child type is
|
||||
* encoded inside the startPrim member for each child. */
|
||||
NodeType getChildType(uint32_t childID) const
|
||||
{
|
||||
if (isFatLeaf())
|
||||
return this->nodeType;
|
||||
|
||||
else
|
||||
return (NodeType)(this->childData[childID].startPrim);
|
||||
}
|
||||
|
||||
/* Returns the start primitive of a child. In case of children
|
||||
* in fat-leaf mode, all children are leaves, and the start
|
||||
* primitive specifies the primitive in a leaf block where the
|
||||
* leaf start. */
|
||||
uint32_t getChildStartPrim(uint32_t childID) const
|
||||
{
|
||||
if (isFatLeaf())
|
||||
return this->childData[childID].startPrim;
|
||||
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Returns a node reference for the given child. This reference
|
||||
* includes the node pointer, type, and start primitive. */
|
||||
NodeRef child(void* This, int childID) const {
|
||||
return NodeRef((char*)This + getChildOffset(childID), getChildType(childID), getChildStartPrim(childID));
|
||||
}
|
||||
|
||||
NodeRef child(int i) const {
|
||||
return child((void*)this, i);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename QInternalNode>
|
||||
struct InternalNode : public InternalNodeCommon<QInternalNode>
|
||||
{
|
||||
using InternalNodeCommon<QInternalNode>::valid;
|
||||
using InternalNodeCommon<QInternalNode>::getChildType;
|
||||
using InternalNodeCommon<QInternalNode>::getChildOffset;
|
||||
using InternalNodeCommon<QInternalNode>::getChildStartPrim;
|
||||
using InternalNodeCommon<QInternalNode>::conservativeBox;
|
||||
using InternalNodeCommon<QInternalNode>::dequantize_bounds;
|
||||
using InternalNodeCommon<QInternalNode>::NUM_CHILDREN;
|
||||
|
||||
InternalNode() {
|
||||
}
|
||||
|
||||
InternalNode (NodeType type)
|
||||
: InternalNodeCommon<QInternalNode>(type) {}
|
||||
|
||||
/* Constructs an internal node. The quantization grid gets
|
||||
* initialized from the provided parent bounds. */
|
||||
InternalNode (BBox3f box, NodeType type = NODE_TYPE_MIXED)
|
||||
: InternalNode(type)
|
||||
{
|
||||
setNodeBounds(box);
|
||||
}
|
||||
|
||||
void setNodeBounds(BBox3f box)
|
||||
{
|
||||
/* initialize quantization grid */
|
||||
box = conservativeBox(box);
|
||||
const float _ulp = std::numeric_limits<float>::epsilon();
|
||||
const float up = 1.0f + float(_ulp);
|
||||
Vec3f len = box.size() * up;
|
||||
this->lower = box.lower;
|
||||
#if defined(__INTEL_LLVM_COMPILER) && defined(WIN32)
|
||||
int _exp_x; float mant_x = embree_frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f);
|
||||
int _exp_y; float mant_y = embree_frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f);
|
||||
int _exp_z; float mant_z = embree_frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f);
|
||||
#else
|
||||
int _exp_x; float mant_x = frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f);
|
||||
int _exp_y; float mant_y = frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f);
|
||||
int _exp_z; float mant_z = frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f);
|
||||
#endif
|
||||
_exp_x = max(-128,_exp_x); // enlarge too tight bounds
|
||||
_exp_y = max(-128,_exp_y);
|
||||
_exp_z = max(-128,_exp_z);
|
||||
this->exp_x = _exp_x; assert(_exp_x >= -128 && _exp_x <= 127);
|
||||
this->exp_y = _exp_y; assert(_exp_y >= -128 && _exp_y <= 127);
|
||||
this->exp_z = _exp_z; assert(_exp_z >= -128 && _exp_z <= 127);
|
||||
}
|
||||
|
||||
/* dequantizes the bounds of the specified child */
|
||||
const BBox3f bounds(uint32_t childID) const
|
||||
{
|
||||
return dequantize_bounds(BBox3f(Vec3f(this->lower_x[childID], this->lower_y[childID], this->lower_z[childID]),
|
||||
Vec3f(this->upper_x[childID], this->upper_y[childID], this->upper_z[childID])),
|
||||
this->lower);
|
||||
}
|
||||
|
||||
const BBox3f bounds() const
|
||||
{
|
||||
BBox3f b = empty;
|
||||
for (size_t i=0; i<NUM_CHILDREN; i++) {
|
||||
if (!valid(i)) continue;
|
||||
b.extend(bounds(i));
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
void copy_to( InternalNode* dst ) const
|
||||
{
|
||||
*dst = *this;
|
||||
dst->setChildOffset((char*)this + getChildOffset(0));
|
||||
}
|
||||
|
||||
#if !defined(__RTRT_GSIM)
|
||||
|
||||
/* output of internal node */
|
||||
void print(std::ostream& cout, uint32_t depth, bool close) const
|
||||
{
|
||||
cout << tab(depth) << "InternalNode" << NUM_CHILDREN << " {" << std::endl;
|
||||
cout << tab(depth) << " addr = " << this << std::endl;
|
||||
cout << tab(depth) << " childOffset = " << 64 * int64_t(this->childOffset) << std::endl;
|
||||
cout << tab(depth) << " nodeType = " << NodeType(this->nodeType) << std::endl;
|
||||
cout << tab(depth) << " nodeMask = " << std::bitset<8>(this->nodeMask) << std::endl;
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CHILDREN; i++)
|
||||
{
|
||||
cout << tab(depth) << " child" << i << " = { ";
|
||||
if (valid(i))
|
||||
{
|
||||
cout << "type = " << getChildType(i);
|
||||
cout << ", offset = " << getChildOffset(i);
|
||||
cout << ", prim = " << getChildStartPrim(i);
|
||||
cout << ", bounds = " << bounds(i);
|
||||
}
|
||||
else {
|
||||
cout << "INVALID";
|
||||
}
|
||||
cout << " }" << std::endl;
|
||||
}
|
||||
|
||||
if (close)
|
||||
cout << tab(depth) << "}";
|
||||
}
|
||||
|
||||
/* output operator for internal node */
|
||||
friend inline std::ostream& operator<<(std::ostream& cout, const InternalNode& node) {
|
||||
node.print(cout, 0, true); return cout;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
inline size_t GetInternalNodeSize(uint32_t numChildren)
|
||||
{
|
||||
if (numChildren <= 6)
|
||||
return sizeof(InternalNode6Data);
|
||||
else
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convenience alias: internal node built on the InternalNode6Data layout.
typedef InternalNode<InternalNode6Data> InternalNode6;
|
||||
}
|
||||
151
Framework/external/embree/kernels/rthwif/rtbuild/quadifier.h
vendored
Normal file
151
Framework/external/embree/kernels/rthwif/rtbuild/quadifier.h
vendored
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(ZE_RAYTRACING)
|
||||
#include "sys/sysinfo.h"
|
||||
#include "sys/vector.h"
|
||||
#include "math/vec2.h"
|
||||
#include "math/vec3.h"
|
||||
#include "math/bbox.h"
|
||||
#include "math/affinespace.h"
|
||||
#else
|
||||
#include "../../common/default.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* Per-triangle result of the quadifier. Besides the special values
 * below, any value > 0 and != 0xFFFF is the offset from a triangle
 * to the triangle it is paired with. */
enum QuadifierType : uint16_t
{
  QUADIFIER_TRIANGLE = 0,        // triangle could not be paired
  QUADIFIER_QUAD = 1,            // smallest offset to a paired triangle
  QUADIFIER_MAX_DISTANCE = 31,   // largest allowed offset to a paired triangle
  QUADIFIER_PAIRED = 0xFFFF,     // triangle is the second half of a pair started by a previous triangle
};
|
||||
|
||||
template<typename Ty, size_t N>
|
||||
struct static_deque
|
||||
{
|
||||
__forceinline Ty pop_front() {
|
||||
assert(size());
|
||||
return operator[](begin++);
|
||||
}
|
||||
|
||||
__forceinline void push_back(const Ty& v) {
|
||||
assert(size() < N);
|
||||
operator[](end++) = v;
|
||||
}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
assert(end >= begin);
|
||||
return end-begin;
|
||||
}
|
||||
|
||||
__forceinline bool full() const {
|
||||
return size() == N;
|
||||
}
|
||||
|
||||
__forceinline void erase( size_t j )
|
||||
{
|
||||
assert(j >= begin && j < end);
|
||||
|
||||
/* fast path as we mostly just merge with the subsequent triangle */
|
||||
if (likely(j == begin))
|
||||
begin++;
|
||||
|
||||
/* fastest when left side is small */
|
||||
else if (j-begin < end-j-1) {
|
||||
for (size_t i=j; i>=begin+1; i--) operator[](i) = operator[](i-1);
|
||||
begin++;
|
||||
}
|
||||
|
||||
/* fastest if right side is small */
|
||||
else {
|
||||
for (size_t i=j+1; i<end; i++) operator[](i-1) = operator[](i);
|
||||
end--;
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline Ty& operator[] ( const size_t i ) { return array[i%N]; }
|
||||
__forceinline const Ty& operator[] ( const size_t i ) const { return array[i%N]; }
|
||||
|
||||
Ty array[N];
|
||||
size_t begin = 0;
|
||||
size_t end = 0;
|
||||
};
|
||||
|
||||
/* Tests if two triangles can be paired. For each vertex index of b
 * the position of the same index inside a is written to lb0/lb1/lb2,
 * where 3 means that the index does not occur in a. Returns true if
 * at most one vertex of b is missing in a. */
__forceinline bool pair_triangles(Vec3<uint32_t> a, Vec3<uint32_t> b, uint8_t& lb0, uint8_t& lb1, uint8_t& lb2)
{
  const vuint<4> va(a.x,a.y,a.z,0);

  /* lane 3 is always set and acts as the "not found" sentinel */
  const vboolf<4> sentinel = vboolf<4>(0x8);
  const vboolf<4> match0 = sentinel | (vuint<4>(b.x) == va);
  const vboolf<4> match1 = sentinel | (vuint<4>(b.y) == va);
  const vboolf<4> match2 = sentinel | (vuint<4>(b.z) == va);

  /* index of the first matching lane */
  lb0 = bsf(movemask(match0));
  lb1 = bsf(movemask(match1));
  lb2 = bsf(movemask(match2));

  const int numMissing = (lb0 == 3) + (lb1 == 3) + (lb2 == 3);
  return numMissing <= 1;
}
|
||||
|
||||
template<typename GetTriangleFunc>
|
||||
__forceinline void merge_triangle_window( uint32_t geomID, static_deque<uint32_t,32>& triangleWindow, QuadifierType* quads_o, const GetTriangleFunc& getTriangle )
|
||||
{
|
||||
uint32_t primID0 = triangleWindow.pop_front();
|
||||
|
||||
/* load first triangle */
|
||||
Vec3<uint32_t> tri0 = getTriangle(geomID, primID0);
|
||||
|
||||
/* find a second triangle in triangle window to pair with */
|
||||
for ( size_t slot = triangleWindow.begin; slot != triangleWindow.end; ++slot )
|
||||
{
|
||||
/* load second triangle */
|
||||
uint32_t primID1 = triangleWindow[slot];
|
||||
Vec3<uint32_t> tri1 = getTriangle(geomID, primID1);
|
||||
|
||||
/* try to pair triangles */
|
||||
uint8_t lb0,lb1,lb2;
|
||||
bool pair = pair_triangles(tri0,tri1,lb0,lb1,lb2);
|
||||
|
||||
/* the offset between the triangles cannot be too large as hardware limits bits for offset encode */
|
||||
uint32_t prim_offset = primID1 - primID0;
|
||||
pair &= prim_offset <= QUADIFIER_MAX_DISTANCE;
|
||||
|
||||
/* store pairing if successful */
|
||||
if (pair)
|
||||
{
|
||||
assert(prim_offset > 0 && prim_offset < QUADIFIER_PAIRED);
|
||||
quads_o[primID0] = (QuadifierType) prim_offset;
|
||||
quads_o[primID1] = QUADIFIER_PAIRED;
|
||||
triangleWindow.erase(slot);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* make a triangle if we fail to find a candiate to pair with */
|
||||
quads_o[primID0] = QUADIFIER_TRIANGLE;
|
||||
}
|
||||
|
||||
template<typename GetTriangleFunc>
|
||||
inline size_t pair_triangles( uint32_t geomID, QuadifierType* quads_o, uint32_t primID0, uint32_t primID1, const GetTriangleFunc& getTriangle )
|
||||
{
|
||||
static_deque<uint32_t, 32> triangleWindow;
|
||||
|
||||
size_t numTrianglePairs = 0;
|
||||
for (uint32_t primID=primID0; primID<primID1; primID++)
|
||||
{
|
||||
triangleWindow.push_back(primID);
|
||||
|
||||
if (triangleWindow.full()) {
|
||||
merge_triangle_window(geomID, triangleWindow,quads_o,getTriangle);
|
||||
numTrianglePairs++;
|
||||
}
|
||||
}
|
||||
|
||||
while (triangleWindow.size()) {
|
||||
merge_triangle_window(geomID, triangleWindow,quads_o,getTriangle);
|
||||
numTrianglePairs++;
|
||||
}
|
||||
|
||||
return numTrianglePairs;
|
||||
}
|
||||
}
|
||||
762
Framework/external/embree/kernels/rthwif/rtbuild/rtbuild.cpp
vendored
Normal file
762
Framework/external/embree/kernels/rthwif/rtbuild/rtbuild.cpp
vendored
Normal file
|
|
@ -0,0 +1,762 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#define RTHWIF_EXPORT_API
|
||||
|
||||
#include "rtbuild.h"
|
||||
#include "qbvh6_builder_sah.h"
|
||||
|
||||
// get definition of debug extension
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
#include "../../level_zero/ze_wrapper.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
using namespace embree::isa;
|
||||
|
||||
// File-local TBB task arena. Both constructor arguments are the maximum concurrency reported for the
// current task arena at static-initialization time.
// NOTE(review): in TBB the second argument is the number of slots reserved for master threads — confirm intent.
static tbb::task_arena g_arena(tbb::this_task_arena::max_concurrency(),tbb::this_task_arena::max_concurrency());
|
||||
|
||||
/* Loads the index triple of triangle primID from the strided
 * triangle buffer of the geometry. */
inline ze_rtas_triangle_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID) {
  assert(primID < geom->triangleCount);
  const uint64_t byteOffset = uint64_t(primID)*geom->triangleStride;
  char* const entry = (char*)geom->pTriangleBuffer + byteOffset;
  return *(ze_rtas_triangle_indices_uint32_exp_t*)entry;
}
|
||||
|
||||
/* Loads vertex vertexID from the strided vertex buffer of the
 * triangle geometry. */
inline Vec3f getVertex(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t vertexID) {
  assert(vertexID < geom->vertexCount);
  const uint64_t byteOffset = uint64_t(vertexID)*geom->vertexStride;
  char* const entry = (char*)geom->pVertexBuffer + byteOffset;
  return *(Vec3f*)entry;
}
|
||||
|
||||
/* Loads the four indices of quad primID from the strided quad
 * buffer of the geometry. */
inline ze_rtas_quad_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID) {
  assert(primID < geom->quadCount);
  const uint64_t byteOffset = uint64_t(primID)*geom->quadStride;
  char* const entry = (char*)geom->pQuadBuffer + byteOffset;
  return *(ze_rtas_quad_indices_uint32_exp_t*)entry;
}
|
||||
|
||||
/* Loads vertex vertexID from the strided vertex buffer of the quad
 * geometry. */
inline Vec3f getVertex(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t vertexID) {
  assert(vertexID < geom->vertexCount);
  const uint64_t byteOffset = uint64_t(vertexID)*geom->vertexStride;
  char* const entry = (char*)geom->pVertexBuffer + byteOffset;
  return *(Vec3f*)entry;
}
|
||||
|
||||
/* Reads the transformation of an instance geometry. The three
 * supported transform formats expose the same named members
 * (vx, vy, vz, p), so they share one conversion. Throws
 * std::runtime_error for any other format. */
inline AffineSpace3fa getTransform(const ze_rtas_builder_instance_geometry_info_exp_t* geom)
{
  /* builds the affine space from the three axis vectors and the translation */
  const auto toAffineSpace = [] (const auto* xfm) -> AffineSpace3fa {
    return {
      { xfm->vx_x, xfm->vx_y, xfm->vx_z },
      { xfm->vy_x, xfm->vy_y, xfm->vy_z },
      { xfm->vz_x, xfm->vz_y, xfm->vz_z },
      { xfm-> p_x, xfm-> p_y, xfm-> p_z }
    };
  };

  switch (geom->transformFormat)
  {
  case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_COLUMN_MAJOR:
    return toAffineSpace((const ze_rtas_transform_float3x4_column_major_exp_t*) geom->pTransform);

  case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR:
    return toAffineSpace((const ze_rtas_transform_float3x4_aligned_column_major_exp_t*) geom->pTransform);

  case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ROW_MAJOR:
    return toAffineSpace((const ze_rtas_transform_float3x4_row_major_exp_t*) geom->pTransform);

  default:
    throw std::runtime_error("invalid transform format");
  }
}
|
||||
|
||||
inline void verifyGeometryDesc(const ze_rtas_builder_triangles_geometry_info_exp_t* geom)
|
||||
{
|
||||
if (geom->triangleFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32)
|
||||
throw std::runtime_error("triangle format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32");
|
||||
|
||||
if (geom->vertexFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3)
|
||||
throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3");
|
||||
|
||||
if (geom->triangleCount && geom->pTriangleBuffer == nullptr) throw std::runtime_error("no triangle buffer specified");
|
||||
if (geom->vertexCount && geom->pVertexBuffer == nullptr) throw std::runtime_error("no vertex buffer specified");
|
||||
}
|
||||
|
||||
inline void verifyGeometryDesc(const ze_rtas_builder_quads_geometry_info_exp_t* geom)
|
||||
{
|
||||
if (geom->quadFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32)
|
||||
throw std::runtime_error("quad format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32");
|
||||
|
||||
if (geom->vertexFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3)
|
||||
throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3");
|
||||
|
||||
if (geom->quadCount && geom->pQuadBuffer == nullptr) throw std::runtime_error("no quad buffer specified");
|
||||
if (geom->vertexCount && geom->pVertexBuffer == nullptr) throw std::runtime_error("no vertex buffer specified");
|
||||
}
|
||||
|
||||
inline void verifyGeometryDesc(const ze_rtas_builder_procedural_geometry_info_exp_t* geom)
|
||||
{
|
||||
if (geom->primCount && geom->pfnGetBoundsCb == nullptr) throw std::runtime_error("no bounds function specified");
|
||||
if (geom->reserved != 0) throw std::runtime_error("reserved value must be zero");
|
||||
}
|
||||
|
||||
inline void verifyGeometryDesc(const ze_rtas_builder_instance_geometry_info_exp_t* geom)
|
||||
{
|
||||
if (geom->pTransform == nullptr) throw std::runtime_error("no instance transformation specified");
|
||||
if (geom->pBounds == nullptr) throw std::runtime_error("no acceleration structure bounds specified");
|
||||
if (geom->pAccelerationStructure == nullptr) throw std::runtime_error("no acceleration structure to instanciate specified");
|
||||
}
|
||||
|
||||
inline bool buildBounds(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
|
||||
{
|
||||
if (primID >= geom->triangleCount) return false;
|
||||
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
|
||||
if (unlikely(tri.v0 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v1 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v2 >= geom->vertexCount)) return false;
|
||||
|
||||
const Vec3f p0 = getVertex(geom,tri.v0);
|
||||
const Vec3f p1 = getVertex(geom,tri.v1);
|
||||
const Vec3f p2 = getVertex(geom,tri.v2);
|
||||
if (unlikely(!isvalid(p0))) return false;
|
||||
if (unlikely(!isvalid(p1))) return false;
|
||||
if (unlikely(!isvalid(p2))) return false;
|
||||
|
||||
bbox = BBox3fa(min(p0,p1,p2),max(p0,p1,p2));
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool buildBounds(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
|
||||
{
|
||||
if (primID >= geom->quadCount) return false;
|
||||
const ze_rtas_quad_indices_uint32_exp_t tri = getPrimitive(geom,primID);
|
||||
if (unlikely(tri.v0 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v1 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v2 >= geom->vertexCount)) return false;
|
||||
if (unlikely(tri.v3 >= geom->vertexCount)) return false;
|
||||
|
||||
const Vec3f p0 = getVertex(geom,tri.v0);
|
||||
const Vec3f p1 = getVertex(geom,tri.v1);
|
||||
const Vec3f p2 = getVertex(geom,tri.v2);
|
||||
const Vec3f p3 = getVertex(geom,tri.v3);
|
||||
if (unlikely(!isvalid(p0))) return false;
|
||||
if (unlikely(!isvalid(p1))) return false;
|
||||
if (unlikely(!isvalid(p2))) return false;
|
||||
if (unlikely(!isvalid(p3))) return false;
|
||||
|
||||
bbox = BBox3fa(min(p0,p1,p2,p3),max(p0,p1,p2,p3));
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool buildBounds(const ze_rtas_builder_procedural_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
|
||||
{
|
||||
if (primID >= geom->primCount) return false;
|
||||
if (geom->pfnGetBoundsCb == nullptr) return false;
|
||||
|
||||
BBox3f bounds;
|
||||
ze_rtas_geometry_aabbs_exp_cb_params_t params = { ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS };
|
||||
params.primID = primID;
|
||||
params.primIDCount = 1;
|
||||
params.pGeomUserPtr = geom->pGeomUserPtr;
|
||||
params.pBuildUserPtr = buildUserPtr;
|
||||
params.pBoundsOut = (ze_rtas_aabb_exp_t*) &bounds;
|
||||
(geom->pfnGetBoundsCb)(¶ms);
|
||||
|
||||
if (unlikely(!isvalid(bounds.lower))) return false;
|
||||
if (unlikely(!isvalid(bounds.upper))) return false;
|
||||
if (unlikely(bounds.empty())) return false;
|
||||
|
||||
bbox = (BBox3f&) bounds;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool buildBounds(const ze_rtas_builder_instance_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr)
|
||||
{
|
||||
if (primID >= 1) return false;
|
||||
if (geom->pAccelerationStructure == nullptr) return false;
|
||||
if (geom->pTransform == nullptr) return false;
|
||||
|
||||
const AffineSpace3fa local2world = getTransform(geom);
|
||||
const Vec3fa lower(geom->pBounds->lower.x,geom->pBounds->lower.y,geom->pBounds->lower.z);
|
||||
const Vec3fa upper(geom->pBounds->upper.x,geom->pBounds->upper.y,geom->pBounds->upper.z);
|
||||
const BBox3fa bounds = xfmBounds(local2world,BBox3fa(lower,upper));
|
||||
|
||||
if (unlikely(!isvalid(bounds.lower))) return false;
|
||||
if (unlikely(!isvalid(bounds.upper))) return false;
|
||||
if (unlikely(bounds.empty())) return false;
|
||||
|
||||
bbox = bounds;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename GeometryType>
|
||||
PrimInfo createGeometryPrimRefArray(const GeometryType* geom, void* buildUserPtr, evector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID)
|
||||
{
|
||||
PrimInfo pinfo(empty);
|
||||
for (uint32_t primID=r.begin(); primID<r.end(); primID++)
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
if (!buildBounds(geom,primID,bounds,buildUserPtr)) continue;
|
||||
const PrimRef prim(bounds,geomID,primID);
|
||||
pinfo.add_center2(prim);
|
||||
prims[k++] = prim;
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
typedef struct _zet_base_desc_t
|
||||
{
|
||||
/** [in] type of this structure */
|
||||
ze_structure_type_t stype;
|
||||
|
||||
/** [in,out][optional] must be null or a pointer to an extension-specific structure */
|
||||
const void* pNext;
|
||||
|
||||
} zet_base_desc_t_;
|
||||
|
||||
/* Runs the matching validate() overload on the argument and
 * returns its result from the enclosing function on failure. */
#define VALIDATE(arg)                                   \
  {                                                     \
    const ze_result_t result = validate(arg);           \
    if (ZE_RESULT_SUCCESS != result) return result;     \
  }
|
||||
|
||||
/* Returns ZE_RESULT_ERROR_INVALID_NULL_POINTER from the enclosing
 * function if the argument is a null pointer. */
#define VALIDATE_PTR(arg)                                                 \
  {                                                                       \
    if (nullptr == (arg)) return ZE_RESULT_ERROR_INVALID_NULL_POINTER;    \
  }
|
||||
|
||||
/* A driver handle is accepted as long as it is not null. */
ze_result_t validate(ze_driver_handle_t hDriver)
{
  return (hDriver != nullptr) ? ZE_RESULT_SUCCESS
                              : ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
}
|
||||
|
||||
/* A device handle is accepted as long as it is not null. */
ze_result_t validate(ze_device_handle_t hDevice)
{
  return (hDevice != nullptr) ? ZE_RESULT_SUCCESS
                              : ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
}
|
||||
|
||||
/* Walks the pNext chain of a descriptor and returns true if it
 * terminates within 1024 links. Longer chains are rejected, which
 * also catches cyclic chains. desc itself has to be non-null. */
bool checkDescChain(zet_base_desc_t_* desc)
{
  zet_base_desc_t_* cur = desc;
  for (size_t remaining = 1024; remaining != 0; --remaining)
  {
    const void* const next = cur->pNext;
    if (!next)
      return true;
    cur = (zet_base_desc_t_*) next;
  }
  return false;
}
|
||||
|
||||
/* Builder object behind a builder handle. It only carries a magic
 * number, which is cleared on destruction, so that pointers that do
 * not refer to a live builder can be detected. */
struct ze_rtas_builder
{
  enum { MAGICK = 0x45FE67E1 };

  ze_rtas_builder () {}

  /* invalidate the object */
  ~ze_rtas_builder() {
    magick = 0x0;
  }

  /* true if the magic number is intact */
  bool verify() const {
    return MAGICK == magick;
  }

  uint32_t magick = MAGICK;
};
|
||||
|
||||
/* A builder handle has to be non-null and has to refer to an
 * object with an intact magic number. */
ze_result_t validate(ze_rtas_builder_exp_handle_t hBuilder)
{
  if (hBuilder == nullptr) {
    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
  }

  const ze_rtas_builder* const builder = (ze_rtas_builder*)hBuilder;
  return builder->verify() ? ZE_RESULT_SUCCESS
                           : ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
|
||||
|
||||
struct ze_rtas_parallel_operation_t
|
||||
{
|
||||
ze_rtas_parallel_operation_t() {
|
||||
}
|
||||
|
||||
~ze_rtas_parallel_operation_t() {
|
||||
magick = 0x0;
|
||||
}
|
||||
|
||||
ze_result_t verify() const
|
||||
{
|
||||
if (magick != MAGICK)
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
enum { MAGICK = 0xE84567E1 };
|
||||
uint32_t magick = MAGICK;
|
||||
std::atomic<bool> object_in_use = false;
|
||||
ze_result_t errorCode = ZE_RESULT_SUCCESS;
|
||||
tbb::task_group group;
|
||||
};
|
||||
|
||||
/* Rejects null parallel-operation handles; otherwise defers to the object's
   own magic-number check. */
ze_result_t validate(ze_rtas_parallel_operation_exp_handle_t hParallelOperation)
{
  const ze_rtas_parallel_operation_t* op = (const ze_rtas_parallel_operation_t*) hParallelOperation;
  if (op == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;

  return op->verify();
}
|
||||
|
||||
/* Checks a builder creation descriptor: non-null pointer, expected structure
   type, terminated pNext chain, and a builder version that is not newer than
   the current one. */
ze_result_t validate(const ze_rtas_builder_exp_desc_t* pDescriptor)
{
  if (pDescriptor == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* evaluated left to right; the first failing check decides */
  if (pDescriptor->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC ||
      !checkDescChain((zet_base_desc_t_*)pDescriptor) ||
      uint32_t(ZE_RTAS_BUILDER_EXP_VERSION_CURRENT) < uint32_t(pDescriptor->builderVersion))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Checks a device-properties output struct: non-null pointer, expected
   structure type and a terminated pNext chain. */
ze_result_t validate(ze_rtas_device_exp_properties_t* pProperties)
{
  if (pProperties == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES ||
      !checkDescChain((zet_base_desc_t_*)pProperties))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Accepts every acceleration structure format except the INVALID value and
   values above ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX. */
ze_result_t validate(ze_rtas_format_exp_t rtasFormat)
{
  const bool rejected =
    rtasFormat == ZE_RTAS_FORMAT_EXP_INVALID ||
    uint32_t(rtasFormat) > uint32_t(ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX);

  return rejected ? ZE_RESULT_ERROR_INVALID_ENUMERATION : ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Checks a build operation descriptor. Checks run in the order written and
   the first failure determines the returned error code. */
ze_result_t validate(const ze_rtas_builder_build_op_exp_desc_t* args)
{
  if (args == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* descriptor must have the build-op type and a terminated pNext chain */
  if (args->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC ||
      !checkDescChain((zet_base_desc_t_*)args))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  /* requested acceleration structure format must be supported */
  VALIDATE(args->rtasFormat);

  /* a non-empty geometry list needs an array to read it from */
  if (args->numGeometries > 0 && args->ppGeometries == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  /* geometry count, build quality and build flags must all be in range */
  if (args->numGeometries > 0x00FFFFFF ||
      args->buildQuality < 0 || ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH < args->buildQuality ||
      args->buildFlags >= (ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION<<1))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Checks a builder-properties output struct: non-null pointer, expected
   structure type and a terminated pNext chain. */
ze_result_t validate(ze_rtas_builder_exp_properties_t* pProp)
{
  if (pProp == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  if (pProp->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES ||
      !checkDescChain((zet_base_desc_t_*)pProp))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Checks a parallel-operation-properties output struct: non-null pointer,
   expected structure type and a terminated pNext chain. */
ze_result_t validate(ze_rtas_parallel_operation_exp_properties_t* pProperties)
{
  if (pProperties == nullptr)
    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;

  if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES ||
      !checkDescChain((zet_base_desc_t_*)pProperties))
    return ZE_RESULT_ERROR_INVALID_ENUMERATION;

  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Creates a builder object and stores its handle in *phBuilder. Returns the
   validation error for a bad driver handle, descriptor or output pointer. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver,
                                                                    const ze_rtas_builder_exp_desc_t *pDescriptor,
                                                                    ze_rtas_builder_exp_handle_t *phBuilder)
{
  /* input validation */
  VALIDATE(hDriver);
  VALIDATE(pDescriptor);
  VALIDATE_PTR(phBuilder);

  ze_rtas_builder* builder = new ze_rtas_builder();
  *phBuilder = (ze_rtas_builder_exp_handle_t) builder;
  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Destroys the builder object behind hBuilder after validating the handle. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder)
{
  VALIDATE(hBuilder);

  ze_rtas_builder* builder = (ze_rtas_builder*) hBuilder;
  delete builder;
  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Reports whether two acceleration structure formats are compatible: after
   validating the inputs, only identical formats yield ZE_RESULT_SUCCESS;
   any other pair yields ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver,
                                                                                      const ze_rtas_format_exp_t accelFormat,
                                                                                      const ze_rtas_format_exp_t otherAccelFormat )
{
  /* input validation */
  VALIDATE(hDriver);
  VALIDATE(accelFormat);
  VALIDATE(otherAccelFormat);

  const bool sameFormat = (accelFormat == otherAccelFormat);
  return sameFormat ? ZE_RESULT_SUCCESS
                    : ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE;
}
|
||||
|
||||
uint32_t getNumPrimitives(const ze_rtas_builder_geometry_info_exp_t* geom)
|
||||
{
|
||||
switch (geom->geometryType) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return ((ze_rtas_builder_triangles_geometry_info_exp_t*) geom)->triangleCount;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : return ((ze_rtas_builder_procedural_geometry_info_exp_t*) geom)->primCount;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return ((ze_rtas_builder_quads_geometry_info_exp_t*) geom)->quadCount;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : return 1;
|
||||
default : return 0;
|
||||
};
|
||||
}
|
||||
|
||||
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
|
||||
const ze_rtas_builder_build_op_exp_desc_t* args,
|
||||
ze_rtas_builder_exp_properties_t* pProp)
|
||||
{
|
||||
/* input validation */
|
||||
VALIDATE(hBuilder);
|
||||
VALIDATE(args);
|
||||
VALIDATE(pProp);
|
||||
|
||||
const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries;
|
||||
const size_t numGeometries = args->numGeometries;
|
||||
|
||||
auto getSize = [&](uint32_t geomID) -> size_t {
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
if (geom == nullptr) return 0;
|
||||
return getNumPrimitives(geom);
|
||||
};
|
||||
|
||||
auto getType = [&](unsigned int geomID)
|
||||
{
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
assert(geom);
|
||||
switch (geom->geometryType) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE;
|
||||
default: throw std::runtime_error("invalid geometry type");
|
||||
};
|
||||
};
|
||||
|
||||
/* query memory requirements from builder */
|
||||
size_t expectedBytes = 0;
|
||||
size_t worstCaseBytes = 0;
|
||||
size_t scratchBytes = 0;
|
||||
QBVH6BuilderSAH::estimateSize(numGeometries, getSize, getType, args->rtasFormat, args->buildQuality, args->buildFlags, expectedBytes, worstCaseBytes, scratchBytes);
|
||||
|
||||
/* fill return struct */
|
||||
pProp->flags = 0;
|
||||
pProp->rtasBufferSizeBytesExpected = expectedBytes;
|
||||
pProp->rtasBufferSizeBytesMaxRequired = worstCaseBytes;
|
||||
pProp->scratchBufferSizeBytes = scratchBytes;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t zeRTASBuilderBuildExpBody(const ze_rtas_builder_build_op_exp_desc_t* args,
|
||||
void *pScratchBuffer, size_t scratchBufferSizeBytes,
|
||||
void *pRtasBuffer, size_t rtasBufferSizeBytes,
|
||||
void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) try
|
||||
{
|
||||
const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries;
|
||||
const uint32_t numGeometries = args->numGeometries;
|
||||
|
||||
/* verify input descriptors */
|
||||
parallel_for(numGeometries,[&](uint32_t geomID) {
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
if (geom == nullptr) return;
|
||||
|
||||
switch (geom->geometryType) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : verifyGeometryDesc((ze_rtas_builder_triangles_geometry_info_exp_t*)geom); break;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : verifyGeometryDesc((ze_rtas_builder_quads_geometry_info_exp_t* )geom); break;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : verifyGeometryDesc((ze_rtas_builder_procedural_geometry_info_exp_t*)geom); break;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : verifyGeometryDesc((ze_rtas_builder_instance_geometry_info_exp_t* )geom); break;
|
||||
default: throw std::runtime_error("invalid geometry type");
|
||||
};
|
||||
});
|
||||
|
||||
auto getSize = [&](uint32_t geomID) -> size_t {
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
if (geom == nullptr) return 0;
|
||||
return getNumPrimitives(geom);
|
||||
};
|
||||
|
||||
auto getType = [&](unsigned int geomID)
|
||||
{
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
assert(geom);
|
||||
switch (geom->geometryType) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL;
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE;
|
||||
default: throw std::runtime_error("invalid geometry type");
|
||||
};
|
||||
};
|
||||
|
||||
auto createPrimRefArray = [&] (evector<PrimRef>& prims, BBox1f time_range, const range<size_t>& r, size_t k, unsigned int geomID) -> PrimInfo
|
||||
{
|
||||
const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID];
|
||||
assert(geom);
|
||||
|
||||
switch (geom->geometryType) {
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return createGeometryPrimRefArray((ze_rtas_builder_triangles_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return createGeometryPrimRefArray((ze_rtas_builder_quads_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return createGeometryPrimRefArray((ze_rtas_builder_procedural_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID);
|
||||
case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return createGeometryPrimRefArray((ze_rtas_builder_instance_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID);
|
||||
default: throw std::runtime_error("invalid geometry type");
|
||||
};
|
||||
};
|
||||
|
||||
auto convertGeometryFlags = [&] (ze_rtas_builder_packed_geometry_exp_flags_t flags) -> GeometryFlags {
|
||||
return (flags & ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE) ? GeometryFlags::NONE : GeometryFlags::OPAQUE;
|
||||
};
|
||||
|
||||
auto getTriangle = [&](unsigned int geomID, unsigned int primID)
|
||||
{
|
||||
const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID];
|
||||
assert(geom);
|
||||
|
||||
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
|
||||
if (unlikely(tri.v0 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
|
||||
if (unlikely(tri.v1 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
|
||||
if (unlikely(tri.v2 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle();
|
||||
|
||||
const Vec3f p0 = getVertex(geom,tri.v0);
|
||||
const Vec3f p1 = getVertex(geom,tri.v1);
|
||||
const Vec3f p2 = getVertex(geom,tri.v2);
|
||||
if (unlikely(!isvalid(p0))) return QBVH6BuilderSAH::Triangle();
|
||||
if (unlikely(!isvalid(p1))) return QBVH6BuilderSAH::Triangle();
|
||||
if (unlikely(!isvalid(p2))) return QBVH6BuilderSAH::Triangle();
|
||||
|
||||
const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags);
|
||||
return QBVH6BuilderSAH::Triangle(tri.v0,tri.v1,tri.v2,p0,p1,p2,gflags,geom->geometryMask);
|
||||
};
|
||||
|
||||
auto getTriangleIndices = [&] (uint32_t geomID, uint32_t primID) {
|
||||
const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID];
|
||||
assert(geom);
|
||||
const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID);
|
||||
return Vec3<uint32_t>(tri.v0,tri.v1,tri.v2);
|
||||
};
|
||||
|
||||
auto getQuad = [&](unsigned int geomID, unsigned int primID)
|
||||
{
|
||||
const ze_rtas_builder_quads_geometry_info_exp_t* geom = (const ze_rtas_builder_quads_geometry_info_exp_t*) geometries[geomID];
|
||||
assert(geom);
|
||||
|
||||
const ze_rtas_quad_indices_uint32_exp_t quad = getPrimitive(geom,primID);
|
||||
const Vec3f p0 = getVertex(geom,quad.v0);
|
||||
const Vec3f p1 = getVertex(geom,quad.v1);
|
||||
const Vec3f p2 = getVertex(geom,quad.v2);
|
||||
const Vec3f p3 = getVertex(geom,quad.v3);
|
||||
|
||||
const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags);
|
||||
return QBVH6BuilderSAH::Quad(p0,p1,p2,p3,gflags,geom->geometryMask);
|
||||
};
|
||||
|
||||
auto getProcedural = [&](unsigned int geomID, unsigned int primID) {
|
||||
const ze_rtas_builder_procedural_geometry_info_exp_t* geom = (const ze_rtas_builder_procedural_geometry_info_exp_t*) geometries[geomID];
|
||||
assert(geom);
|
||||
return QBVH6BuilderSAH::Procedural(geom->geometryMask); // FIXME: pass gflags
|
||||
};
|
||||
|
||||
auto getInstance = [&](unsigned int geomID, unsigned int primID)
|
||||
{
|
||||
assert(geometries[geomID]);
|
||||
assert(geometries[geomID]->geometryType == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE);
|
||||
const ze_rtas_builder_instance_geometry_info_exp_t* geom = (const ze_rtas_builder_instance_geometry_info_exp_t*) geometries[geomID];
|
||||
void* accel = geom->pAccelerationStructure;
|
||||
const AffineSpace3fa local2world = getTransform(geom);
|
||||
return QBVH6BuilderSAH::Instance(local2world,accel,geom->geometryMask,geom->instanceUserID); // FIXME: pass instance flags
|
||||
};
|
||||
|
||||
/* dispatch globals ptr for debugging purposes */
|
||||
void* dispatchGlobalsPtr = nullptr;
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
if (args->pNext) {
|
||||
zet_base_desc_t_* next = (zet_base_desc_t_*) args->pNext;
|
||||
if (next->stype == ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC) {
|
||||
ze_rtas_builder_build_op_debug_exp_desc_t* debug_ext = (ze_rtas_builder_build_op_debug_exp_desc_t*) next;
|
||||
dispatchGlobalsPtr = debug_ext->dispatchGlobalsPtr;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
bool verbose = false;
|
||||
bool success = QBVH6BuilderSAH::build(numGeometries, nullptr,
|
||||
getSize, getType,
|
||||
createPrimRefArray, getTriangle, getTriangleIndices, getQuad, getProcedural, getInstance,
|
||||
(char*)pRtasBuffer, rtasBufferSizeBytes,
|
||||
pScratchBuffer, scratchBufferSizeBytes,
|
||||
(BBox3f*) pBounds, pRtasBufferSizeBytes,
|
||||
args->rtasFormat, args->buildQuality, args->buildFlags, verbose, dispatchGlobalsPtr);
|
||||
if (!success) {
|
||||
return ZE_RESULT_EXP_RTAS_BUILD_RETRY;
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
//std::cerr << "caught exception during BVH build: " << e.what() << std::endl;
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
/* Builds an acceleration structure.

   Without a parallel operation the build runs synchronously inside g_arena
   and its result code is returned. With a parallel operation the build is
   spawned into the operation's task group and
   ZE_RESULT_EXP_RTAS_BUILD_DEFERRED is returned; the build result is then
   obtained by joining the operation.

   Returns ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE if the parallel operation
   already has a build attached. The in-use flag is claimed with a single
   atomic exchange, so two threads attaching a build to the same operation
   at the same time cannot both succeed. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder,
                                                                   const ze_rtas_builder_build_op_exp_desc_t* args,
                                                                   void *pScratchBuffer, size_t scratchBufferSizeBytes,
                                                                   void *pRtasBuffer, size_t rtasBufferSizeBytes,
                                                                   ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
                                                                   void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes)
{
  /* input validation */
  VALIDATE(hBuilder);
  VALIDATE(args);
  VALIDATE_PTR(pScratchBuffer);
  VALIDATE_PTR(pRtasBuffer);

  /* if parallel operation is provided then execute using thread arena inside task group ... */
  if (hParallelOperation)
  {
    VALIDATE(hParallelOperation);

    ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;

    /* test-and-set in one atomic step: exchange returns the previous value */
    if (op->object_in_use.exchange(true))
      return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;

    /* the task captures its arguments by value, as it outlives this call */
    g_arena.execute([&](){ op->group.run([=](){
       op->errorCode = zeRTASBuilderBuildExpBody(args,
                                                 pScratchBuffer, scratchBufferSizeBytes,
                                                 pRtasBuffer, rtasBufferSizeBytes,
                                                 pBuildUserPtr, pBounds, pRtasBufferSizeBytes);
                                         });
                    });
    return ZE_RESULT_EXP_RTAS_BUILD_DEFERRED;
  }
  /* ... otherwise we just execute inside task arena to avoid spawning of TBB worker threads */
  else
  {
    ze_result_t errorCode = ZE_RESULT_SUCCESS;
    g_arena.execute([&](){ errorCode = zeRTASBuilderBuildExpBody(args,
                                                                 pScratchBuffer, scratchBufferSizeBytes,
                                                                 pRtasBuffer, rtasBufferSizeBytes,
                                                                 pBuildUserPtr, pBounds, pRtasBufferSizeBytes);
                    });
    return errorCode;
  }
}
|
||||
|
||||
/* Creates a parallel operation object and stores its handle in
   *phParallelOperation. Returns the validation error for a bad driver handle
   or output pointer. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver,
                                                                              ze_rtas_parallel_operation_exp_handle_t* phParallelOperation)
{
  /* input validation */
  VALIDATE(hDriver);
  VALIDATE_PTR(phParallelOperation);

  ze_rtas_parallel_operation_t* op = new ze_rtas_parallel_operation_t();
  *phParallelOperation = (ze_rtas_parallel_operation_exp_handle_t) op;
  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Destroys the parallel operation object behind hParallelOperation after
   validating the handle. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation )
{
  VALIDATE(hParallelOperation);

  ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;
  delete op;
  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Fills pProperties for a parallel operation that currently has a build
   attached. Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when no build is
   attached to the operation. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
                                                                                      ze_rtas_parallel_operation_exp_properties_t* pProperties )
{
  /* input validation */
  VALIDATE(hParallelOperation);
  VALIDATE(pProperties);

  ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;
  const bool buildAttached = op->object_in_use.load();
  if (!buildAttached)
    return ZE_RESULT_ERROR_INVALID_ARGUMENT;

  /* concurrency is reported for the task arena of the calling thread */
  pProperties->flags = 0;
  pProperties->maxConcurrency = tbb::this_task_arena::max_concurrency();
  return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
/* Waits inside g_arena for the build attached to the parallel operation to
   finish, releases the operation for reuse and returns the result code of
   the build. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation)
{
  /* check for valid handle */
  VALIDATE(hParallelOperation);

  ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation;
  g_arena.execute([&](){ op->group.wait(); });

  /* Read the result before releasing the operation: once the in-use flag is
     cleared another thread may attach a new build, whose task overwrites
     errorCode. */
  const ze_result_t buildResult = op->errorCode;
  op->object_in_use.store(false);
  return buildResult;
}
|
||||
}
|
||||
66
Framework/external/embree/kernels/rthwif/rtbuild/rtbuild.h
vendored
Normal file
66
Framework/external/embree/kernels/rthwif/rtbuild/rtbuild.h
vendored
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright 2009-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../level_zero/ze_api.h"
|
||||
|
||||
#if !defined(ZE_RTAS_BUILDER_EXP_NAME)
|
||||
#include "../../level_zero/ze_rtas.h"
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Linkage specifier: extern "C" for C++ translation units, nothing for C. */
#if defined(__cplusplus)
#  define RTHWIF_API_EXTERN_C extern "C"
#else
#  define RTHWIF_API_EXTERN_C
#endif

/* Import/export decoration of the API functions:
     - non-Windows: exported symbols get default visibility
     - Windows, static library: no decoration besides the linkage specifier
     - Windows, shared library: dllimport / dllexport */
#if !defined(_WIN32)
#  define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C
#  define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __attribute__ ((visibility ("default")))
#elif defined(EMBREE_RTHWIF_STATIC_LIB)
#  define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C
#  define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C
#else
#  define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C __declspec(dllimport)
#  define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __declspec(dllexport)
#endif
|
||||
|
||||
/* Internal acceleration structure format identifiers. */
typedef enum _ze_raytracing_accel_format_internal_t
{
  /* invalid acceleration structure format */
  ZE_RTAS_DEVICE_FORMAT_EXP_INVALID = 0,

  /* acceleration structure format version 1 */
  ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1 = 1,

  /* acceleration structure format version 2 */
  ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_2 = 2,

  /* highest format version listed in this enum */
  ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX = 2

} ze_raytracing_accel_format_internal_t;
|
||||
|
||||
/* Creates a builder object and returns its handle through phBuilder. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(
  ze_driver_handle_t hDriver,
  const ze_rtas_builder_exp_desc_t *pDescriptor,
  ze_rtas_builder_exp_handle_t *phBuilder);

/* Destroys a builder object. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(
  ze_rtas_builder_exp_handle_t hBuilder);

/* Checks whether two acceleration structure formats are compatible. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl(
  ze_driver_handle_t hDriver,
  const ze_rtas_format_exp_t accelFormat,
  const ze_rtas_format_exp_t otherAccelFormat);

/* Queries the buffer sizes required for a build operation. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(
  ze_rtas_builder_exp_handle_t hBuilder,
  const ze_rtas_builder_build_op_exp_desc_t* args,
  ze_rtas_builder_exp_properties_t* pProp);

/* Builds an acceleration structure, either immediately or - when a parallel
   operation handle is passed - deferred until that operation is joined. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(
  ze_rtas_builder_exp_handle_t hBuilder,
  const ze_rtas_builder_build_op_exp_desc_t* args,
  void *pScratchBuffer, size_t scratchBufferSizeBytes,
  void *pRtasBuffer, size_t rtasBufferSizeBytes,
  ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
  void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes);

/* Creates a parallel operation object and returns its handle through
   phParallelOperation. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(
  ze_driver_handle_t hDriver,
  ze_rtas_parallel_operation_exp_handle_t* phParallelOperation);

/* Destroys a parallel operation object. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl(
  ze_rtas_parallel_operation_exp_handle_t hParallelOperation );

/* Queries the properties of a parallel operation. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl(
  ze_rtas_parallel_operation_exp_handle_t hParallelOperation,
  ze_rtas_parallel_operation_exp_properties_t* pProperties );

/* Waits for the build attached to a parallel operation and returns its result. */
RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl(
  ze_rtas_parallel_operation_exp_handle_t hParallelOperation);
|
||||
|
||||
155
Framework/external/embree/kernels/rthwif/rtbuild/statistics.cpp
vendored
Normal file
155
Framework/external/embree/kernels/rthwif/rtbuild/statistics.cpp
vendored
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "statistics.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* Scope guard for stream formatting: remembers the format flags and the
   precision of a stream on construction and puts both back on destruction. */
class RestoreStreamState
{
public:
  RestoreStreamState(std::ostream& iostream)
    : guarded(iostream),
      savedFlags(iostream.flags()),
      savedPrecision(iostream.precision())
  {}

  ~RestoreStreamState()
  {
    guarded.flags(savedFlags);
    guarded.precision(savedPrecision);
  }

private:
  std::ostream& guarded;             // stream whose state is restored
  std::ios::fmtflags savedFlags;     // format flags at construction time
  std::streamsize savedPrecision;    // precision at construction time
};
|
||||
|
||||
/* a/b, with a zero denominator mapped to 0 instead of inf/NaN */
double ratio(double a, double b)
{
  return (b == 0.0) ? 0.0 : a/b;
}

/* a as a percentage of b; 0 when b is zero */
double percent(double a, double b)
{
  const double fraction = ratio(a,b);
  return 100.0*fraction;
}

/* integer-count overload of ratio() */
double ratio(size_t a, size_t b)
{
  const double num = double(a);
  const double den = double(b);
  return ratio(num, den);
}

/* integer-count overload of percent() */
double percent(size_t a, size_t b)
{
  const double num = double(a);
  const double den = double(b);
  return percent(num, den);
}
|
||||
|
||||
void BVHStatistics::NodeStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const
|
||||
{
|
||||
RestoreStreamState iostate(cout);
|
||||
cout << std::setw(7) << numNodes << " ";
|
||||
cout << std::setw(7) << std::setprecision(3) << sah();
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << bytes()/1E6 << " MB ";
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(numBytes,numBytes) << "% ";
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numNodes) << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numChildrenUsed) << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " ";
|
||||
cout << std::setw(7) << std::setprecision(2) << ratio(numChildrenUsed,numNodes) << " ";
|
||||
cout << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% ";
|
||||
cout << std::endl;
|
||||
}
|
||||
|
||||
void BVHStatistics::LeafStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks) const
|
||||
{
|
||||
RestoreStreamState iostate(cout);
|
||||
size_t N = blocks ? numBlocks : numLeaves;
|
||||
cout << std::setw(7) << N << " ";
|
||||
cout << std::setw(7) << std::setprecision(3) << sah();
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << double(bytes())/1E6 << " MB ";
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(numBytesUsed,numBytesTotal) << "% ";
|
||||
cout << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),N) << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimsUsed) << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " ";
|
||||
cout << std::setw(7) << std::setprecision(2) << ratio(numPrimsUsed,N) << " ";
|
||||
cout << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% ";
|
||||
cout << std::endl;
|
||||
}
|
||||
|
||||
void BVHStatistics::print (std::ostream& cout) const
|
||||
{
|
||||
RestoreStreamState iostate(cout);
|
||||
cout.setf(std::ios::fixed, std::ios::floatfield);
|
||||
cout.fill(' ');
|
||||
|
||||
double totalSAH = internalNode.nodeSAH + quadLeaf.leafSAH + proceduralLeaf.leafSAH + instanceLeaf.leafSAH;
|
||||
size_t totalBytes = internalNode.bytes() + quadLeaf.bytes() + proceduralLeaf.bytes() + instanceLeaf.bytes();
|
||||
size_t totalNodes = internalNode.numNodes + quadLeaf.numLeaves + proceduralLeaf.numLeaves + instanceLeaf.numLeaves;
|
||||
size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed;
|
||||
|
||||
cout << std::endl;
|
||||
cout << "BVH statistics:" << std::endl;
|
||||
cout << "---------------" << std::endl;
|
||||
cout << " numScenePrimitives = " << numScenePrimitives << std::endl;
|
||||
cout << " numBuildPrimitives = " << numBuildPrimitives << std::endl;
|
||||
cout << " numBuildPrimitivesPostSplit = " << numBuildPrimitivesPostSplit << std::endl;
|
||||
cout << " primRefSplits = " << std::setprecision(2) << percent(numBuildPrimitivesPostSplit,numBuildPrimitives) << "%" << std::endl;
|
||||
cout << " numBVHPrimitives = " << totalPrimitives << std::endl;
|
||||
cout << " spatialSplits = " << std::setprecision(2) << percent(totalPrimitives,numScenePrimitives) << "%" << std::endl;
|
||||
cout << std::endl;
|
||||
|
||||
cout << " #nodes SAH total bytes used total b/node b/child b/prim #child fill" << std::endl;
|
||||
cout << "----------------------------------------------------------------------------------------------------------------------" << std::endl;
|
||||
cout << " total : ";
|
||||
cout << std::setw(7) << totalNodes << " ";
|
||||
cout << std::setw(7) << std::setprecision(3) << totalSAH;
|
||||
cout << " 100.00% ";
|
||||
cout << std::setw(8) << std::setprecision(2) << totalBytes/1E6 << " MB ";
|
||||
cout << " 100.00% ";
|
||||
cout << " 100.00% ";
|
||||
cout << " ";
|
||||
cout << " ";
|
||||
cout << std::setw(8) << std::setprecision(2) << ratio(totalBytes,totalPrimitives) << std::endl;
|
||||
|
||||
LeafStat leaf = quadLeaf + proceduralLeaf + instanceLeaf;
|
||||
cout << " internalNode : "; internalNode .print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
cout << " leaves : "; leaf .print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
cout << " quadLeaf : "; quadLeaf .print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
cout << " proceduralLeaf : "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
cout << " proceduralBlock: "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives,true);
|
||||
cout << " instanceLeaf : "; instanceLeaf .print(cout,totalSAH,totalBytes,totalPrimitives);
|
||||
}
|
||||
|
||||
void BVHStatistics::print_raw(std::ostream& cout) const
|
||||
{
|
||||
RestoreStreamState iostate(cout);
|
||||
size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed;
|
||||
cout << "bvh_spatial_split_factor = " << percent(totalPrimitives,numBuildPrimitives) << std::endl;
|
||||
|
||||
cout << "bvh_internal_sah = " << internalNode.nodeSAH << std::endl;
|
||||
cout << "bvh_internal_num = " << internalNode.numNodes << std::endl;
|
||||
cout << "bvh_internal_num_children_used = " << internalNode.numChildrenUsed << std::endl;
|
||||
cout << "bvh_internal_num_children_total = " << internalNode.numChildrenTotal << std::endl;
|
||||
cout << "bvh_internal_num_bytes = " << internalNode.bytes() << std::endl;
|
||||
|
||||
cout << "bvh_quad_leaf_sah = " << quadLeaf.leafSAH << std::endl;
|
||||
cout << "bvh_quad_leaf_num = " << quadLeaf.numLeaves << std::endl;
|
||||
cout << "bvh_quad_leaf_num_prims_used = " << quadLeaf.numPrimsUsed << std::endl;
|
||||
cout << "bvh_quad_leaf_num_prims_total = " << quadLeaf.numPrimsTotal << std::endl;
|
||||
cout << "bvh_quad_leaf_num_bytes_used = " << quadLeaf.numBytesUsed << std::endl;
|
||||
cout << "bvh_quad_leaf_num_bytes_total = " << quadLeaf.numBytesTotal << std::endl;
|
||||
|
||||
cout << "bvh_procedural_leaf_sah = " << proceduralLeaf.leafSAH << std::endl;
|
||||
cout << "bvh_procedural_leaf_num = " << proceduralLeaf.numLeaves << std::endl;
|
||||
cout << "bvh_procedural_leaf_num_prims_used = " << proceduralLeaf.numPrimsUsed << std::endl;
|
||||
cout << "bvh_procedural_leaf_num_prims_total = " << proceduralLeaf.numPrimsTotal << std::endl;
|
||||
cout << "bvh_procedural_leaf_num_bytes_used = " << proceduralLeaf.numBytesUsed << std::endl;
|
||||
cout << "bvh_procedural_leaf_num_bytes_total = " << proceduralLeaf.numBytesTotal << std::endl;
|
||||
|
||||
cout << "bvh_instance_leaf_sah = " << instanceLeaf.leafSAH << std::endl;
|
||||
cout << "bvh_instance_leaf_num = " << instanceLeaf.numLeaves << std::endl;
|
||||
cout << "bvh_instance_leaf_num_prims_used = " << instanceLeaf.numPrimsUsed << std::endl;
|
||||
cout << "bvh_instance_leaf_num_prims_total = " << instanceLeaf.numPrimsTotal << std::endl;
|
||||
cout << "bvh_instance_leaf_num_bytes_used = " << instanceLeaf.numBytesUsed << std::endl;
|
||||
cout << "bvh_instance_leaf_num_bytes_total = " << instanceLeaf.numBytesTotal << std::endl;
|
||||
}
|
||||
}
|
||||
118
Framework/external/embree/kernels/rthwif/rtbuild/statistics.h
vendored
Normal file
118
Framework/external/embree/kernels/rthwif/rtbuild/statistics.h
vendored
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(ZE_RAYTRACING)
|
||||
#include "sys/platform.h"
|
||||
#else
|
||||
#include "../../../common/sys/platform.h"
|
||||
#endif
|
||||
|
||||
namespace embree
{
  /*! Statistics gathered for a BVH: primitive counts plus accumulated
   *  figures for internal nodes and for each kind of leaf. */
  struct BVHStatistics
  {
    /*! Accumulated figures for internal nodes. */
    struct NodeStat
    {
      NodeStat ( double nodeSAH = 0,
                 size_t numNodes = 0,
                 size_t numChildrenUsed = 0,
                 size_t numChildrenTotal = 0,
                 size_t numBytes = 0)
        : nodeSAH(nodeSAH),
          numNodes(numNodes),
          numChildrenUsed(numChildrenUsed),
          numChildrenTotal(numChildrenTotal),
          numBytes(numBytes) {}

      /* simple accessors shared (by name) with LeafStat */
      double sah() const { return nodeSAH; }
      size_t bytes() const { return numBytes; }
      size_t size() const { return numNodes; }

      /* fill rate = used children / total children, 0 when there are no children */
      double fillRateNom () const { return double(numChildrenUsed); }
      double fillRateDen () const { return double(numChildrenTotal); }
      double fillRate () const
      {
        const double den = fillRateDen();
        if (den == 0.0) return 0.0;
        return fillRateNom()/den;
      }

      /*! Component-wise sum of two statistics records. */
      friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b)
      {
        NodeStat sum(a);
        sum.nodeSAH          = a.nodeSAH + b.nodeSAH;
        sum.numNodes         = a.numNodes+b.numNodes;
        sum.numChildrenUsed  = a.numChildrenUsed+b.numChildrenUsed;
        sum.numChildrenTotal = a.numChildrenTotal+b.numChildrenTotal;
        sum.numBytes         = a.numBytes+b.numBytes;
        return sum;
      }

      /* defined outside of this header */
      void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const;

      double nodeSAH;          //!< accumulated SAH of the nodes
      size_t numNodes;         //!< number of nodes
      size_t numChildrenUsed;  //!< number of used child slots
      size_t numChildrenTotal; //!< number of child slots in total
      size_t numBytes;         //!< number of bytes of the nodes
    };

    /*! Accumulated figures for one kind of leaf. */
    struct LeafStat
    {
      LeafStat(double leafSAH = 0.0f,
               size_t numLeaves = 0,
               size_t numBlocks = 0,
               size_t numPrimsUsed = 0,
               size_t numPrimsTotal = 0,
               size_t numBytesUsed = 0,
               size_t numBytesTotal = 0)
        : leafSAH(leafSAH),
          numLeaves(numLeaves),
          numBlocks(numBlocks),
          numPrimsUsed(numPrimsUsed),
          numPrimsTotal(numPrimsTotal),
          numBytesUsed(numBytesUsed),
          numBytesTotal(numBytesTotal) {}

      /* simple accessors shared (by name) with NodeStat; bytes() reports the total, not the used bytes */
      double sah() const { return leafSAH; }
      size_t bytes() const { return numBytesTotal; }
      size_t size() const { return numLeaves; }

      /* fill rate = used primitives / total primitives, 0 when there are no primitives */
      double fillRateNom () const { return double(numPrimsUsed); }
      double fillRateDen () const { return double(numPrimsTotal); }
      double fillRate () const
      {
        const double den = fillRateDen();
        if (den == 0.0) return 0.0;
        return fillRateNom()/den;
      }

      /*! Component-wise sum of two statistics records. */
      friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
      {
        LeafStat sum(a);
        sum.leafSAH       = a.leafSAH + b.leafSAH;
        sum.numLeaves     = a.numLeaves+b.numLeaves;
        sum.numBlocks     = a.numBlocks+b.numBlocks;
        sum.numPrimsUsed  = a.numPrimsUsed+b.numPrimsUsed;
        sum.numPrimsTotal = a.numPrimsTotal+b.numPrimsTotal;
        sum.numBytesUsed  = a.numBytesUsed+b.numBytesUsed;
        sum.numBytesTotal = a.numBytesTotal+b.numBytesTotal;
        return sum;
      }

      /* defined outside of this header */
      void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks = false) const;

      double leafSAH;       //!< SAH of the leaves only
      size_t numLeaves;     //!< Number of leaf nodes.
      size_t numBlocks;     //!< Number of blocks referenced
      size_t numPrimsUsed;  //!< Number of active primitives
      size_t numPrimsTotal; //!< Number of active and inactive primitives
      size_t numBytesUsed;  //!< Number of used bytes
      size_t numBytesTotal; //!< Number of total bytes of leaves.
    };

    /*! Starts with all primitive counters at zero and empty node/leaf records. */
    BVHStatistics ()
      : numScenePrimitives(0), numBuildPrimitives(0), numBuildPrimitivesPostSplit(0) {}

    /* output routines, defined outside of this header */
    void print (std::ostream& cout) const;
    void print_raw(std::ostream& cout) const;

    size_t numScenePrimitives;
    size_t numBuildPrimitives;
    size_t numBuildPrimitivesPostSplit;
    NodeStat internalNode;   //!< statistics of internal nodes
    LeafStat quadLeaf;       //!< statistics of quad leaves
    LeafStat proceduralLeaf; //!< statistics of procedural leaves
    LeafStat instanceLeaf;   //!< statistics of instance leaves
  };
}
|
||||
266
Framework/external/embree/kernels/rthwif/rttrace/rttrace.h
vendored
Normal file
266
Framework/external/embree/kernels/rthwif/rttrace/rttrace.h
vendored
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(ZE_RAYTRACING_RT_SIMULATION)
|
||||
#include "rtcore.h"
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_SYCL_RT_VALIDATION_API)
|
||||
# include "rttrace_validation.h"
|
||||
#else
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#pragma clang diagnostic ignored "-W#pragma-messages"
|
||||
|
||||
#include <sycl/sycl.hpp>
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wreturn-type-c-linkage"
|
||||
|
||||
// Bit flags that can be combined in intel_ray_desc_t::flags.
enum intel_ray_flags_t
{
  intel_ray_flags_none                            = 0,
  intel_ray_flags_force_opaque                    = 1 << 0, // forces geometry to be opaque (no anyhit shader invocation)
  intel_ray_flags_force_non_opaque                = 1 << 1, // forces geometry to be non-opaque (invoke anyhit shader)
  intel_ray_flags_accept_first_hit_and_end_search = 1 << 2, // terminates traversal on the first hit found (shadow rays)
  intel_ray_flags_skip_closest_hit_shader         = 1 << 3, // skip execution of the closest hit shader
  intel_ray_flags_cull_back_facing_triangles      = 1 << 4, // back facing triangles do not produce a hit
  intel_ray_flags_cull_front_facing_triangles     = 1 << 5, // front facing triangles do not produce a hit
  intel_ray_flags_cull_opaque                     = 1 << 6, // opaque geometry does not produce a hit
  intel_ray_flags_cull_non_opaque                 = 1 << 7, // non-opaque geometry does not produce a hit
  intel_ray_flags_skip_triangles                  = 1 << 8, // treat all triangle intersections as misses
  intel_ray_flags_skip_procedural_primitives      = 1 << 9, // skip execution of intersection shaders
};
|
||||
|
||||
// Selects which of the two hit records of a ray query an accessor reads.
enum intel_hit_type_t
{
  intel_hit_type_committed_hit = 0, // the committed hit
  intel_hit_type_potential_hit = 1, // the potential (not yet committed) hit
};
|
||||
|
||||
// Feature bits reported by intel_get_raytracing_ext_flag().
enum intel_raytracing_ext_flag_t
{
  intel_raytracing_ext_flag_ray_query = 0x1, // set if ray queries are supported
};
|
||||
|
||||
// opaque types
|
||||
typedef __attribute__((opencl_private)) struct intel_ray_query_opaque_t* intel_ray_query_t;
|
||||
typedef __attribute__((opencl_global )) struct intel_raytracing_acceleration_structure_opaque_t* intel_raytracing_acceleration_structure_t;
|
||||
|
||||
struct intel_float2
|
||||
{
|
||||
float x, y;
|
||||
|
||||
intel_float2() {}
|
||||
|
||||
intel_float2(float x, float y)
|
||||
: x(x), y(y) {}
|
||||
|
||||
intel_float2(sycl::float2 v)
|
||||
: x(v.x()), y(v.y()) {}
|
||||
|
||||
operator sycl::float2() {
|
||||
return sycl::float2(x,y);
|
||||
}
|
||||
};
|
||||
|
||||
struct intel_float3
|
||||
{
|
||||
float x, y, z;
|
||||
|
||||
intel_float3() {}
|
||||
|
||||
intel_float3(float x, float y, float z)
|
||||
: x(x), y(y), z(z) {}
|
||||
|
||||
intel_float3(sycl::float3 v)
|
||||
: x(v.x()), y(v.y()), z(v.z()) {}
|
||||
|
||||
operator sycl::float3() {
|
||||
return sycl::float3(x,y,z);
|
||||
}
|
||||
};
|
||||
|
||||
struct intel_float4x3 {
|
||||
intel_float3 vx, vy, vz, p;
|
||||
};
|
||||
|
||||
struct intel_ray_desc_t
|
||||
{
|
||||
intel_float3 origin;
|
||||
intel_float3 direction;
|
||||
float tmin;
|
||||
float tmax;
|
||||
unsigned int mask;
|
||||
intel_ray_flags_t flags;
|
||||
};
|
||||
|
||||
// When traversal returns, this tells whether the candidate hit is a
// triangle or a procedural primitive.
enum intel_candidate_type_t
{
  intel_candidate_type_triangle   = 0,
  intel_candidate_type_procedural = 1
};
|
||||
|
||||
#ifdef __SYCL_DEVICE_ONLY__
|
||||
|
||||
|
||||
// check supported ray tracing features
|
||||
SYCL_EXTERNAL extern "C" intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag();
|
||||
|
||||
// initializes a ray query
|
||||
SYCL_EXTERNAL extern "C" intel_ray_query_t intel_ray_query_init(
|
||||
intel_ray_desc_t ray,
|
||||
intel_raytracing_acceleration_structure_t accel
|
||||
);
|
||||
|
||||
// setup for instance traversal using a transformed ray and bottom-level AS
|
||||
SYCL_EXTERNAL extern "C" void intel_ray_query_forward_ray(
|
||||
intel_ray_query_t query,
|
||||
intel_ray_desc_t ray,
|
||||
intel_raytracing_acceleration_structure_t accel
|
||||
);
|
||||
|
||||
// commit the potential hit
|
||||
SYCL_EXTERNAL extern "C" void intel_ray_query_commit_potential_hit(
|
||||
intel_ray_query_t query
|
||||
);
|
||||
|
||||
// commit the potential hit and override hit distance and UVs
|
||||
SYCL_EXTERNAL extern "C" void intel_ray_query_commit_potential_hit_override(
|
||||
intel_ray_query_t query,
|
||||
float override_hit_distance,
|
||||
intel_float2 override_uv
|
||||
);
|
||||
|
||||
// start traversal of a ray query
|
||||
SYCL_EXTERNAL extern "C" void intel_ray_query_start_traversal( intel_ray_query_t query );
|
||||
|
||||
// synchronize rayquery execution. If a ray was dispatched,
|
||||
// this must be called prior to calling any of the accessors below.
|
||||
SYCL_EXTERNAL extern "C" void intel_ray_query_sync( intel_ray_query_t query );
|
||||
|
||||
// signal that a ray query will not be used further.  This is the moral equivalent of a delete
|
||||
// this function does an implicit sync
|
||||
SYCL_EXTERNAL extern "C" void intel_ray_query_abandon( intel_ray_query_t query );
|
||||
|
||||
// read hit information during shader execution
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_bvh_level( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL extern "C" float intel_get_hit_distance( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL extern "C" intel_float2 intel_get_hit_barycentrics( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL extern "C" bool intel_get_hit_front_face( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_geometry_id(intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ); // fast path for quad leaves
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ); // fast path for procedural leaves
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_instance_id( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_instance_user_id( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL extern "C" intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL extern "C" intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
|
||||
// fetch triangle vertices for a hit
|
||||
SYCL_EXTERNAL extern "C" void intel_get_hit_triangle_vertices( intel_ray_query_t query, intel_float3 vertices_out[3], intel_hit_type_t hit_type );
|
||||
|
||||
// Read ray-data. This is used to read transformed rays produced by HW instancing pipeline
|
||||
// during any-hit or intersection shader execution.
|
||||
SYCL_EXTERNAL extern "C" intel_float3 intel_get_ray_origin( intel_ray_query_t query, unsigned int bvh_level );
|
||||
SYCL_EXTERNAL extern "C" intel_float3 intel_get_ray_direction( intel_ray_query_t query, unsigned int bvh_level );
|
||||
SYCL_EXTERNAL extern "C" float intel_get_ray_tmin( intel_ray_query_t query, unsigned int bvh_level );
|
||||
SYCL_EXTERNAL extern "C" intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t query, unsigned int bvh_level );
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_get_ray_mask( intel_ray_query_t query, unsigned int bvh_level );
|
||||
|
||||
SYCL_EXTERNAL extern "C" intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t query, intel_hit_type_t hit_type );
|
||||
|
||||
// test whether traversal has terminated. If false, the ray has reached
|
||||
// a procedural leaf or a non-opaque triangle leaf, and requires shader processing
|
||||
SYCL_EXTERNAL extern "C" bool intel_is_traversal_done( intel_ray_query_t query );
|
||||
|
||||
// if traversal is done one can test for the presence of a committed hit to either invoke miss or closest hit shader
|
||||
SYCL_EXTERNAL extern "C" bool intel_has_committed_hit( intel_ray_query_t query );
|
||||
|
||||
#else
|
||||
|
||||
// Host-side stand-in: always reports that ray queries are supported.
inline intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag()
{
  return intel_raytracing_ext_flag_ray_query;
}
|
||||
|
||||
// Host-side stand-in for initializing a ray query: ignores its arguments and
// returns a null query handle.  Uses nullptr rather than the NULL macro so
// the stub needs no extra header and matches the other host stubs.
inline intel_ray_query_t intel_ray_query_init(
  intel_ray_desc_t ray,
  intel_raytracing_acceleration_structure_t accel
) { return nullptr; }
|
||||
|
||||
// setup for instance traversal using a transformed ray and bottom-level AS
|
||||
inline void intel_ray_query_forward_ray(
|
||||
intel_ray_query_t query,
|
||||
intel_ray_desc_t ray,
|
||||
intel_raytracing_acceleration_structure_t accel
|
||||
) {}
|
||||
|
||||
// commit the potential hit
|
||||
inline void intel_ray_query_commit_potential_hit(
|
||||
intel_ray_query_t query
|
||||
) {}
|
||||
|
||||
// commit the potential hit and override hit distance and UVs
|
||||
inline void intel_ray_query_commit_potential_hit_override(
|
||||
intel_ray_query_t query,
|
||||
float override_hit_distance,
|
||||
intel_float2 override_uv
|
||||
) {}
|
||||
|
||||
// start traversal of a ray query
|
||||
inline void intel_ray_query_start_traversal( intel_ray_query_t query ) {}
|
||||
|
||||
// synchronize rayquery execution. If a ray was dispatched,
|
||||
// This must be called prior to calling any of the accessors below.
|
||||
inline void intel_ray_query_sync( intel_ray_query_t query ) {}
|
||||
|
||||
// signal that a ray query will not be used further. This is the moral equaivalent of a delete
|
||||
// this function does an implicit sync
|
||||
inline void intel_ray_query_abandon( intel_ray_query_t query ) {}
|
||||
|
||||
// read hit information during shader execution
|
||||
inline unsigned int intel_get_hit_bvh_level( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
|
||||
inline float intel_get_hit_distance( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0.0f; }
|
||||
inline intel_float2 intel_get_hit_barycentrics( intel_ray_query_t query, intel_hit_type_t hit_type ) { return { 0,0 }; }
|
||||
inline bool intel_get_hit_front_face( intel_ray_query_t query, intel_hit_type_t hit_type ) { return false; }
|
||||
inline unsigned int intel_get_hit_geometry_id(intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
|
||||
inline unsigned int intel_get_hit_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
|
||||
inline unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } // fast path for quad leaves
|
||||
inline unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } // fast path for procedural leaves
|
||||
inline unsigned int intel_get_hit_instance_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
|
||||
inline unsigned int intel_get_hit_instance_user_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; }
|
||||
inline intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t query, intel_hit_type_t hit_type ) { return { {0,0,0}, {0,0,0}, {0,0,0}, {0,0,0} }; }
|
||||
inline intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t query, intel_hit_type_t hit_type ) { return { {0,0,0}, {0,0,0}, {0,0,0}, {0,0,0} }; }
|
||||
|
||||
// fetch triangle vertices for a hit
|
||||
inline void intel_get_hit_triangle_vertices( intel_ray_query_t query, intel_float3 vertices_out[3], intel_hit_type_t hit_type ) {}
|
||||
|
||||
// Read ray-data. This is used to read transformed rays produced by HW instancing pipeline
|
||||
// during any-hit or intersection shader execution.
|
||||
inline intel_float3 intel_get_ray_origin( intel_ray_query_t query, unsigned int bvh_level ) { return { 0,0,0 }; }
|
||||
inline intel_float3 intel_get_ray_direction( intel_ray_query_t query, unsigned int bvh_level ) { return { 0,0,0 }; }
|
||||
inline float intel_get_ray_tmin( intel_ray_query_t query, unsigned int bvh_level ) { return 0.0f; }
|
||||
inline intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t query, unsigned int bvh_level ) { return intel_ray_flags_none; }
|
||||
inline unsigned int intel_get_ray_mask( intel_ray_query_t query, unsigned int bvh_level ) { return 0; }
|
||||
|
||||
inline intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t query, intel_hit_type_t hit_type ) { return intel_candidate_type_triangle; }
|
||||
|
||||
// test whether traversal has terminated. If false, the ray has reached
|
||||
// a procedural leaf or a non-opaque triangle leaf, and requires shader processing
|
||||
inline bool intel_is_traversal_done( intel_ray_query_t query ) { return false; }
|
||||
|
||||
// if traversal is done one can test for the presence of a committed hit to either invoke miss or closest hit shader
|
||||
inline bool intel_has_committed_hit( intel_ray_query_t query ) { return false; }
|
||||
|
||||
#endif
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
#endif
|
||||
293
Framework/external/embree/kernels/rthwif/rttrace/rttrace_internal.h
vendored
Normal file
293
Framework/external/embree/kernels/rthwif/rttrace/rttrace_internal.h
vendored
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
# define MemRay MemRayV1
|
||||
# define MemHit MemHitV1
|
||||
# define QuadLeaf QuadLeafV1
|
||||
# define InstanceLeaf InstanceLeafV1
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Control value stored in a ray query and passed to
// intel_dispatch_trace_ray_query; selects how hit and traversal state are set up.
enum TraceRayCtrl
{
  TRACE_RAY_INITIAL  = 0x000, // Initializes hit and initializes traversal state
  TRACE_RAY_INSTANCE = 0x001, // Loads committed hit and initializes traversal state
  TRACE_RAY_COMMIT   = 0x002, // Loads potential hit and loads traversal state
  TRACE_RAY_CONTINUE = 0x003, // Loads committed hit and loads traversal state
  TRACE_RAY_DONE     = 0x100, // for internal use only
};
|
||||
|
||||
typedef __attribute__((opencl_global)) struct rtglobals_opaque_t* rtglobals_t;
|
||||
typedef __attribute__((opencl_private)) struct rtfence_opaque_t* rtfence_t;
|
||||
|
||||
#if defined(__SYCL_DEVICE_ONLY__) || defined(EMBREE_SYCL_RT_SIMULATION)
|
||||
|
||||
SYCL_EXTERNAL extern "C" __attribute__((opencl_global)) void* intel_get_implicit_dispatch_globals();
|
||||
SYCL_EXTERNAL extern "C" void* intel_get_rt_stack(rtglobals_t rt_dispatch_globals);
|
||||
SYCL_EXTERNAL extern "C" void* intel_get_thread_btd_stack(rtglobals_t rt_dispatch_globals);
|
||||
SYCL_EXTERNAL extern "C" void* intel_get_global_btd_stack(rtglobals_t rt_dispatch_globals);
|
||||
SYCL_EXTERNAL extern "C" rtfence_t intel_dispatch_trace_ray_query(rtglobals_t rt_dispatch_globals, unsigned int bvh_level, unsigned int traceRayCtrl);
|
||||
SYCL_EXTERNAL extern "C" void intel_rt_sync(rtfence_t fence);
|
||||
|
||||
#else
|
||||
|
||||
inline void* intel_get_implicit_dispatch_globals() { return nullptr; }
|
||||
inline void* intel_get_rt_stack(rtglobals_t rt_dispatch_globals) { return nullptr; }
|
||||
inline void* intel_get_thread_btd_stack(rtglobals_t rt_dispatch_globals) { return nullptr; }
|
||||
inline void* intel_get_global_btd_stack(rtglobals_t rt_dispatch_globals) { return nullptr; }
|
||||
inline rtfence_t intel_dispatch_trace_ray_query(rtglobals_t rt_dispatch_globals, unsigned int bvh_level, unsigned int traceRayCtrl) { return nullptr; }
|
||||
inline void intel_rt_sync(rtfence_t fence) {}
|
||||
|
||||
#endif
|
||||
|
||||
// Type tag of a BVH node.  MIXED and INTERNAL intentionally share the value 0.
enum NodeType
{
  NODE_TYPE_MIXED      = 0, // identifies a mixed internal node where each child can have a different type
  NODE_TYPE_INTERNAL   = 0, // internal BVH node with 6 children
  NODE_TYPE_INSTANCE   = 1, // instance leaf
  NODE_TYPE_PROCEDURAL = 3, // procedural leaf
  NODE_TYPE_QUAD       = 4, // quad leaf
  NODE_TYPE_INVALID    = 7  // indicates invalid node
};
|
||||
|
||||
struct __attribute__ ((packed,aligned(32))) MemRayV1
|
||||
{
|
||||
void init(intel_ray_desc_t ray, uint64_t rootNodePtr_i)
|
||||
{
|
||||
org[0] = ray.origin.x;
|
||||
org[1] = ray.origin.y;
|
||||
org[2] = ray.origin.z;
|
||||
dir[0] = ray.direction.x;
|
||||
dir[1] = ray.direction.y;
|
||||
dir[2] = ray.direction.z;
|
||||
tnear = ray.tmin;
|
||||
tfar = ray.tmax;
|
||||
rootNodePtr = rootNodePtr_i;
|
||||
rayFlags = ray.flags;
|
||||
hitGroupSRBasePtr = 0;
|
||||
hitGroupSRStride = 0;
|
||||
missSRPtr = 0;
|
||||
pad0 = 0;
|
||||
shaderIndexMultiplier = 0;
|
||||
instLeafPtr = 0;
|
||||
rayMask = ray.mask;
|
||||
pad1 = 0;
|
||||
}
|
||||
|
||||
// 32 B
|
||||
float org[3];
|
||||
float dir[3];
|
||||
float tnear;
|
||||
float tfar;
|
||||
|
||||
// 32 B
|
||||
struct { // FIXME: removing these anonymous structs triggers IGC bug
|
||||
uint64_t rootNodePtr : 48; // root node to start traversal at
|
||||
uint64_t rayFlags : 16; // ray flags (see RayFlag structure)
|
||||
};
|
||||
|
||||
struct {
|
||||
uint64_t hitGroupSRBasePtr : 48; // base of hit group shader record array (16-bytes alignment)
|
||||
uint64_t hitGroupSRStride : 16; // stride of hit group shader record array (16-bytes alignment)
|
||||
};
|
||||
|
||||
struct {
|
||||
uint64_t missSRPtr : 48; // pointer to miss shader record to invoke on a miss (8-bytes alignment)
|
||||
uint64_t pad0 : 8; // padding byte (has to be zero)
|
||||
uint64_t shaderIndexMultiplier : 8; // shader index multiplier
|
||||
};
|
||||
|
||||
struct {
|
||||
uint64_t instLeafPtr : 48; // the pointer to instance leaf in case we traverse an instance (64-bytes alignment)
|
||||
uint64_t rayMask : 8; // ray mask used for ray masking
|
||||
uint64_t pad1 : 8; // padding byte (has to be zero)
|
||||
};
|
||||
};
|
||||
|
||||
struct __attribute__ ((packed,aligned(32))) MemHitV1
|
||||
{
|
||||
inline float getT() const {
|
||||
return ft;
|
||||
}
|
||||
|
||||
inline void setT(float t) {
|
||||
ft = t;
|
||||
}
|
||||
|
||||
inline float getU() const {
|
||||
return fu;
|
||||
}
|
||||
|
||||
inline void setU(float u) {
|
||||
fu = u;
|
||||
}
|
||||
|
||||
inline float getV() const {
|
||||
return fv;
|
||||
}
|
||||
|
||||
inline void setV(float v) {
|
||||
fv = v;
|
||||
}
|
||||
|
||||
inline void* getPrimLeafPtr() {
|
||||
return sycl::global_ptr<void>((void*)(uint64_t(primLeafPtr)*64)).get();
|
||||
}
|
||||
|
||||
inline void* getInstanceLeafPtr() {
|
||||
return sycl::global_ptr<void>((void*)(uint64_t(instLeafPtr)*64)).get();
|
||||
}
|
||||
|
||||
public:
|
||||
float ft; // hit distance of current hit (or initial traversal distance)
|
||||
float fu,fv; // barycentric hit coordinates
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t primIndexDelta : 16; // prim index delta for compressed meshlets and quads
|
||||
uint32_t valid : 1; // set if there is a hit
|
||||
uint32_t leafType : 3; // type of node primLeafPtr is pointing to
|
||||
uint32_t primLeafIndex : 4; // index of the hit primitive inside the leaf
|
||||
uint32_t bvhLevel : 3; // the instancing level at which the hit occured
|
||||
uint32_t frontFace : 1; // whether we hit the front-facing side of a triangle (also used to pass opaque flag when calling intersection shaders)
|
||||
uint32_t done : 1; // used in sync mode to indicate that traversal is done
|
||||
uint32_t pad0 : 3; // unused bits
|
||||
};
|
||||
uint32_t data;
|
||||
};
|
||||
|
||||
struct { // FIXME: removing these anonymous structs triggers IGC bug
|
||||
uint64_t primLeafPtr : 42; // pointer to BVH leaf node (multiple of 64 bytes)
|
||||
uint64_t hitGroupRecPtr0 : 22; // LSB of hit group record of the hit triangle (multiple of 16 bytes)
|
||||
};
|
||||
|
||||
struct {
|
||||
uint64_t instLeafPtr : 42; // pointer to BVH instance leaf node (in multiple of 64 bytes)
|
||||
uint64_t hitGroupRecPtr1 : 22; // MSB of hit group record of the hit triangle (multiple of 16 bytes)
|
||||
};
|
||||
|
||||
void clear(bool _done, bool _valid) {
|
||||
//*(sycl::int8*) this = sycl::int8(0x7F800000 /* INFINITY */, 0, 0, (_done ? 0x10000000 : 0) | (_valid ? 0x10000), 0, 0, 0, 0);
|
||||
ft = fu = fv = 0.0f;
|
||||
data = 0;
|
||||
done = _done ? 1 : 0;
|
||||
valid = _valid ? 1 : 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct __attribute__ ((packed,aligned(64))) RTStack
|
||||
{
|
||||
union {
|
||||
struct {
|
||||
struct MemHit committedHit; // stores committed hit
|
||||
struct MemHit potentialHit; // stores potential hit that is passed to any hit shader
|
||||
};
|
||||
struct MemHit hit[2]; // committedHit, potentialHit
|
||||
};
|
||||
struct MemRay ray[2];
|
||||
char travStack[32*2];
|
||||
};
|
||||
|
||||
// Header at the start of an acceleration structure.  The fields add up to
// 128 bytes (8 + 24 + 32 + 4 + 52 + 8).
struct __attribute__ ((packed)) HWAccel
{
  uint64_t reserved;
  float    bounds[2][3];        // bounding box of the BVH
  uint32_t reserved0[8];
  uint32_t numTimeSegments;
  uint32_t reserved1[13];
  uint64_t dispatchGlobalsPtr;
};
|
||||
|
||||
// Leaf header made of two 32-bit words of bit fields.
struct __attribute__ ((packed,aligned(8))) PrimLeafDesc
{
  /* first word */
  struct {
    uint32_t shaderIndex : 24;  // shader index used for shader record calculations
    uint32_t geomMask : 8;      // geometry mask used for ray masking
  };

  /* second word */
  struct {
    uint32_t geomIndex : 29;    // the geometry index specifies the n'th geometry of the scene
    uint32_t type : 1;          // enable/disable culling for procedurals and instances
    uint32_t geomFlags : 2;     // geometry flags of this geometry
  };
};
|
||||
|
||||
struct __attribute__ ((packed,aligned(64))) QuadLeafV1
|
||||
{
|
||||
struct PrimLeafDesc leafDesc;
|
||||
unsigned int primIndex0;
|
||||
struct {
|
||||
uint32_t primIndex1Delta : 16; // delta encoded primitive index of second triangle
|
||||
uint32_t j0 : 2; // specifies first vertex of second triangle
|
||||
uint32_t j1 : 2; // specified second vertex of second triangle
|
||||
uint32_t j2 : 2; // specified third vertex of second triangle
|
||||
uint32_t last : 1; // true if the second triangle is the last triangle in a leaf list
|
||||
uint32_t pad : 9; // unused bits
|
||||
};
|
||||
float v[4][3];
|
||||
};
|
||||
|
||||
struct __attribute__ ((packed,aligned(64))) ProceduralLeaf
|
||||
{
|
||||
static const constexpr uint32_t N = 13;
|
||||
|
||||
struct PrimLeafDesc leafDesc; // leaf header identifying the geometry
|
||||
struct {
|
||||
uint32_t numPrimitives : 4; // number of stored primitives
|
||||
uint32_t pad : 32-4-N;
|
||||
uint32_t last : N; // bit vector with a last bit per primitive
|
||||
};
|
||||
uint32_t _primIndex[N]; // primitive indices of all primitives stored inside the leaf
|
||||
};
|
||||
|
||||
// Instance leaf consisting of two 64-byte parts: part0 is read by the
// hardware during traversal, part1 is read during shading.
struct __attribute__ ((packed,aligned(64))) InstanceLeafV1
{
  /* first 64 bytes accessed during traversal by hardware */
  struct Part0
  {
    struct {
      uint32_t shaderIndex : 24;  // shader index used to calculate instancing shader in case of software instancing
      uint32_t geomMask : 8;      // geometry mask used for ray masking
    };

    struct {
      uint32_t instanceContributionToHitGroupIndex : 24;
      uint32_t pad0 : 5;

      /* the following two entries are only used for procedural instances */
      uint32_t type : 1;          // enables/disables opaque culling
      uint32_t geomFlags : 2;     // unused for instances
    };

    struct {
      uint64_t startNodePtr : 48; // start node where to continue traversal of the instanced object
      uint64_t instFlags : 8;     // flags for the instance (see InstanceFlags)
      uint64_t pad1 : 8;          // unused bits
    };

    float world2obj_vx[3];  // 1st column of World2Obj transform
    float world2obj_vy[3];  // 2nd column of World2Obj transform
    float world2obj_vz[3];  // 3rd column of World2Obj transform
    float obj2world_p[3];   // translation of Obj2World transform (on purpose in first 64 bytes)
  } part0;

  /* second 64 bytes accessed during shading */
  struct Part1
  {
    struct {
      uint64_t bvhPtr : 48; // pointer to the BVH the start node belongs to
      uint64_t pad : 16;    // unused bits
    };

    uint32_t instanceID;    // user defined value per DXR spec
    uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene)

    float obj2world_vx[3];  // 1st column of Obj2World transform
    float obj2world_vy[3];  // 2nd column of Obj2World transform
    float obj2world_vz[3];  // 3rd column of Obj2World transform
    float world2obj_p[3];   // translation of World2Obj transform
  } part1;
};
|
||||
287
Framework/external/embree/kernels/rthwif/rttrace/rttrace_validation.cpp
vendored
Normal file
287
Framework/external/embree/kernels/rthwif/rttrace/rttrace_validation.cpp
vendored
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "rttrace_validation.h"
|
||||
|
||||
#define sizeof_QBVH6_InternalNode6 64
|
||||
#define QBVH6_rootNodeOffset 128
|
||||
|
||||
/*struct rayquery_impl_t {
|
||||
rtfence_t fence;
|
||||
rtglobals_t dispatchGlobalsPtr;
|
||||
struct RTStack* rtStack;
|
||||
TraceRayCtrl ctrl;
|
||||
unsigned int bvh_level;
|
||||
};*/
|
||||
|
||||
// Empty function.
// NOTE(review): presumably only exists so that other code can reference a
// symbol from this translation unit - confirm against the callers.
void use_rthwif_production() {}
|
||||
|
||||
// Reports the supported ray tracing features: ray queries.
SYCL_EXTERNAL intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag() {
  return intel_raytracing_ext_flag_ray_query;
}
|
||||
|
||||
// Initializes a ray query at bvh level 0: fetches the dispatch globals and
// the RT stack, writes the ray into the stack and resets both hit records.
// Returns the query in the TRACE_RAY_INITIAL state.
SYCL_EXTERNAL intel_ray_query_t intel_ray_query_init(intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel_i )
{
  unsigned int bvh_level = 0;

  HWAccel* hwAccel = (HWAccel*)accel_i;

  /* dispatch globals come either from the acceleration structure header
   * or from the implicit globals provided by the runtime */
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
  rtglobals_t globals = (rtglobals_t) hwAccel->dispatchGlobalsPtr;
#else
  rtglobals_t globals = (rtglobals_t) intel_get_implicit_dispatch_globals();
#endif

  void* stackMemory = intel_get_rt_stack( globals );
  struct RTStack* __restrict rtStack = sycl::global_ptr<RTStack>((struct RTStack*)stackMemory).get();

  /* init ray: the root node sits QBVH6_rootNodeOffset bytes behind the header start */
  const uint64_t rootNodePtr = (uint64_t)hwAccel + QBVH6_rootNodeOffset;
  rtStack->ray[bvh_level].init(ray,rootNodePtr);

  /* committed hit: infinitely far away, all flag bits cleared */
  rtStack->committedHit.setT(INFINITY);
  rtStack->committedHit.setU(0.0f);
  rtStack->committedHit.setV(0.0f);
  rtStack->committedHit.data = 0;

  /* potential hit: same, but marked done and valid */
  rtStack->potentialHit.setT(INFINITY);
  rtStack->potentialHit.setU(0.0f);
  rtStack->potentialHit.setV(0.0f);
  rtStack->potentialHit.data = 0;
  rtStack->potentialHit.done = 1;
  rtStack->potentialHit.valid = 1;

  return { nullptr, (void*) globals, rtStack, TRACE_RAY_INITIAL, bvh_level };
}
|
||||
|
||||
// Sets up instance traversal: writes the (transformed) ray into the slot of
// the next bvh level and switches the query to TRACE_RAY_INSTANCE at that level.
// NOTE(review): the stack holds two ray slots and the level is not range
// checked here - confirm that callers never forward beyond level 1.
SYCL_EXTERNAL void intel_ray_query_forward_ray( intel_ray_query_t& query, intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel_i)
{
  HWAccel* hwAccel = (HWAccel*)accel_i;
  struct RTStack* __restrict rtStack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();

  /* init ray one level below the current one */
  unsigned int next_level = query.bvh_level+1;
  const uint64_t rootNodePtr = (uint64_t)hwAccel + QBVH6_rootNodeOffset;
  rtStack->ray[next_level].init(ray,rootNodePtr);

  query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_INSTANCE, next_level };
}
|
||||
|
||||
// Commits the potential hit of `query`.
// If the ray at the current level carries the accept-first-hit flag, the potential hit is copied
// into the committed hit and the query is marked TRACE_RAY_DONE; otherwise the query is
// marked TRACE_RAY_COMMIT.
SYCL_EXTERNAL void intel_ray_query_commit_potential_hit( intel_ray_query_t& query )
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();

  const unsigned int level = query.bvh_level;
  const unsigned int flags = stack->ray[level].rayFlags;

  if (!(flags & intel_ray_flags_accept_first_hit_and_end_search)) {
    stack->potentialHit.valid = 1; // FIXME: is this required?
    query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_COMMIT, level };
    return;
  }

  stack->committedHit = stack->potentialHit;
  stack->committedHit.valid = 1;
  query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_DONE, level };
}
|
||||
|
||||
// Overwrites distance and barycentrics of the potential hit with the given values,
// then commits it through intel_ray_query_commit_potential_hit.
SYCL_EXTERNAL void intel_ray_query_commit_potential_hit_override( intel_ray_query_t& query, float override_hit_distance, intel_float2 override_uv )
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();

  auto& candidate = stack->potentialHit;
  candidate.setT(override_hit_distance);
  candidate.setU(override_uv.x);
  candidate.setV(override_uv.y);

  intel_ray_query_commit_potential_hit(query);
}
|
||||
|
||||
// Starts (or resumes) traversal for `query`.
// The potential hit is always flagged done and valid first; unless the query is already
// TRACE_RAY_DONE, a ray query is dispatched and the returned fence is stored in opaque0.
SYCL_EXTERNAL void intel_ray_query_start_traversal( intel_ray_query_t& query )
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();

  stack->potentialHit.done = 1;
  stack->potentialHit.valid = 1;

  if (query.ctrl != TRACE_RAY_DONE)
  {
    rtglobals_t globals = (rtglobals_t) query.opaque1;
    rtfence_t fence = intel_dispatch_trace_ray_query(globals,query.bvh_level,query.ctrl);
    query = { (void*) fence, query.opaque1, query.opaque2, TRACE_RAY_INITIAL, 0 };
  }
}
|
||||
|
||||
// Waits on the fence stored in opaque0, then sets the query to TRACE_RAY_CONTINUE
// at the BVH level recorded in the potential hit.
SYCL_EXTERNAL void intel_ray_query_sync( intel_ray_query_t& query )
{
  intel_rt_sync((rtfence_t)query.opaque0);

  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  const unsigned int level = stack->potentialHit.bvhLevel;

  /* continue is default behaviour */
  query = { query.opaque0, query.opaque1, query.opaque2, TRACE_RAY_CONTINUE, level };
}
|
||||
|
||||
// Alternative spelling of intel_ray_query_sync with identical behaviour:
// waits on the query's fence and sets the query to TRACE_RAY_CONTINUE at the
// BVH level recorded in the potential hit. Delegates to intel_ray_query_sync
// so the two entry points cannot drift apart.
SYCL_EXTERNAL void intel_sync_ray_query( intel_ray_query_t& query )
{
  intel_ray_query_sync(query);
}
|
||||
|
||||
// Waits for the query via intel_ray_query_sync, then clears every field of the query.
SYCL_EXTERNAL void intel_ray_query_abandon( intel_ray_query_t& query )
|
||||
{
|
||||
intel_ray_query_sync(query);
|
||||
query = { nullptr, nullptr, nullptr, TRACE_RAY_INITIAL, 0 };
|
||||
}
|
||||
|
||||
// Returns the bvhLevel field of the hit record selected by hit_type.
SYCL_EXTERNAL unsigned int intel_get_hit_bvh_level( intel_ray_query_t& query, intel_hit_type_t hit_type ) {
|
||||
return query.hit(hit_type).bvhLevel;
|
||||
}
|
||||
|
||||
// Returns the distance (getT) of the hit record selected by hit_type.
SYCL_EXTERNAL float intel_get_hit_distance( intel_ray_query_t& query, intel_hit_type_t hit_type ) {
|
||||
return query.hit(hit_type).getT();
|
||||
}
|
||||
|
||||
// Returns the (u, v) pair of the hit record selected by hit_type.
SYCL_EXTERNAL intel_float2 intel_get_hit_barycentrics( intel_ray_query_t& query, intel_hit_type_t hit_type ) {
|
||||
return { query.hit(hit_type).getU(), query.hit(hit_type).getV() };
|
||||
}
|
||||
|
||||
// Returns the frontFace field of the hit record selected by hit_type.
SYCL_EXTERNAL bool intel_get_hit_front_face( intel_ray_query_t& query, intel_hit_type_t hit_type ) {
|
||||
return query.hit(hit_type).frontFace;
|
||||
}
|
||||
|
||||
// Returns the geomIndex stored in the primitive leaf referenced by the selected hit record.
SYCL_EXTERNAL unsigned int intel_get_hit_geometry_id(intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  struct PrimLeafDesc* __restrict desc = (struct PrimLeafDesc*)hit.getPrimLeafPtr();
  return desc->geomIndex;
}
|
||||
|
||||
// Returns the primitive id of the selected hit record.
// Quad leaves report primIndex0 plus the hit's primIndexDelta; every other leaf type is
// read as a procedural leaf and indexed by the hit's primLeafIndex.
SYCL_EXTERNAL unsigned int intel_get_hit_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& hit = query.hit(hit_type);
  void* __restrict leafPtr = hit.getPrimLeafPtr();

  if (hit.leafType != NODE_TYPE_QUAD)
    return ((ProceduralLeaf*)leafPtr)->_primIndex[hit.primLeafIndex];

  return ((QuadLeaf*)leafPtr)->primIndex0 + hit.primIndexDelta;
}
|
||||
|
||||
// Primitive id for a hit whose leaf is read as a quad leaf, without checking the leaf type:
// primIndex0 of the leaf plus the hit's primIndexDelta.
SYCL_EXTERNAL unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& record = query.hit(hit_type);
  QuadLeaf* __restrict quad = (QuadLeaf*) record.getPrimLeafPtr();
  return quad->primIndex0 + record.primIndexDelta;
}
|
||||
|
||||
// Primitive id for a hit whose leaf is read as a procedural leaf, without checking the leaf type:
// the leaf's _primIndex entry selected by the hit's primLeafIndex.
SYCL_EXTERNAL unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& record = query.hit(hit_type);
  ProceduralLeaf* __restrict procedural = (ProceduralLeaf*) record.getPrimLeafPtr();
  return procedural->_primIndex[record.primLeafIndex];
}
|
||||
|
||||
// Returns the instanceIndex of the instance leaf referenced by the selected hit record,
// or -1 converted to unsigned int when the hit has no instance leaf.
SYCL_EXTERNAL unsigned int intel_get_hit_instance_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& record = query.hit(hit_type);
  InstanceLeaf* __restrict instance = (InstanceLeaf*) record.getInstanceLeafPtr();

  if (instance != nullptr)
    return instance->part1.instanceIndex;

  return (unsigned int) -1;
}
|
||||
|
||||
// Returns the instanceID of the instance leaf referenced by the selected hit record,
// or -1 converted to unsigned int when the hit has no instance leaf.
SYCL_EXTERNAL unsigned int intel_get_hit_instance_user_id( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& record = query.hit(hit_type);
  InstanceLeaf* __restrict instance = (InstanceLeaf*) record.getInstanceLeafPtr();

  if (instance != nullptr)
    return instance->part1.instanceID;

  return (unsigned int) -1;
}
|
||||
|
||||
// Returns the world-to-object transform stored in the instance leaf of the selected hit record.
// The three axis vectors are read from part0 and the translation from part1 of the leaf.
// Without an instance leaf the identity transform is returned.
SYCL_EXTERNAL intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& record = query.hit(hit_type);
  InstanceLeaf* __restrict instance = (InstanceLeaf*) record.getInstanceLeafPtr();

  if (instance == nullptr)
    return { { 1,0,0 }, { 0,1,0 }, { 0,0,1 }, { 0,0,0 } };

  const auto& axes = instance->part0;
  const auto& translation = instance->part1;
  return {
    { axes.world2obj_vx[0], axes.world2obj_vx[1], axes.world2obj_vx[2] },
    { axes.world2obj_vy[0], axes.world2obj_vy[1], axes.world2obj_vy[2] },
    { axes.world2obj_vz[0], axes.world2obj_vz[1], axes.world2obj_vz[2] },
    { translation.world2obj_p[0], translation.world2obj_p[1], translation.world2obj_p[2] }
  };
}
|
||||
|
||||
// Returns the object-to-world transform stored in the instance leaf of the selected hit record.
// Here the three axis vectors are read from part1 and the translation from part0 of the leaf
// (the opposite split to the world-to-object transform).
// Without an instance leaf the identity transform is returned.
SYCL_EXTERNAL intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t& query, intel_hit_type_t hit_type )
{
  MemHit& record = query.hit(hit_type);
  InstanceLeaf* __restrict instance = (InstanceLeaf*) record.getInstanceLeafPtr();

  if (instance == nullptr)
    return { { 1,0,0 }, { 0,1,0 }, { 0,0,1 }, { 0,0,0 } };

  const auto& axes = instance->part1;
  const auto& translation = instance->part0;
  return {
    { axes.obj2world_vx[0], axes.obj2world_vx[1], axes.obj2world_vx[2] },
    { axes.obj2world_vy[0], axes.obj2world_vy[1], axes.obj2world_vy[2] },
    { axes.obj2world_vz[0], axes.obj2world_vz[1], axes.obj2world_vz[2] },
    { translation.obj2world_p[0], translation.obj2world_p[1], translation.obj2world_p[2] }
  };
}
|
||||
|
||||
// Writes the three vertices of the hit triangle to verts_out.
// When the hit's primLeafIndex is 0 the leaf vertices 0, 1, 2 are used; otherwise the
// vertex indices j0, j1, j2 stored in the quad leaf select the vertices.
SYCL_EXTERNAL void intel_get_hit_triangle_vertices( intel_ray_query_t& query, intel_float3 verts_out[3], intel_hit_type_t hit_type )
{
  const QuadLeaf* __restrict quad = (const QuadLeaf*) query.hit(hit_type).getPrimLeafPtr();

  unsigned int corner[3] = { 0, 1, 2 };
  if (query.hit(hit_type).primLeafIndex != 0)
  {
    corner[0] = quad->j0;
    corner[1] = quad->j1;
    corner[2] = quad->j2;
  }

  for (unsigned int k = 0; k < 3; k++) {
    const unsigned int j = corner[k];
    verts_out[k] = { quad->v[j][0], quad->v[j][1], quad->v[j][2] };
  }
}
|
||||
|
||||
// Returns the origin of the ray stored at bvh_level of the query's RT stack.
SYCL_EXTERNAL intel_float3 intel_get_ray_origin( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  const auto& origin = stack->ray[bvh_level].org;
  return { origin[0], origin[1], origin[2] };
}
|
||||
|
||||
// Returns the direction of the ray stored at bvh_level of the query's RT stack.
SYCL_EXTERNAL intel_float3 intel_get_ray_direction( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  const auto& direction = stack->ray[bvh_level].dir;
  return { direction[0], direction[1], direction[2] };
}
|
||||
|
||||
// Returns the tnear value of the ray stored at bvh_level of the query's RT stack.
SYCL_EXTERNAL float intel_get_ray_tmin( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  MemRay& levelRay = stack->ray[bvh_level];
  return levelRay.tnear;
}
|
||||
|
||||
// Returns the rayFlags of the ray stored at bvh_level, cast to intel_ray_flags_t.
SYCL_EXTERNAL intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  MemRay& levelRay = stack->ray[bvh_level];
  return (intel_ray_flags_t) levelRay.rayFlags;
}
|
||||
|
||||
// Returns the rayMask of the ray stored at bvh_level of the query's RT stack.
SYCL_EXTERNAL unsigned int intel_get_ray_mask( intel_ray_query_t& query, unsigned int bvh_level)
{
  struct RTStack* __restrict stack = sycl::global_ptr<RTStack>((struct RTStack*)query.opaque2).get();
  MemRay& levelRay = stack->ray[bvh_level];
  return levelRay.rayMask;
}
|
||||
|
||||
// Returns the done flag of the query's potential hit record.
SYCL_EXTERNAL bool intel_is_traversal_done( intel_ray_query_t& query ) {
|
||||
return query.hit(intel_hit_type_potential_hit).done;
|
||||
}
|
||||
|
||||
// Classifies the selected hit: quad leaves are reported as triangles, every other leaf type as procedural.
SYCL_EXTERNAL intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t& query, intel_hit_type_t hit_type) {
|
||||
return query.hit(hit_type).leafType == NODE_TYPE_QUAD ? intel_candidate_type_triangle : intel_candidate_type_procedural;
|
||||
}
|
||||
|
||||
// Returns the valid flag of the query's committed hit record.
SYCL_EXTERNAL bool intel_has_committed_hit( intel_ray_query_t& query ) {
|
||||
return query.hit(intel_hit_type_committed_hit).valid;
|
||||
}
|
||||
|
||||
180
Framework/external/embree/kernels/rthwif/rttrace/rttrace_validation.h
vendored
Normal file
180
Framework/external/embree/kernels/rthwif/rttrace/rttrace_validation.h
vendored
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#pragma clang diagnostic ignored "-W#pragma-messages"
|
||||
|
||||
#include <sycl/sycl.hpp>
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
// Bit flags that control ray traversal; values are distinct bits and can be combined.
enum intel_ray_flags_t
|
||||
{
|
||||
intel_ray_flags_none = 0x00,
|
||||
intel_ray_flags_force_opaque = 0x01, // forces geometry to be opaque (no anyhit shader invocation)
|
||||
intel_ray_flags_force_non_opaque = 0x02, // forces geometry to be non-opaque (invoke anyhit shader)
|
||||
intel_ray_flags_accept_first_hit_and_end_search = 0x04, // terminates traversal on the first hit found (shadow rays)
|
||||
intel_ray_flags_skip_closest_hit_shader = 0x08, // skip execution of the closest hit shader
|
||||
intel_ray_flags_cull_back_facing_triangles = 0x10, // back facing triangles do not produce a hit
|
||||
intel_ray_flags_cull_front_facing_triangles = 0x20, // front facing triangles do not produce a hit
|
||||
intel_ray_flags_cull_opaque = 0x40, // opaque geometry does not produce a hit
|
||||
intel_ray_flags_cull_non_opaque = 0x80, // non-opaque geometry does not produce a hit
|
||||
intel_ray_flags_skip_triangles = 0x100, // treat all triangle intersections as misses.
|
||||
intel_ray_flags_skip_procedural_primitives = 0x200, // skip execution of intersection shaders
|
||||
};
|
||||
|
||||
enum intel_hit_type_t
|
||||
{
|
||||
intel_hit_type_committed_hit = 0,
|
||||
intel_hit_type_potential_hit = 1,
|
||||
};
|
||||
|
||||
enum intel_raytracing_ext_flag_t
|
||||
{
|
||||
intel_raytracing_ext_flag_ray_query = 1 << 0, // true if ray queries are supported
|
||||
};
|
||||
|
||||
struct intel_float2
|
||||
{
|
||||
float x, y;
|
||||
|
||||
intel_float2() {}
|
||||
|
||||
intel_float2(float x, float y)
|
||||
: x(x), y(y) {}
|
||||
|
||||
intel_float2(sycl::float2 v)
|
||||
: x(v.x()), y(v.y()) {}
|
||||
|
||||
operator sycl::float2() {
|
||||
return sycl::float2(x,y);
|
||||
}
|
||||
};
|
||||
|
||||
struct intel_float3
|
||||
{
|
||||
float x, y, z;
|
||||
|
||||
intel_float3() {}
|
||||
|
||||
intel_float3(float x, float y, float z)
|
||||
: x(x), y(y), z(z) {}
|
||||
|
||||
intel_float3(sycl::float3 v)
|
||||
: x(v.x()), y(v.y()), z(v.z()) {}
|
||||
|
||||
operator sycl::float3() {
|
||||
return sycl::float3(x,y,z);
|
||||
}
|
||||
};
|
||||
|
||||
struct intel_float4x3 {
|
||||
intel_float3 vx, vy, vz, p;
|
||||
};
|
||||
|
||||
struct intel_ray_desc_t
|
||||
{
|
||||
intel_float3 origin;
|
||||
intel_float3 direction;
|
||||
float tmin;
|
||||
float tmax;
|
||||
unsigned int mask;
|
||||
intel_ray_flags_t flags;
|
||||
};
|
||||
|
||||
#include "rttrace_internal.h"
|
||||
|
||||
// opaque types
|
||||
// Ray query state: three opaque pointers, a control word and the current BVH level.
struct intel_ray_query_t {
|
||||
void* opaque0; void* opaque1; void* opaque2; uint32_t ctrl; uint32_t bvh_level;
|
||||
// Reinterprets opaque2 as an RTStack and returns the hit record indexed by ty.
MemHit& hit(intel_hit_type_t ty) {
|
||||
struct RTStack* rtStack = (struct RTStack*) opaque2;
|
||||
return rtStack->hit[ty];
|
||||
}
|
||||
};
|
||||
typedef __attribute__((opencl_global )) struct intel_raytracing_acceleration_structure_opaque_t* intel_raytracing_acceleration_structure_t;
|
||||
|
||||
// check supported ray tracing features
|
||||
SYCL_EXTERNAL intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag();
|
||||
|
||||
// initializes a ray query
|
||||
SYCL_EXTERNAL intel_ray_query_t intel_ray_query_init(
|
||||
intel_ray_desc_t ray,
|
||||
intel_raytracing_acceleration_structure_t accel
|
||||
);
|
||||
|
||||
// setup for instance traversal using a transformed ray and bottom-level AS
|
||||
SYCL_EXTERNAL void intel_ray_query_forward_ray(
|
||||
intel_ray_query_t& query,
|
||||
intel_ray_desc_t ray,
|
||||
intel_raytracing_acceleration_structure_t accel
|
||||
);
|
||||
|
||||
// commit the potential hit
|
||||
SYCL_EXTERNAL void intel_ray_query_commit_potential_hit(
|
||||
intel_ray_query_t& query
|
||||
);
|
||||
|
||||
// commit the potential hit and override hit distance and UVs
|
||||
SYCL_EXTERNAL void intel_ray_query_commit_potential_hit_override(
|
||||
intel_ray_query_t& query,
|
||||
float override_hit_distance,
|
||||
intel_float2 override_uv
|
||||
);
|
||||
|
||||
// start traversal of a ray query
|
||||
SYCL_EXTERNAL void intel_ray_query_start_traversal( intel_ray_query_t& query );
|
||||
|
||||
// synchronize rayquery execution. If a ray was dispatched,
|
||||
// This must be called prior to calling any of the accessors below.
|
||||
SYCL_EXTERNAL void intel_ray_query_sync( intel_ray_query_t& query );
|
||||
|
||||
// signal that a ray query will not be used further. This is the moral equivalent of a delete
|
||||
// this function does an implicit sync
|
||||
SYCL_EXTERNAL void intel_ray_query_abandon( intel_ray_query_t& query );
|
||||
|
||||
// read hit information during shader execution
|
||||
SYCL_EXTERNAL unsigned int intel_get_hit_bvh_level( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL float intel_get_hit_distance( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL intel_float2 intel_get_hit_barycentrics( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL bool intel_get_hit_front_face( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL unsigned int intel_get_hit_geometry_id(intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL unsigned int intel_get_hit_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); // fast path for quad leaves
|
||||
SYCL_EXTERNAL unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); // fast path for procedural leaves
|
||||
SYCL_EXTERNAL unsigned int intel_get_hit_instance_id( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL unsigned int intel_get_hit_instance_user_id( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
SYCL_EXTERNAL intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
|
||||
// fetch triangle vertices for a hit
|
||||
SYCL_EXTERNAL void intel_get_hit_triangle_vertices( intel_ray_query_t& query, intel_float3 vertices_out[3], intel_hit_type_t hit_type );
|
||||
|
||||
// Read ray-data. This is used to read transformed rays produced by HW instancing pipeline
|
||||
// during any-hit or intersection shader execution.
|
||||
SYCL_EXTERNAL intel_float3 intel_get_ray_origin( intel_ray_query_t& query, unsigned int bvh_level );
|
||||
SYCL_EXTERNAL intel_float3 intel_get_ray_direction( intel_ray_query_t& query, unsigned int bvh_level );
|
||||
SYCL_EXTERNAL float intel_get_ray_tmin( intel_ray_query_t& query, unsigned int bvh_level );
|
||||
SYCL_EXTERNAL intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t& query, unsigned int bvh_level );
|
||||
SYCL_EXTERNAL unsigned int intel_get_ray_mask( intel_ray_query_t& query, unsigned int bvh_level );
|
||||
|
||||
// if traversal returns one can test if a triangle or procedural is hit
|
||||
enum intel_candidate_type_t
|
||||
{
|
||||
intel_candidate_type_triangle,
|
||||
intel_candidate_type_procedural
|
||||
};
|
||||
|
||||
SYCL_EXTERNAL intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t& query, intel_hit_type_t hit_type );
|
||||
|
||||
// test whether traversal has terminated. If false, the ray has reached
|
||||
// a procedural leaf or a non-opaque triangle leaf, and requires shader processing
|
||||
SYCL_EXTERNAL bool intel_is_traversal_done( intel_ray_query_t& query );
|
||||
|
||||
// if traversal is done one can test for the presence of a committed hit to either invoke miss or closest hit shader
|
||||
SYCL_EXTERNAL bool intel_has_committed_hit( intel_ray_query_t& query );
|
||||
89
Framework/external/embree/kernels/rthwif/testing/CMakeLists.txt
vendored
Normal file
89
Framework/external/embree/kernels/rthwif/testing/CMakeLists.txt
vendored
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
## Copyright 2009-2022 Intel Corporation
|
||||
## SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#PROJECT(rthwif_testing)
|
||||
#CMAKE_MINIMUM_REQUIRED(VERSION 3.1.0)
|
||||
|
||||
SET(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
IF (NOT WIN32)
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
|
||||
ENDIF()
|
||||
|
||||
IF (NOT DEFINED EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
|
||||
OPTION(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON)
|
||||
ENDIF()
|
||||
|
||||
IF (NOT DEFINED EMBREE_SYCL_RT_VALIDATION_API)
|
||||
OPTION(EMBREE_SYCL_RT_VALIDATION_API "Use rt_validation API instead of IGC provided rt_production API" OFF)
|
||||
ENDIF()
|
||||
|
||||
IF (EMBREE_SYCL_RT_VALIDATION_API)
|
||||
ADD_DEFINITIONS("-DEMBREE_SYCL_RT_VALIDATION_API")
|
||||
ENDIF()
|
||||
|
||||
# Define EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS only when the validation API is enabled
# and implicit dispatch globals are disabled.
IF (EMBREE_SYCL_RT_VALIDATION_API AND NOT EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
|
||||
ADD_DEFINITIONS("-DEMBREE_SYCL_ALLOC_DISPATCH_GLOBALS")
|
||||
ENDIF()
|
||||
|
||||
IF (EMBREE_SYCL_RT_SIMULATION)
|
||||
SET(RT_SIM_LIBRARY rtcore)
|
||||
ENDIF()
|
||||
|
||||
ADD_EXECUTABLE(embree_rthwif_cornell_box rthwif_cornell_box.cpp)
|
||||
TARGET_LINK_LIBRARIES(embree_rthwif_cornell_box sys simd ${TBB_TARGET} ${RT_SIM_LIBRARY} ze_wrapper)
|
||||
SET_PROPERTY(TARGET embree_rthwif_cornell_box APPEND PROPERTY COMPILE_FLAGS "-fsycl -fsycl-targets=spir64 -DEMBREE_SYCL_SUPPORT")
|
||||
SET_PROPERTY(TARGET embree_rthwif_cornell_box APPEND PROPERTY LINK_FLAGS "-fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"")
|
||||
INSTALL(TARGETS embree_rthwif_cornell_box DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
|
||||
SIGN_TARGET(embree_rthwif_cornell_box)
|
||||
|
||||
ADD_EXECUTABLE(embree_rthwif_test rthwif_test.cpp)
|
||||
TARGET_LINK_LIBRARIES(embree_rthwif_test sys simd ${TBB_TARGET} ${RT_SIM_LIBRARY} ze_wrapper)
|
||||
SET_PROPERTY(TARGET embree_rthwif_test APPEND PROPERTY COMPILE_FLAGS "-fsycl -fsycl-targets=spir64 -DEMBREE_SYCL_SUPPORT")
|
||||
SET_PROPERTY(TARGET embree_rthwif_test APPEND PROPERTY LINK_FLAGS "-fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"")
|
||||
INSTALL(TARGETS embree_rthwif_test DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
|
||||
SIGN_TARGET(embree_rthwif_test)
|
||||
|
||||
# Register the Cornell box image-comparison test unless the validation API is used
# without implicit dispatch globals.
# The value of EMBREE_SYCL_RT_VALIDATION_API is tested rather than whether it is defined:
# the option is always defined at this point, so a "NOT DEFINED" check could never be true.
IF (NOT EMBREE_SYCL_RT_VALIDATION_API OR EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)

  # prefer the reference image from the model directory when one is configured
  IF (DEFINED EMBREE_MODEL_DIR)
    SET(CORNELL_BOX_REFERENCE "${EMBREE_MODEL_DIR}/reference/cornell_box_reference.tga")
  ELSE()
    SET(CORNELL_BOX_REFERENCE "${CMAKE_CURRENT_SOURCE_DIR}/cornell_box_reference.tga")
  ENDIF()

  ADD_TEST(NAME rthwif_cornell_box
           COMMAND embree_rthwif_cornell_box --compare "${CORNELL_BOX_REFERENCE}"
           WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
ENDIF()
|
||||
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_builder_triangles_expected embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_triangles --build_mode_expected)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_builder_procedurals_expected embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_procedurals --build_mode_expected)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_builder_instances_expected embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_instances --build_mode_expected)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_builder_mixed_expected embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_mixed --build_mode_expected)
|
||||
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_benchmark_triangles embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --benchmark_triangles)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_benchmark_procedurals embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --benchmark_procedurals)
|
||||
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_builder_triangles_worst_case embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_triangles --build_mode_worst_case)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_builder_procedurals_worst_case embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_procedurals --build_mode_worst_case)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_builder_instances_worst_case embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_instances --build_mode_worst_case)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_builder_mixed_worst_case embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --build_test_mixed --build_mode_worst_case)
|
||||
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_triangles_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --triangles-committed-hit)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_triangles_potential_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --triangles-potential-hit)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_triangles_anyhit_shader_commit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --triangles-anyhit-shader-commit)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_triangles_anyhit_shader_reject embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --triangles-anyhit-shader-reject)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_procedurals_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --no-instancing --procedurals-committed-hit)
|
||||
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_triangles_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --triangles-committed-hit)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_triangles_potential_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --triangles-potential-hit)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_triangles_anyhit_shader_commit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --triangles-anyhit-shader-commit)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_triangles_anyhit_shader_reject embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --triangles-anyhit-shader-reject)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_hwinstancing_procedurals_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --hw-instancing --procedurals-committed-hit)
|
||||
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_triangles_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --triangles-committed-hit)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_triangles_potential_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --triangles-potential-hit)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_triangles_anyhit_shader_commit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --triangles-anyhit-shader-commit)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_triangles_anyhit_shader_reject embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --triangles-anyhit-shader-reject)
|
||||
ADD_EMBREE_TEST_ECS(rthwif_test_swinstancing_procedurals_committed_hit embree_rthwif_test NO_REFERENCE NO_POSTFIX INTENSITY 1 CONDITION "EMBREE_SYCL_SUPPORT == ON" ARGS --sw-instancing --procedurals-committed-hit)
|
||||
BIN
Framework/external/embree/kernels/rthwif/testing/cornell_box_reference.tga
vendored
Normal file
BIN
Framework/external/embree/kernels/rthwif/testing/cornell_box_reference.tga
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 768 KiB |
630
Framework/external/embree/kernels/rthwif/testing/rthwif_cornell_box.cpp
vendored
Normal file
630
Framework/external/embree/kernels/rthwif/testing/rthwif_cornell_box.cpp
vendored
Normal file
|
|
@ -0,0 +1,630 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include <CL/sycl.hpp>
|
||||
#include "tbb/tbb.h"
|
||||
|
||||
#include "../rttrace/rttrace.h"
|
||||
|
||||
#include <level_zero/ze_wrapper.h>
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
void* dispatchGlobalsPtr = nullptr;
|
||||
|
||||
static uint32_t global_width = 512;
|
||||
static uint32_t global_height = 512;
|
||||
|
||||
// Asynchronous SYCL error handler: rethrows each queued exception and prints the
// message of those that are sycl::exception. Other exception types are not caught here.
void exception_handler(sycl::exception_list exceptions)
{
  for (std::exception_ptr const& pending : exceptions) {
    try {
      std::rethrow_exception(pending);
    } catch(sycl::exception const& error) {
      std::cout << "Caught asynchronous SYCL exception: " << error.what() << std::endl;
    }
  }
}
|
||||
|
||||
// Writes the single byte v to file.
inline void fwrite_uchar (unsigned char v, std::fstream& file) { file.write((const char*)&v,sizeof(v)); }
|
||||
// Writes the low 16 bits of v to file as two bytes, least significant byte first.
// The bytes are emitted explicitly instead of copying the in-memory representation, so the
// output is little-endian (as the TGA header requires) regardless of the host byte order.
inline void fwrite_ushort(unsigned short v, std::fstream& file) {
  const unsigned char bytes[2] = { (unsigned char)(v & 0xFF), (unsigned char)((v >> 8) & 0xFF) };
  file.write((const char*)bytes,sizeof(bytes));
}
|
||||
|
||||
void storeTga(uint32_t* pixels, uint32_t width, uint32_t height, const std::string& fileName) try
|
||||
{
|
||||
std::fstream file;
|
||||
file.exceptions (std::fstream::failbit | std::fstream::badbit);
|
||||
file.open (fileName.c_str(), std::fstream::out | std::fstream::binary);
|
||||
|
||||
fwrite_uchar(0x00, file);
|
||||
fwrite_uchar(0x00, file);
|
||||
fwrite_uchar(0x02, file);
|
||||
fwrite_ushort(0x0000, file);
|
||||
fwrite_ushort(0x0000, file);
|
||||
fwrite_uchar(0x00, file);
|
||||
fwrite_ushort(0x0000, file);
|
||||
fwrite_ushort(0x0000, file);
|
||||
fwrite_ushort((unsigned short)width , file);
|
||||
fwrite_ushort((unsigned short)height, file);
|
||||
fwrite_uchar(0x18, file);
|
||||
fwrite_uchar(0x20, file);
|
||||
|
||||
for (size_t y=0; y<height; y++) {
|
||||
for (size_t x=0; x<width; x++) {
|
||||
const uint32_t c = pixels[y*width+x];
|
||||
fwrite_uchar((unsigned char)((c>>0)&0xFF), file);
|
||||
fwrite_uchar((unsigned char)((c>>8)&0xFF), file);
|
||||
fwrite_uchar((unsigned char)((c>>16)&0xFF), file);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (std::exception const& e) {
|
||||
std::cout << "Error: Cannot write file " << fileName << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
// Reads the whole file fileName in binary mode and returns its bytes.
// On any stream failure an error line is printed and the exception is rethrown.
std::vector<unsigned char> readFile(const std::string& fileName)
{
  try
  {
    std::fstream in;
    in.exceptions (std::fstream::failbit | std::fstream::badbit);
    in.open (fileName.c_str(), std::fstream::in | std::fstream::binary);

    /* the file size is the stream position after seeking to the end */
    in.seekg (0, std::ios::end);
    const std::streampos byteCount = in.tellg();
    in.seekg (0, std::ios::beg);

    std::vector<unsigned char> bytes(byteCount);
    in.read ((char*)bytes.data(), byteCount);
    in.close();

    return bytes;
  }
  catch (std::exception const&) {
    std::cout << "Error: Cannot read file " << fileName << std::endl;
    throw;
  }
}
|
||||
|
||||
size_t compareTga(const std::string& fileNameA, const std::string& fileNameB)
|
||||
{
|
||||
const std::vector<unsigned char> dataA = readFile(fileNameA);
|
||||
const std::vector<unsigned char> dataB = readFile(fileNameB);
|
||||
if (dataA.size() != dataB.size())
|
||||
return false;
|
||||
|
||||
size_t diff = 0;
|
||||
for (int i=0; i<dataA.size(); i++)
|
||||
{
|
||||
if (std::abs((int)dataA[i] - (int)dataB[i]) == 1) diff++;
|
||||
if (std::abs((int)dataA[i] - (int)dataB[i]) == 2) diff+=4;
|
||||
if (std::abs((int)dataA[i] - (int)dataB[i]) >= 3) diff+=100;
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
/* Properly allocates an acceleration structure buffer using ze_raytracing_mem_alloc_ext_desc_t property. */
|
||||
void* alloc_accel_buffer(size_t bytes, sycl::device device, sycl::context context)
|
||||
{
|
||||
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
|
||||
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);
|
||||
|
||||
ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
|
||||
ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
|
||||
ze_result_t err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeDeviceGetProperties failed");
|
||||
|
||||
ze_raytracing_mem_alloc_ext_desc_t rt_desc;
|
||||
rt_desc.stype = ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC;
|
||||
rt_desc.pNext = nullptr;
|
||||
rt_desc.flags = 0;
|
||||
|
||||
ze_device_mem_alloc_desc_t device_desc;
|
||||
device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
|
||||
device_desc.pNext = &rt_desc;
|
||||
device_desc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED;
|
||||
device_desc.ordinal = 0;
|
||||
|
||||
ze_host_mem_alloc_desc_t host_desc;
|
||||
host_desc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
|
||||
host_desc.pNext = nullptr;
|
||||
host_desc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED;
|
||||
|
||||
void* ptr = nullptr;
|
||||
ze_result_t result = ZeWrapper::zeMemAllocShared(hContext,&device_desc,&host_desc,bytes,rtasProp.rtasBufferAlignment,hDevice,&ptr);
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("acceleration buffer allocation failed");
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void free_accel_buffer(void* ptr, sycl::context context)
|
||||
{
|
||||
ze_context_handle_t hContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(context);
|
||||
ze_result_t result = ZeWrapper::zeMemFree(hContext,ptr);
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("acceleration buffer free failed");
|
||||
}
|
||||
|
||||
|
||||
/* dispatch globals allocation is for debugging only */
|
||||
|
||||
/* control flags stored in DispatchGlobals::flags */
enum Flags : uint32_t {
  FLAGS_NONE,
  DEPTH_TEST_LESS_EQUAL = 1 << 0  // when set the depth test uses <=, otherwise <
};

/* Header written to the start of the dispatch globals buffer.
 * The order and the width of the fields must not be changed. */
struct DispatchGlobals
{
  uint64_t rtMemBasePtr;            // base address of the allocated stack memory
  uint64_t callStackHandlerKSP;     // KSP of the continuation handler that BTD invokes when the read KSP is 0
  uint32_t asyncStackSize;          // async-RT stack size in 64 byte blocks
  uint32_t numDSSRTStacks    : 16;  // number of stacks per DSS
  uint32_t syncRayQueryCount :  4;  // number of ray queries in the sync-RT stack: 0-15 mapped to: 1-16
  unsigned _reserved_mbz     : 12;  // reserved bits, set to zero
  uint32_t maxBVHLevels;            // maximal number of supported instancing levels (0->8, 1->1, 2->2, ...)
  Flags    flags;                   // per context control flags
};
|
||||
|
||||
/* Allocates and initializes the dispatch globals buffer.
 *
 * The buffer consists of a 128 byte area that starts with the DispatchGlobals
 * header, followed by the memory for the ray tracing stacks. rtMemBasePtr is
 * set to the address right behind the end of the buffer.
 *
 *   device, context - SYCL objects the buffer is allocated with
 *   returns         - the buffer, to be released with free_accel_buffer
 *
 * Exceptions thrown by alloc_accel_buffer are propagated. */
void* allocDispatchGlobals(sycl::device device, sycl::context context)
{
  const size_t maxBVHLevels = 2; // would be RTC_MAX_INSTANCE_LEVEL_COUNT+1 inside Embree

  /* size of one stack, rounded up to a multiple of 64 bytes */
  const size_t rtstack_bytes = (64 + maxBVHLevels*(64+32) + 63) & ~size_t(63);
  const size_t num_rtstacks  = size_t(1) << 17; // this is sufficiently large also for PVC
  const size_t totalBytes    = 128 + num_rtstacks*rtstack_bytes;

  void* buffer = alloc_accel_buffer(totalBytes, device, context);
  memset(buffer, 0, totalBytes);

  /* fill in the header at the start of the buffer */
  DispatchGlobals* globals = (DispatchGlobals*) buffer;
  globals->rtMemBasePtr        = (uint64_t) buffer + totalBytes;
  globals->callStackHandlerKSP = 0;
  globals->asyncStackSize      = 0;
  globals->numDSSRTStacks      = 0;
  globals->syncRayQueryCount   = 0;
  globals->_reserved_mbz       = 0;
  globals->maxBVHLevels        = (uint32_t) maxBVHLevels;
  globals->flags               = DEPTH_TEST_LESS_EQUAL;

  return buffer;
}
|
||||
|
||||
/* vertex indices for cornell_box model */
|
||||
ze_rtas_triangle_indices_uint32_exp_t indices[] = {
|
||||
{ 0, 1, 2 },
|
||||
{ 0, 2, 3 },
|
||||
{ 4, 5, 6 },
|
||||
{ 4, 6, 7 },
|
||||
{ 8, 9, 10 },
|
||||
{ 8, 10, 11 },
|
||||
{ 12, 13, 14 },
|
||||
{ 12, 14, 15 },
|
||||
{ 16, 17, 18 },
|
||||
{ 16, 18, 19 },
|
||||
{ 20, 21, 22 },
|
||||
{ 20, 22, 23 },
|
||||
{ 24, 25, 26 },
|
||||
{ 24, 26, 27 },
|
||||
{ 28, 29, 30 },
|
||||
{ 28, 30, 31 },
|
||||
{ 32, 33, 34 },
|
||||
{ 32, 34, 35 },
|
||||
{ 36, 37, 38 },
|
||||
{ 36, 38, 39 },
|
||||
{ 40, 41, 42 },
|
||||
{ 40, 42, 43 },
|
||||
{ 44, 45, 46 },
|
||||
{ 44, 46, 47 },
|
||||
{ 48, 49, 50 },
|
||||
{ 48, 50, 51 },
|
||||
{ 52, 53, 54 },
|
||||
{ 52, 54, 55 },
|
||||
{ 56, 57, 58 },
|
||||
{ 56, 58, 59 },
|
||||
{ 60, 61, 62 },
|
||||
{ 60, 62, 63 },
|
||||
{ 64, 65, 66 },
|
||||
{ 64, 66, 67 }
|
||||
};
|
||||
|
||||
/* vertex positions for cornell_box model */
|
||||
ze_rtas_float3_exp_t vertices[] = {
|
||||
{ 552.8, 0, 0 },
|
||||
{ 0, 0, 0 },
|
||||
{ 0, 0, 559.2 },
|
||||
{ 549.6, 0, 559.2 },
|
||||
{ 290, 0, 114 },
|
||||
{ 240, 0, 272 },
|
||||
{ 82, 0, 225 },
|
||||
{ 130, 0, 65 },
|
||||
{ 472, 0, 406 },
|
||||
{ 314, 0, 456 },
|
||||
{ 265, 0, 296 },
|
||||
{ 423, 0, 247 },
|
||||
{ 556, 548.8, 0 },
|
||||
{ 556, 548.8, 559.2 },
|
||||
{ 0, 548.8, 559.2 },
|
||||
{ 0, 548.8, 0 },
|
||||
{ 549.6, 0, 559.2 },
|
||||
{ 0, 0, 559.2 },
|
||||
{ 0, 548.8, 559.2 },
|
||||
{ 556, 548.8, 559.2 },
|
||||
{ 0, 0, 559.2 },
|
||||
{ 0, 0, 0 },
|
||||
{ 0, 548.8, 0 },
|
||||
{ 0, 548.8, 559.2 },
|
||||
{ 552.8, 0, 0 },
|
||||
{ 549.6, 0, 559.2 },
|
||||
{ 556, 548.8, 559.2 },
|
||||
{ 556, 548.8, 0 },
|
||||
{ 130, 165, 65 },
|
||||
{ 82, 165, 225 },
|
||||
{ 240, 165, 272 },
|
||||
{ 290, 165, 114 },
|
||||
{ 290, 0, 114 },
|
||||
{ 290, 165, 114 },
|
||||
{ 240, 165, 272 },
|
||||
{ 240, 0, 272 },
|
||||
{ 130, 0, 65 },
|
||||
{ 130, 165, 65 },
|
||||
{ 290, 165, 114 },
|
||||
{ 290, 0, 114 },
|
||||
{ 82, 0, 225 },
|
||||
{ 82, 165, 225 },
|
||||
{ 130, 165, 65 },
|
||||
{ 130, 0, 65 },
|
||||
{ 240, 0, 272 },
|
||||
{ 240, 165, 272 },
|
||||
{ 82, 165, 225 },
|
||||
{ 82, 0, 225 },
|
||||
{ 423, 330, 247 },
|
||||
{ 265, 330, 296 },
|
||||
{ 314, 330, 456 },
|
||||
{ 472, 330, 406 },
|
||||
{ 423, 0, 247 },
|
||||
{ 423, 330, 247 },
|
||||
{ 472, 330, 406 },
|
||||
{ 472, 0, 406 },
|
||||
{ 472, 0, 406 },
|
||||
{ 472, 330, 406 },
|
||||
{ 314, 330, 456 },
|
||||
{ 314, 0, 456 },
|
||||
{ 314, 0, 456 },
|
||||
{ 314, 330, 456 },
|
||||
{ 265, 330, 296 },
|
||||
{ 265, 0, 296 },
|
||||
{ 265, 0, 296 },
|
||||
{ 265, 330, 296 },
|
||||
{ 423, 330, 247 },
|
||||
{ 423, 0, 247 },
|
||||
};
|
||||
|
||||
/* builds acceleration structure */
|
||||
void* build_rtas(sycl::device device, sycl::context context)
|
||||
{
|
||||
/* get L0 handles */
|
||||
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device.get_platform());
|
||||
ze_device_handle_t hDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device);
|
||||
|
||||
/* create rtas builder object */
|
||||
ze_rtas_builder_exp_desc_t builderDesc = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC };
|
||||
ze_rtas_builder_exp_handle_t hBuilder = nullptr;
|
||||
ze_result_t err = ZeWrapper::zeRTASBuilderCreateExp(hDriver, &builderDesc, &hBuilder);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("ze_rtas_builder creation failed");
|
||||
|
||||
/* create geometry descriptor for single triangle mesh */
|
||||
ze_rtas_builder_triangles_geometry_info_exp_t mesh = {};
|
||||
mesh.geometryType = ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES;
|
||||
mesh.geometryFlags = 0;
|
||||
mesh.geometryMask = 0xFF;
|
||||
|
||||
mesh.triangleFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32;
|
||||
mesh.triangleCount = sizeof(indices)/sizeof(ze_rtas_triangle_indices_uint32_exp_t);
|
||||
mesh.triangleStride = sizeof(ze_rtas_triangle_indices_uint32_exp_t);
|
||||
mesh.pTriangleBuffer = indices;
|
||||
|
||||
mesh.vertexFormat = ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3;
|
||||
mesh.vertexCount = sizeof(vertices)/sizeof(ze_rtas_float3_exp_t);
|
||||
mesh.vertexStride = sizeof(ze_rtas_float3_exp_t);
|
||||
mesh.pVertexBuffer = vertices;
|
||||
|
||||
/* fill geometry descriptor array with pointer to single geometry descriptor */
|
||||
std::vector<ze_rtas_builder_geometry_info_exp_t*> descs;
|
||||
descs.push_back((ze_rtas_builder_geometry_info_exp_t*)&mesh);
|
||||
|
||||
/* get acceleration structure format for this device */
|
||||
ze_rtas_device_exp_properties_t rtasProp = { ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES };
|
||||
ze_device_properties_t devProp = { ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &rtasProp };
|
||||
err = ZeWrapper::zeDeviceGetProperties(hDevice, &devProp );
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeDeviceGetProperties failed");
|
||||
|
||||
/* create parallel operation for parallel build */
|
||||
ze_rtas_parallel_operation_exp_handle_t hParallelOperation = nullptr;
|
||||
err = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeRTASParallelOperationCreateExp failed");
|
||||
|
||||
/* create descriptor of build operation */
|
||||
size_t accelBufferBytesOut = 0;
|
||||
ze_rtas_aabb_exp_t bounds;
|
||||
ze_rtas_builder_build_op_exp_desc_t buildOp = {};
|
||||
buildOp.stype = ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC;
|
||||
buildOp.pNext = nullptr;
|
||||
buildOp.rtasFormat = rtasProp.rtasFormat;
|
||||
buildOp.buildQuality = ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_MEDIUM;
|
||||
buildOp.buildFlags = 0;
|
||||
buildOp.ppGeometries = (const ze_rtas_builder_geometry_info_exp_t **) descs.data();
|
||||
buildOp.numGeometries = descs.size();
|
||||
|
||||
/* just for debugging purposes */
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
ze_rtas_builder_build_op_debug_exp_desc_t buildOpDebug = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_EXP_DESC };
|
||||
buildOpDebug.dispatchGlobalsPtr = dispatchGlobalsPtr;
|
||||
buildOp.pNext = &buildOpDebug;
|
||||
#endif
|
||||
|
||||
/* query required buffer sizes */
|
||||
ze_rtas_builder_exp_properties_t buildProps = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES };
|
||||
err = ZeWrapper::zeRTASBuilderGetBuildPropertiesExp(hBuilder,&buildOp,&buildProps);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeRTASBuilderGetBuildPropertiesExp failed");
|
||||
|
||||
/* allocate scratch buffer */
|
||||
std::vector<char> scratchBuffer(buildProps.scratchBufferSizeBytes);
|
||||
memset(scratchBuffer.data(),0,scratchBuffer.size());
|
||||
|
||||
/* allocate acceleration structure buffer */
|
||||
size_t accelBytes = buildProps.rtasBufferSizeBytesMaxRequired;
|
||||
void* accel = alloc_accel_buffer(accelBytes,device,context);
|
||||
memset(accel,0,accelBytes); // optional
|
||||
|
||||
/* build acceleration strucuture multi threaded */
|
||||
err = ZeWrapper::zeRTASBuilderBuildExp(hBuilder,&buildOp,
|
||||
scratchBuffer.data(),scratchBuffer.size(),
|
||||
accel, accelBytes,
|
||||
hParallelOperation,
|
||||
nullptr, &bounds, &accelBufferBytesOut);
|
||||
|
||||
if (err != ZE_RESULT_EXP_RTAS_BUILD_DEFERRED)
|
||||
throw std::runtime_error("zeRTASBuilderBuildExp failed");
|
||||
|
||||
/* after the build is started one can query number of threads to use for the build */
|
||||
ze_rtas_parallel_operation_exp_properties_t prop = { ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES };
|
||||
err = ZeWrapper::zeRTASParallelOperationGetPropertiesExp(hParallelOperation,&prop);
|
||||
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeRTASParallelOperationGetPropertiesExp failed");
|
||||
|
||||
/* build in parallel using maximal number of build threads */
|
||||
tbb::parallel_for(0u, prop.maxConcurrency, 1u, [&](uint32_t) {
|
||||
err = ZeWrapper::zeRTASParallelOperationJoinExp(hParallelOperation);
|
||||
});
|
||||
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeRTASParallelOperationJoinExp failed");
|
||||
|
||||
/* destroy parallel operation again */
|
||||
err = ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeRTASParallelOperationDestroyExp failed");
|
||||
|
||||
/* destroy rtas builder again */
|
||||
err = ZeWrapper::zeRTASBuilderDestroyExp(hBuilder);
|
||||
if (err != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeRTASBuilderDestroyExp failed");
|
||||
|
||||
return accel;
|
||||
}
|
||||
|
||||
/* render using simple UV shading */
|
||||
void render(unsigned int x, unsigned int y, void* bvh, unsigned int* pixels, unsigned int width, unsigned int height)
|
||||
{
|
||||
/* write zero image if ray tracing extension is not supported */
|
||||
intel_raytracing_ext_flag_t flags = intel_get_raytracing_ext_flag();
|
||||
if (!(flags & intel_raytracing_ext_flag_ray_query)) {
|
||||
pixels[y*width+x] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* fixed camera */
|
||||
sycl::float3 vx(-1.f, -0.f, -0.f);
|
||||
sycl::float3 vy(-0.f, -1.f, -0.f);
|
||||
sycl::float3 vz(32.f, 32.f, 95.6379f);
|
||||
sycl::float3 p(278.f, 273.f, -800.f);
|
||||
|
||||
/* compute primary ray */
|
||||
intel_ray_desc_t ray;
|
||||
ray.origin = p;
|
||||
ray.direction = float(x)*vx*64.0f/float(width) + float(y)*vy*64/float(height) + vz;
|
||||
ray.tmin = 0.0f;
|
||||
ray.tmax = INFINITY;
|
||||
ray.mask = 0xFF;
|
||||
ray.flags = intel_ray_flags_none;
|
||||
|
||||
/* trace ray */
|
||||
intel_ray_query_t query = intel_ray_query_init(ray,(intel_raytracing_acceleration_structure_t)bvh);
|
||||
intel_ray_query_start_traversal(query);
|
||||
intel_ray_query_sync(query);
|
||||
|
||||
/* get UVs of hit point */
|
||||
float u = 0, v = 0;
|
||||
if (intel_has_committed_hit(query))
|
||||
{
|
||||
sycl::float2 uv = intel_get_hit_barycentrics( query, intel_hit_type_committed_hit );
|
||||
u = uv.x();
|
||||
v = uv.y();
|
||||
}
|
||||
|
||||
/* write color to framebuffer */
|
||||
sycl::float3 color(u,v,1.0f-u-v);
|
||||
unsigned int r = (unsigned int) (255.0f * color.x());
|
||||
unsigned int g = (unsigned int) (255.0f * color.y());
|
||||
unsigned int b = (unsigned int) (255.0f * color.z());
|
||||
pixels[y*width+x] = (b << 16) + (g << 8) + r;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) try
|
||||
{
|
||||
/* use can specify reference image to compare against */
|
||||
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
|
||||
ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO;
|
||||
#else
|
||||
ZeWrapper::RTAS_BUILD_MODE rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::INTERNAL;
|
||||
#endif
|
||||
|
||||
char* reference_img = NULL;
|
||||
for (int i=1; i<argc; i++)
|
||||
{
|
||||
if (strcmp(argv[i], "--compare") == 0) {
|
||||
if (++i >= argc) throw std::runtime_error("--compare: filename expected");
|
||||
reference_img = argv[i];
|
||||
}
|
||||
else if (strcmp(argv[i], "--internal-rtas-builder") == 0) {
|
||||
rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::INTERNAL;
|
||||
}
|
||||
else if (strcmp(argv[i], "--level-zero-rtas-builder") == 0) {
|
||||
rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::LEVEL_ZERO;
|
||||
}
|
||||
else if (strcmp(argv[i], "--default-rtas-builder") == 0) {
|
||||
rtas_build_mode = ZeWrapper::RTAS_BUILD_MODE::AUTO;
|
||||
}
|
||||
else if (strcmp(argv[i], "--size") == 0) {
|
||||
if (++i >= argc) throw std::runtime_error("--size: width expected");
|
||||
global_width = atoi(argv[i]);
|
||||
if (++i >= argc) throw std::runtime_error("--size: height expected");
|
||||
global_height = atoi(argv[i]);
|
||||
if (global_width == 0) throw std::runtime_error("--size: width is zero");
|
||||
if (global_height == 0) throw std::runtime_error("--size: height is zero");
|
||||
if (global_width > 4096) throw std::runtime_error("--size: width too large");
|
||||
if (global_height > 4096) throw std::runtime_error("--size: height too large");
|
||||
}
|
||||
else {
|
||||
throw std::runtime_error("unknown command line argument");
|
||||
}
|
||||
}
|
||||
|
||||
/* create SYCL objects */
|
||||
sycl::device device = sycl::device(sycl::gpu_selector_v);
|
||||
sycl::queue queue = sycl::queue(device,exception_handler);
|
||||
sycl::context context = queue.get_context();
|
||||
|
||||
if (ZeWrapper::init() != ZE_RESULT_SUCCESS) {
|
||||
std::cerr << "ZeWrapper not successfully initialized" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
sycl::platform platform = device.get_platform();
|
||||
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
|
||||
|
||||
/* enable RTAS extension only when enabled */
|
||||
if (rtas_build_mode == ZeWrapper::RTAS_BUILD_MODE::AUTO)
|
||||
{
|
||||
uint32_t count = 0;
|
||||
std::vector<ze_driver_extension_properties_t> extensions;
|
||||
result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeDriverGetExtensionProperties failed");
|
||||
|
||||
extensions.resize(count);
|
||||
result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("zeDriverGetExtensionProperties failed");
|
||||
|
||||
bool ze_rtas_builder = false;
|
||||
for (uint32_t i=0; i<extensions.size(); i++)
|
||||
{
|
||||
if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
|
||||
ze_rtas_builder = true;
|
||||
}
|
||||
|
||||
if (ze_rtas_builder)
|
||||
result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::RTAS_BUILD_MODE::AUTO);
|
||||
else
|
||||
result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::RTAS_BUILD_MODE::INTERNAL);
|
||||
}
|
||||
else
|
||||
result = ZeWrapper::initRTASBuilder(hDriver,rtas_build_mode);
|
||||
|
||||
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
|
||||
throw std::runtime_error("cannot load ZE_experimental_rtas_builder extension");
|
||||
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw std::runtime_error("cannot initialize ZE_experimental_rtas_builder extension");
|
||||
|
||||
if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
|
||||
std::cout << "using internal RTAS builder" << std::endl;
|
||||
else
|
||||
std::cout << "using Level Zero RTAS builder" << std::endl;
|
||||
|
||||
#if defined(ZE_RAYTRACING_RT_SIMULATION)
|
||||
RTCore::Init();
|
||||
RTCore::SetXeVersion((RTCore::XeVersion)ZE_RAYTRACING_DEVICE);
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
dispatchGlobalsPtr = allocDispatchGlobals(device,context);
|
||||
#endif
|
||||
|
||||
/* build acceleration structure */
|
||||
void* bvh = build_rtas(device,context);
|
||||
|
||||
/* creates framebuffer */
|
||||
const uint32_t width = global_width;
|
||||
const uint32_t height = global_height;
|
||||
unsigned int* pixels = (unsigned int*) sycl::aligned_alloc(64,width*height*sizeof(unsigned int),device,context,sycl::usm::alloc::shared);
|
||||
memset(pixels, 0, width*height*sizeof(uint32_t));
|
||||
|
||||
/* renders image on device */
|
||||
#if defined(ZE_RAYTRACING_RT_SIMULATION)
|
||||
tbb::parallel_for(tbb::blocked_range2d<uint32_t>(0,height,0,width),
|
||||
[&](const tbb::blocked_range2d<uint32_t>& r) {
|
||||
for (int y=r.rows().begin(); y<r.rows().end(); y++) {
|
||||
for (int x=r.cols().begin(); x<r.cols().end(); x++) {
|
||||
render(x,y,bvh,pixels,width,height);
|
||||
}
|
||||
}
|
||||
});
|
||||
#else
|
||||
queue.submit([&](sycl::handler& cgh) {
|
||||
const sycl::range<2> range(width,height);
|
||||
cgh.parallel_for(range, [=](sycl::item<2> item) {
|
||||
const uint32_t x = item.get_id(0);
|
||||
const uint32_t y = item.get_id(1);
|
||||
render(x,y,bvh,pixels,width,height);
|
||||
});
|
||||
});
|
||||
queue.wait_and_throw();
|
||||
#endif
|
||||
|
||||
/* free acceleration structure again */
|
||||
free_accel_buffer(bvh,context);
|
||||
|
||||
#if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS)
|
||||
free_accel_buffer(dispatchGlobalsPtr, context);
|
||||
#endif
|
||||
|
||||
#if defined(ZE_RAYTRACING_RT_SIMULATION)
|
||||
RTCore::Cleanup();
|
||||
#endif
|
||||
|
||||
/* store image to disk */
|
||||
storeTga(pixels,width,height,"cornell_box.tga");
|
||||
if (!reference_img) return 0;
|
||||
|
||||
/* compare to reference image */
|
||||
const size_t err = compareTga("cornell_box.tga", "cornell_box_reference.tga");
|
||||
std::cout << "difference to reference image is " << err << std::endl;
|
||||
const bool ok = err < 32;
|
||||
std::cout << "cornell_box ";
|
||||
if (ok) std::cout << "[PASSED]" << std::endl;
|
||||
else std::cout << "[FAILED]" << std::endl;
|
||||
|
||||
return ok ? 0 : 1;
|
||||
}
|
||||
catch (std::runtime_error e) {
|
||||
std::cerr << "std::runtime_error: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
2267
Framework/external/embree/kernels/rthwif/testing/rthwif_test.cpp
vendored
Normal file
2267
Framework/external/embree/kernels/rthwif/testing/rthwif_test.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue