Initial commit.
This commit is contained in:
commit
d3bb49b3f5
1073 changed files with 484757 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
build
|
||||||
5
Assignments/Assignment1/CMakeLists.txt
Normal file
5
Assignments/Assignment1/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
|
||||||
|
project(Assignment1)
|
||||||
|
|
||||||
|
add_executable(${PROJECT_NAME} "assignment1.cpp")
|
||||||
|
# An executable has no downstream consumers, so link the framework PRIVATE.
target_link_libraries(${PROJECT_NAME} PRIVATE CGI-framework)
|
||||||
5
Assignments/Assignment1/assignment1.cpp
Normal file
5
Assignments/Assignment1/assignment1.cpp
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
5
Assignments/Assignment2/CMakeLists.txt
Normal file
5
Assignments/Assignment2/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
|
||||||
|
project(Assignment2)
|
||||||
|
|
||||||
|
add_executable(${PROJECT_NAME} "assignment2.cpp")
|
||||||
|
# An executable has no downstream consumers, so link the framework PRIVATE.
target_link_libraries(${PROJECT_NAME} PRIVATE CGI-framework)
|
||||||
4
Assignments/Assignment2/assignment2.cpp
Normal file
4
Assignments/Assignment2/assignment2.cpp
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
5
Assignments/Assignment3/CMakeLists.txt
Normal file
5
Assignments/Assignment3/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
|
||||||
|
project(Assignment3)
|
||||||
|
|
||||||
|
add_executable(${PROJECT_NAME} "assignment3.cpp")
|
||||||
|
# An executable has no downstream consumers, so link the framework PRIVATE.
target_link_libraries(${PROJECT_NAME} PRIVATE CGI-framework)
|
||||||
4
Assignments/Assignment3/assignment3.cpp
Normal file
4
Assignments/Assignment3/assignment3.cpp
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
5
Assignments/Assignment4/CMakeLists.txt
Normal file
5
Assignments/Assignment4/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
|
||||||
|
project(Assignment4)
|
||||||
|
|
||||||
|
add_executable(${PROJECT_NAME} "assignment4.cpp")
|
||||||
|
# An executable has no downstream consumers, so link the framework PRIVATE.
target_link_libraries(${PROJECT_NAME} PRIVATE CGI-framework)
|
||||||
5
Assignments/Assignment4/assignment4.cpp
Normal file
5
Assignments/Assignment4/assignment4.cpp
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
13
CMakeLists.txt
Normal file
13
CMakeLists.txt
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
|
||||||
|
|
||||||
|
project(CGI C CXX)
|
||||||
|
|
||||||
|
add_compile_definitions(WORKING_DIR="${CMAKE_CURRENT_SOURCE_DIR}")
|
||||||
|
|
||||||
|
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/Framework)
|
||||||
|
|
||||||
|
|
||||||
|
### Assignments ###
|
||||||
|
foreach (num RANGE 1 1)
|
||||||
|
# Resolve relative to this listfile (as done for Framework) so the path stays
# valid when this project is itself added as a subdirectory of another build.
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/Assignments/Assignment${num})
|
||||||
|
endforeach ()
|
||||||
55
Framework/CMakeLists.txt
Normal file
55
Framework/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
|
||||||
|
|
||||||
|
project(CGI-framework C CXX)
|
||||||
|
|
||||||
|
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/external)
|
||||||
|
|
||||||
|
find_package(OpenGL REQUIRED)
|
||||||
|
|
||||||
|
set(IMGUI_DIR external/imgui)
|
||||||
|
set(PROJECT_INCLUDE_DIRS
|
||||||
|
include
|
||||||
|
# scenegraph
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
|
${EMBREE_INCLUDE_DIRS}
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}
|
||||||
|
${GLFW_INCLUDE}
|
||||||
|
${GLAD_INCLUDE}
|
||||||
|
${IMGUI_DIR}
|
||||||
|
${IMGUI_DIR}/backends)
|
||||||
|
|
||||||
|
set(SOURCES_IMGUI
|
||||||
|
${IMGUI_DIR}/backends/imgui_impl_glfw.cpp
|
||||||
|
${IMGUI_DIR}/backends/imgui_impl_opengl3.cpp
|
||||||
|
${IMGUI_DIR}/imgui.cpp
|
||||||
|
${IMGUI_DIR}/imgui_draw.cpp
|
||||||
|
${IMGUI_DIR}/imgui_demo.cpp
|
||||||
|
${IMGUI_DIR}/imgui_tables.cpp
|
||||||
|
${IMGUI_DIR}/imgui_widgets.cpp)
|
||||||
|
|
||||||
|
set(SOURCES_LIGHT
|
||||||
|
lights/light.cpp
|
||||||
|
lights/light.h
|
||||||
|
lights/ambient_light.cpp
|
||||||
|
lights/ambient_light.h
|
||||||
|
lights/directional_light.cpp
|
||||||
|
lights/directional_light.h
|
||||||
|
lights/point_light.cpp
|
||||||
|
lights/point_light.h
|
||||||
|
lights/quad_light.cpp
|
||||||
|
lights/quad_light.h
|
||||||
|
lights/spot_light.cpp
|
||||||
|
lights/spot_light.h
|
||||||
|
)
|
||||||
|
|
||||||
|
set(SOURCES
|
||||||
|
${SOURCES_IMGUI}
|
||||||
|
${SOURCES_LIGHT}
|
||||||
|
scenegraph/scenegraph.cpp
|
||||||
|
scenegraph/obj_loader.cpp
|
||||||
|
src/application.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
add_library(${PROJECT_NAME} ${SOURCES})
|
||||||
|
target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_INCLUDE_DIRS})
|
||||||
|
target_link_libraries(${PROJECT_NAME} PUBLIC glfw OpenGL::GL glad embree)
|
||||||
26
Framework/external/CMakeLists.txt
vendored
Normal file
26
Framework/external/CMakeLists.txt
vendored
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Accept the same minimum (3.16) as the other listfiles in this repository,
# while still opting in to policy behavior up to 3.25 on newer CMake versions.
cmake_minimum_required(VERSION 3.16...3.25)
|
||||||
|
project(libraries)
|
||||||
|
|
||||||
|
# glfw
|
||||||
|
set(GLFW_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/glfw/include PARENT_SCOPE)
|
||||||
|
|
||||||
|
set(GLFW_LIBRARY_TYPE STATIC CACHE STRING "" FORCE)
|
||||||
|
set(GLFW_BUILD_DOCS OFF CACHE BOOL "" FORCE)
|
||||||
|
set(GLFW_BUILD_TESTS OFF CACHE BOOL "" FORCE)
|
||||||
|
set(GLFW_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
|
||||||
|
|
||||||
|
add_subdirectory(glfw)
|
||||||
|
|
||||||
|
set_property(TARGET glfw PROPERTY FOLDER "external")
|
||||||
|
|
||||||
|
# embree
|
||||||
|
# Set as a forced cache entry, like the GLFW options above. A plain variable
# may be discarded by the subproject's option() call when policy CMP0077 is
# OLD there (NOTE(review): depends on embree's own cmake_minimum_required).
set(EMBREE_STATIC_LIB ON CACHE BOOL "" FORCE)
|
||||||
|
set(EMBREE_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/embree/include ${CMAKE_CURRENT_SOURCE_DIR}/embree/common PARENT_SCOPE)
|
||||||
|
|
||||||
|
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/embree)
|
||||||
|
|
||||||
|
|
||||||
|
# glad
|
||||||
|
set(GLAD_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/glad/include PARENT_SCOPE)
|
||||||
|
add_subdirectory(glad)
|
||||||
|
set_property(TARGET glad PROPERTY FOLDER "external")
|
||||||
904
Framework/external/embree/CHANGELOG.md
vendored
Normal file
904
Framework/external/embree/CHANGELOG.md
vendored
Normal file
|
|
@ -0,0 +1,904 @@
|
||||||
|
Version History
|
||||||
|
---------------
|
||||||
|
|
||||||
|
### Embree 4.3.1
|
||||||
|
- Add missing EMBREE_GEOMETRY types to embree-config.cmake
|
||||||
|
- User defined thread count now takes precedence for internal task scheduler
|
||||||
|
- Fixed static linking issue with ze_wrapper library
|
||||||
|
- Better error reporting for SYCL platform and driver problems in embree_info and tutorial apps.
|
||||||
|
- Patch to glfw source is not applied by default anymore.
|
||||||
|
- Known issue: Running Embree on Intel® Data Center GPU Max Series with 2 tiles (e.g. Intel® Data Center GPU Max 1550) requires setting the environment variable ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE.
|
||||||
|
- Known issue: Embree build using Apple Clang 15 and ARM support (via the SSE2NEON library) may cause "EXC_BAD_INSTRUCTION" runtime exceptions. Please use Apple Clang <= 14 on macOS.
|
||||||
|
|
||||||
|
### Embree 4.3.0
|
||||||
|
- Added instance array primitive for reducing memory requirements in scenes
|
||||||
|
with large amounts of similar instances.
|
||||||
|
- Properly checks driver if L0 RTAS extension can get loaded.
|
||||||
|
- Added varying version of rtcGetGeometryTransform for ISPC.
|
||||||
|
- Fixed signature of RTCMemoryMonitorFunction for ISPC.
|
||||||
|
- Add support for ARM64 Windows platform in CMake.
|
||||||
|
|
||||||
|
### Embree 4.2.0
|
||||||
|
- SYCL version of Embree with GPU support is no longer in beta phase.
|
||||||
|
- Improved BVH build performance on many core machines for applications that oversubscribe threads.
|
||||||
|
- Added rtcGetGeometryTransformFromScene API function that can get used inside SYCL kernels.
|
||||||
|
- No longer linking to ze_loader in SYCL mode to avoid Intel(R) oneAPI Level Zero dependency
|
||||||
|
for CPU rendering.
|
||||||
|
- Releasing test package to test Embree.
|
||||||
|
|
||||||
|
### Embree 4.1.0
|
||||||
|
- Added support for Intel® Data Center GPU Max Series.
|
||||||
|
- Added ARM64 Linux support.
|
||||||
|
- Added EMBREE_BACKFACE_CULLING_SPHERES cmake option. The new cmake option defaults to OFF.
|
||||||
|
|
||||||
|
### Embree 4.0.1
|
||||||
|
- Improved performance for Tiger Lake, Comet Lake, Cannon Lake, Kaby Lake,
|
||||||
|
and Skylake client CPUs by using 256 bit SIMD instructions by default.
|
||||||
|
- Fixed broken motion blur of RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE geometry type.
|
||||||
|
- Fixed bvh build retry issue for TBB 2020.3
|
||||||
|
- Added support for Intel® Data Center GPU Flex Series
|
||||||
|
- Fixed issue on systems without a SYCL platform.
|
||||||
|
|
||||||
|
### Embree 4.0.0
|
||||||
|
- This Embree release adds support for Intel® Arc™ GPUs through SYCL.
|
||||||
|
- The SYCL support of Embree is in beta phase. Current functionality, quality,
|
||||||
|
and GPU performance may not reflect that of the final product. Please read the
|
||||||
|
documentation section "Embree SYCL Known Issues" for known limitations.
|
||||||
|
- Embree CPU support in this release is at Gold level, incorporating the same quality
|
||||||
|
and performance as previous releases.
|
||||||
|
- A small number of API changes were required to get optimal experience and
|
||||||
|
performance on the CPU and GPU. See documentation section "Upgrading from Embree 3 to
|
||||||
|
Embree 4" for details.
|
||||||
|
- rtcIntersect and rtcOccluded function arguments changed slightly.
|
||||||
|
- RTCIntersectContext is renamed to RTCRayQueryContext and most members moved to
|
||||||
|
new RTCIntersectArguments and RTCOccludedArguments structures.
|
||||||
|
- rtcFilterIntersection and rtcFilterOcclusion API calls got replaced by
|
||||||
|
rtcInvokeIntersectFilterFromGeometry and rtcInvokeOccludedFilterFromGeometry API calls.
|
||||||
|
- rtcSetGeometryEnableFilterFunctionFromArguments enables argument filter functions for some geometry.
|
||||||
|
- RTC_RAY_QUERY_FLAG_INVOKE_ARGUMENT_FILTER ray query flag enables argument filter functions for each geometry.
|
||||||
|
- User geometry callbacks have to return if a valid hit was found.
|
||||||
|
- Ray masking is enabled by default now as required by most users.
|
||||||
|
- The default ray mask for geometries got changed from 0xFFFFFFFF to 0x1.
|
||||||
|
- Removed ray stream API as rarely used with minimal performance benefits over packet tracing.
|
||||||
|
- Introduced rtcForwardIntersect/rtcForwardOccluded API calls to trace tail recursive rays from user geometry callback.
|
||||||
|
- The rtcGetGeometryUserDataFromScene API call got added to be used in SYCL code.
|
||||||
|
- Added support for user geometry callback function pointer passed through ray query context
|
||||||
|
- Feature flags enable reducing code complexity for optimal performance on the GPU.
|
||||||
|
- Fixed compilation issues for ARM AArch64 processor under Linux.
|
||||||
|
- Setting default frequency level to SIMD256 for ARM on all platforms.
|
||||||
|
This allows using double pumped NEON execution by enabling EMBREE_ISA_NEON2X in cmake under Linux.
|
||||||
|
- Fixed missing end caps of motion blurred line segments.
|
||||||
|
- EMBREE_ISPC_SUPPORT is turned OFF by default.
|
||||||
|
- Embree drops support of the deprecated Intel(R) Compiler. It is replaced by
|
||||||
|
the Intel(R) oneAPI DPC++/C++ Compiler on Windows and Linux and the
|
||||||
|
Intel(R) C++ Classic Compiler on MacOS (latest tested version is 2023.0.0).
|
||||||
|
|
||||||
|
### Embree 3.13.5
|
||||||
|
- Fixed bug in bounding flat Catmull Rom curves of subdivision level 4.
|
||||||
|
- Improved self intersection avoidance for
|
||||||
|
RTC_GEOMETRY_TYPE_DISC_POINT geometry type. Intersections are
|
||||||
|
skipped if the ray origin lies inside the sphere defined by the
|
||||||
|
point primitive. Self intersection avoidance can get disabled at compile time
|
||||||
|
using the EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE cmake option.
|
||||||
|
- Fixed spatial splitting for non-planar quads.
|
||||||
|
|
||||||
|
### Embree 3.13.4
|
||||||
|
- Using 8-wide BVH and double pumped NEON instructions on Apple M1 gives 8% performance boost.
|
||||||
|
- Fixed binning related crash in SAH BVH builder.
|
||||||
|
- Added EMBREE_TBB_COMPONENT cmake option to define the component/library name of Intel® TBB (default: tbb).
|
||||||
|
- Embree now supports Intel® oneAPI DPC++/C++ Compiler 2022.0.0
|
||||||
|
|
||||||
|
### Embree 3.13.3
|
||||||
|
- Invalid multi segment motion blurred normal oriented curves are properly excluded from BVH build.
|
||||||
|
- Fixing issue with normal oriented curve construction when center curve curvature is very large.
|
||||||
|
Due to this change normal oriented curve shape changes slightly.
|
||||||
|
- Fixed crash caused by disabling a geometry and then detaching it from the scene.
|
||||||
|
- Bugfix in emulated ray packet intersection when EMBREE_RAY_PACKETS is turned off.
|
||||||
|
- Bugfix for linear quaternion interpolation fallback.
|
||||||
|
- Fixed issues with spaces in path to Embree build folder.
|
||||||
|
- Some fixes to compile Embree in SSE mode using WebAssembly.
|
||||||
|
- Bugfix for occlusion rays with grids and ray packets.
|
||||||
|
- We no longer provide installers for Windows and macOS, please use the ZIP files instead.
|
||||||
|
- Upgrading to Intel® ISPC 1.17.0 for release build.
|
||||||
|
- Upgrading to Intel® oneTBB 2021.5.0 for release build.
|
||||||
|
|
||||||
|
### Embree 3.13.2
|
||||||
|
- Avoiding spatial split positions that are slightly out of geometry bounds.
|
||||||
|
- Introduced rtcGetGeometryThreadSafe function, which is a thread safe version of rtcGetGeometry.
|
||||||
|
- Using more accurate rcp implementation.
|
||||||
|
- Bugfix to rare corner case of high quality BVH builder.
|
||||||
|
|
||||||
|
### Embree 3.13.1
|
||||||
|
- Added support for Intel® ISPC ARM target.
|
||||||
|
- Releases upgrade to Intel® TBB 2021.3.0 and Intel® ISPC 1.16.1
|
||||||
|
|
||||||
|
### Embree 3.13.0
|
||||||
|
- Added support for Apple M1 CPUs.
|
||||||
|
- RTC_SUBDIVISION_MODE_NO_BOUNDARY now works properly for non-manifold edges.
|
||||||
|
- CMake target 'uninstall' is not defined if it already exists.
|
||||||
|
- Embree no longer reads the .embree3 config files, thus all configuration has
|
||||||
|
to get passed through the config string to rtcNewDevice.
|
||||||
|
- Releases upgrade to Intel® TBB 2021.2.0 and Intel® ISPC 1.15.0
|
||||||
|
- Intel® TBB dll is automatically copied into build folder after build on windows.
|
||||||
|
|
||||||
|
### Embree 3.12.2
|
||||||
|
- Fixed wrong uv and Ng for grid intersector in robust mode for AVX.
|
||||||
|
- Removed optimizations for Knights Landing.
|
||||||
|
- Upgrading release builds to use Intel® oneTBB 2021.1.1
|
||||||
|
|
||||||
|
### Embree 3.12.1
|
||||||
|
|
||||||
|
- Changed default frequency level to SIMD128 for Skylake, Cannon Lake, Comet Lake and Tiger Lake CPUs.
|
||||||
|
This change typically improves performance for renderers that just use SSE by maintaining higher
|
||||||
|
CPU frequencies. In case your renderer is AVX optimized you can get higher ray tracing performance
|
||||||
|
by configuring the frequency level to simd256 through passing frequency_level=simd256 to rtcNewDevice.
|
||||||
|
|
||||||
|
### Embree 3.12.0
|
||||||
|
|
||||||
|
- Added linear cone curve geometry support. In this mode a real geometric surface for curves
|
||||||
|
with linear basis is rendered using capped cones. They are discontinuous at edge boundaries.
|
||||||
|
- Enabled fast two level builder for instances when low quality build is requested.
|
||||||
|
- Bugfix for BVH build when geometries got disabled.
|
||||||
|
- Added EMBREE_BACKFACE_CULLING_CURVES cmake option. This allows for a cheaper round
|
||||||
|
linear curve intersection when correct internal tracking and back hits are not required.
|
||||||
|
The new cmake option defaults to OFF.
|
||||||
|
- User geometries with invalid bounds with lower>upper in some dimension will be ignored.
|
||||||
|
- Increased robustness for grid interpolation code and fixed returned out of range u/v
|
||||||
|
coordinates for grid primitive.
|
||||||
|
- Fixed handling of motion blur time range for sphere, discs, and oriented disc geometries.
|
||||||
|
- Fixed missing model data in releases.
|
||||||
|
- Ensure compatibility to newer versions of Intel® oneTBB.
|
||||||
|
- Motion blur BVH nodes no longer store NaN values.
|
||||||
|
|
||||||
|
### Embree 3.11.0
|
||||||
|
|
||||||
|
- Round linear curves now automatically check for the existence of left and right
|
||||||
|
connected segments if the flags buffer is empty. Left segments exist if the
|
||||||
|
segment(id-1) + 1 == segment(id) and similarly for right segments.
|
||||||
|
- Implemented the min-width feature for curves and points, which allows to increase the
|
||||||
|
radius in a distance dependent way, such that the curve or points thickness is n pixels wide.
|
||||||
|
- Round linear curves are closed now also at their start.
|
||||||
|
- Embree no longer supports Visual Studio 2013 starting with this release.
|
||||||
|
- Bugfix in subdivision tessellation level assignment for non-quad base primitives
|
||||||
|
- Small meshes are directly added to top level build phase of two-level builder to reduce memory consumption.
|
||||||
|
- Enabled fast two level builder for user geometries when low quality build is requested.
|
||||||
|
|
||||||
|
### Embree 3.10.0
|
||||||
|
|
||||||
|
- Added EMBREE_COMPACT_POLYS CMake option which enables double indexed triangle and quad
|
||||||
|
leaves to reduce memory consumption in compact mode by an additional 40% at about
|
||||||
|
15% performance impact. This new mode is disabled by default.
|
||||||
|
- Compile fix for Intel® oneTBB 2021.1-beta05
|
||||||
|
- Releases upgrade to Intel® TBB 2020.2
|
||||||
|
- Compile fix for Intel® ISPC v1.13.0
|
||||||
|
- Adding RPATH to libembree.so in releases
|
||||||
|
- Increased required CMake version to 3.1.0
|
||||||
|
- Made instID member for array of pointers ray stream layout optional again.
|
||||||
|
|
||||||
|
### Embree 3.9.0
|
||||||
|
|
||||||
|
- Added round linear curve geometry support. In this mode a real geometric surface for curves
|
||||||
|
with linear basis is rendered using capped cones with spherical filling between
|
||||||
|
the curve segments.
|
||||||
|
- Added rtcGetSceneDevice API function, that returns the device a scene got created in.
|
||||||
|
- Improved performance of round curve rendering by up to 1.8x.
|
||||||
|
- Bugfix to sphere intersection filter invocation for back hit.
|
||||||
|
- Fixed wrong assertion that triggered for invalid curves which anyway get filtered out.
|
||||||
|
- RelWithDebInfo mode no longer enables assertions.
|
||||||
|
- Fixed an issue in FindTBB.cmake that caused compile error with Debug build under Linux.
|
||||||
|
- Embree releases no longer provide RPMs for Linux. Please use the RPMs coming with the package
|
||||||
|
manager of your Linux distribution.
|
||||||
|
|
||||||
|
### Embree 3.8.0
|
||||||
|
|
||||||
|
- Added collision detection support for user geometries (see rtcCollide API function)
|
||||||
|
- Passing geomID to user geometry callbacks.
|
||||||
|
- Bugfix in AVX512VL codepath for rtcIntersect1
|
||||||
|
- For sphere geometries the intersection filter now gets invoked for
|
||||||
|
front and back hit.
|
||||||
|
- Fixed some bugs for quaternion motion blur.
|
||||||
|
- RTCRayQueryContext always non-const in Embree API
|
||||||
|
- Made RTCHit aligned to 16 bytes in Embree API
|
||||||
|
|
||||||
|
### New Features in Embree 3.7.0
|
||||||
|
- Added quaternion motion blur for correct interpolation of rotational transformations.
|
||||||
|
- Fixed wrong bounding calculations when a motion blurred instance did
|
||||||
|
instantiate a motion blurred scene.
|
||||||
|
- In robust mode the depth test consistently uses tnear <= t <= tfar now in order
|
||||||
|
to robustly continue traversal at a previous hit point
|
||||||
|
in a way that guarantees reaching all hits, even hits at the same place.
|
||||||
|
- Fixed depth test in robust mode to be precise at tnear and tfar.
|
||||||
|
- Added next_hit tutorial to demonstrate robustly collecting all hits
|
||||||
|
along a ray using multiple ray queries.
|
||||||
|
- Implemented robust mode for curves. This has a small performance impact but
|
||||||
|
fixes bounding problems with flat curves.
|
||||||
|
- Improved quality of motion blur BVH by using linear bounds during binning.
|
||||||
|
- Fixed issue with motion blur builder where number of time segments
|
||||||
|
for SAH heuristic were counted wrong due to some numerical issues.
|
||||||
|
- Fixed an accuracy issue with rendering very short fat curves.
|
||||||
|
- rtcCommitScene can now get called during rendering from multiple threads
|
||||||
|
to lazily build geometry. When Intel® TBB is used this causes a much lower overhead
|
||||||
|
than using rtcJoinCommitScene.
|
||||||
|
- Geometries can now get attached to multiple scenes at the same time, which
|
||||||
|
simplifies mapping general scene graphs to API.
|
||||||
|
- Updated to Intel® TBB 2019.9 for release builds.
|
||||||
|
- Fixed a bug in the BVH builder for Grid geometries.
|
||||||
|
- Added macOS Catalina support to Embree releases.
|
||||||
|
|
||||||
|
### New Features in Embree 3.6.1
|
||||||
|
- Restored binary compatibility between Embree 3.6 and 3.5 when single-level instancing is used.
|
||||||
|
- Fixed bug in subgrid intersector
|
||||||
|
- Removed point query alignment in Intel® ISPC header
|
||||||
|
|
||||||
|
### New Features in Embree 3.6
|
||||||
|
- Added Catmull-Rom curve types.
|
||||||
|
- Added support for multi-level instancing.
|
||||||
|
- Added support for point queries.
|
||||||
|
- Fixed a bug preventing normal oriented curves being used unless timesteps were
|
||||||
|
specified.
|
||||||
|
- Fixed bug in external BVH builder when configured for dynamic build.
|
||||||
|
- Added support for new config flag "user_threads=N" to device initialization
|
||||||
|
which sets the number of threads used by Intel® TBB but created by the user.
|
||||||
|
- Fixed automatic vertex buffer padding when using rtcSetNewGeometry API function.
|
||||||
|
|
||||||
|
### New Features in Embree 3.5.2
|
||||||
|
- Added EMBREE_API_NAMESPACE cmake option that allows to put all Embree API functions
|
||||||
|
inside a user defined namespace.
|
||||||
|
- Added EMBREE_LIBRARY_NAME cmake option that allows to rename the Embree library.
|
||||||
|
- When Embree is compiled as static library, EMBREE_STATIC_LIB no longer has to get
|
||||||
|
defined before including the Embree API headers.
|
||||||
|
- Added CPU frequency_level device configuration to allow an application to specify the
|
||||||
|
frequency level it wants to run on. This forces Embree to not use optimizations that
|
||||||
|
may reduce the CPU frequency below that level. By default Embree is configured to the
|
||||||
|
AVX-heavy frequency level, thus if the application uses solely non-AVX code, configuring
|
||||||
|
the Embree device with "frequency_level=simd128" may give better performance.
|
||||||
|
- Fixed a bug in the spatial split builder which caused it to fail
|
||||||
|
for scenes with more than 2^24 geometries.
|
||||||
|
|
||||||
|
### New Features in Embree 3.5.1
|
||||||
|
- Fixed ray/sphere intersector to work also for non-normalized rays.
|
||||||
|
- Fixed self intersection avoidance for ray oriented discs when
|
||||||
|
non-normalized rays were used.
|
||||||
|
- Increased maximal face valence for subdiv patch to 64 and reduced stack size
|
||||||
|
requirement for subdiv patch evaluation.
|
||||||
|
|
||||||
|
### New Features in Embree 3.5.0
|
||||||
|
- Changed normal oriented curve definition to fix waving artefacts.
|
||||||
|
- Fixed bounding issue for normal oriented motion blurred curves.
|
||||||
|
- Fixed performance issue with motion blurred point geometry.
|
||||||
|
- Fixed generation of documentation with new pandoc versions.
|
||||||
|
|
||||||
|
### New Features in Embree 3.4.0
|
||||||
|
- Added point primitives (spheres, ray-oriented discs, normal-oriented discs).
|
||||||
|
- Fixed crash triggered by scenes with only invalid primitives.
|
||||||
|
- Improved robustness of quad/grid-based intersectors.
|
||||||
|
- Upgraded to Intel® TBB 2019.2 for release builds.
|
||||||
|
|
||||||
|
### New Features in Embree 3.3.0
|
||||||
|
- Added support for motion blur time range per geometry. This way geometries
|
||||||
|
can appear and disappear during the camera shutter and time steps do not have
|
||||||
|
to start and end at camera shutter interval boundaries.
|
||||||
|
- Fixed crash with pathtracer when using --triangle-sphere command line.
|
||||||
|
- Fixed crash with pathtracer when using --shader ao command line.
|
||||||
|
- Fixed tutorials showing a black window on macOS 10.14 until moved.
|
||||||
|
|
||||||
|
### New Features in Embree 3.2.4
|
||||||
|
- Fixed compile issues with ICC 2019.
|
||||||
|
- Released ZIP files for Windows are now provided in a
|
||||||
|
version linked against Visual Studio 2013 and Visual Studio 2015.
|
||||||
|
|
||||||
|
### New Features in Embree 3.2.3
|
||||||
|
- Fixed crash when using curves with RTC_SCENE_FLAG_DYNAMIC
|
||||||
|
combined with RTC_BUILD_QUALITY_MEDIUM.
|
||||||
|
|
||||||
|
### New Features in Embree 3.2.2
|
||||||
|
- Fixed intersection distance for unnormalized rays with line segments.
|
||||||
|
- Removed libmmd.dll dependency in release builds for Windows.
|
||||||
|
- Fixed detection of AppleClang compiler under MacOSX.
|
||||||
|
|
||||||
|
### New Features in Embree 3.2.1
|
||||||
|
- Bugfix in flat mode for hermite curves.
|
||||||
|
- Added EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR cmake option to
|
||||||
|
control self intersection avoidance for flat curves.
|
||||||
|
- Performance fix when instantiating motion blurred scenes. The application
|
||||||
|
should best use two (or more) time steps for an instance that instantiates
|
||||||
|
a motion blurred scene.
|
||||||
|
- Fixed AVX512 compile issue with GCC 6.1.1.
|
||||||
|
- Fixed performance issue with rtcGetGeometryUserData when used
|
||||||
|
during rendering.
|
||||||
|
- Bugfix in length of derivatives for grid geometry.
|
||||||
|
- Added BVH8 support for motion blurred curves and lines. For some workloads
|
||||||
|
this increases performance by up to 7%.
|
||||||
|
- Fixed rtcGetGeometryTransform to return the local to world transform.
|
||||||
|
- Fixed bug in multi segment motion blur that caused missing of perfectly
|
||||||
|
axis aligned geometry.
|
||||||
|
- Reduced memory consumption of small scenes by 4x.
|
||||||
|
- Reduced temporal storage of grid builder.
|
||||||
|
|
||||||
|
### New Features in Embree 3.2.0
|
||||||
|
- Improved watertightness of robust mode.
|
||||||
|
- Line segments, and other curves are now all contained in a single
|
||||||
|
BVH which improves performance when these are both used in a scene.
|
||||||
|
- Performance improvement of up to 20% for line segments.
|
||||||
|
- Bugfix to Embree2 to Embree3 conversion script.
|
||||||
|
- Added support for Hermite curve basis.
|
||||||
|
- Semantics of normal buffer for normal oriented curves has
|
||||||
|
changed to simplify usage. Please see documentation for details.
|
||||||
|
- Using GLFW and imgui in tutorials.
|
||||||
|
- Fixed floating point exception in static variable initialization.
|
||||||
|
- Fixed invalid memory access in rtcGetGeometryTransform for non-motion
|
||||||
|
blur instances.
|
||||||
|
- Improved self intersection avoidance for flat curves. Transparency rays
|
||||||
|
with tnear set to previous hit distance do not need curve radius
|
||||||
|
based self intersection avoidance as same hit is calculated again. For this
|
||||||
|
reason self intersection avoidance is now only applied to ray origin.
|
||||||
|
|
||||||
|
### New Features in Embree 3.1.0
|
||||||
|
- Added new normal-oriented curve primitive for ray tracing of grass-like
|
||||||
|
structures.
|
||||||
|
- Added new grid primitive for ray tracing tessellated and displaced surfaces
|
||||||
|
in very memory efficient manner.
|
||||||
|
- Fixed bug of ribbon curve intersector when derivative was zero.
|
||||||
|
- Installing all static libraries when EMBREE_STATIC_LIB is enabled.
|
||||||
|
- Added API functions to access topology of subdivision mesh.
|
||||||
|
- Reduced memory consumption of instances.
|
||||||
|
- Improved performance of instances by 8%.
|
||||||
|
- Reduced memory consumption of curves by up to 2x.
|
||||||
|
- Up to 5% higher performance on AVX-512 architectures.
|
||||||
|
- Added native support for multiple curve basis functions. Internal
|
||||||
|
basis conversions are no longer performed, which saves additional
|
||||||
|
memory when multiple bases are used.
|
||||||
|
- Fixed issue with non thread safe local static variable initialization
|
||||||
|
in VS2013.
|
||||||
|
- Bugfix in rtcSetNewGeometry. Vertex buffers did not get properly
|
||||||
|
overallocated.
|
||||||
|
- Replaced ImageMagick with OpenImageIO in the tutorials.
|
||||||
|
|
||||||
|
### New Features in Embree 3.0.0
|
||||||
|
- Switched to a new version of the API which provides improved
|
||||||
|
flexibility but is not backward compatible. Please see "Upgrading
|
||||||
|
from Embree 2 to Embree 3" section of the documentation for upgrade
|
||||||
|
instructions. In particular, we provide a Python script that performs
|
||||||
|
most of the transition work.
|
||||||
|
- User geometries inside an instanced scene and a top-level scene no
|
||||||
|
longer need to handle the instID field of the ray differently. They
|
||||||
|
both just need to copy the context.instID into the ray.instID field.
|
||||||
|
- Support for context filter functions that can be assigned to a ray
|
||||||
|
query.
|
||||||
|
- User geometries can now invoke filter functions using the
|
||||||
|
rtcFilterIntersection and rtcFilterOcclusion calls.
|
||||||
|
- Higher flexibility through specifying build quality per scene and
|
||||||
|
geometry.
|
||||||
|
- Geometry normal now follows the commonly used right-hand rule.
|
||||||
|
- Added self-intersection avoidance to ribbon curves and lines.
|
||||||
|
Applications do not have to implement self-intersection workarounds
|
||||||
|
for these primitive types anymore.
|
||||||
|
- Added support for 4 billion primitives in a single scene.
|
||||||
|
- Removed the RTC_MAX_USER_VERTEX_BUFFERS and RTC_MAX_INDEX_BUFFERS
|
||||||
|
limitations.
|
||||||
|
- Reduced memory consumption by 192 bytes per instance.
|
||||||
|
- Fixed some performance issues on AVX-512 architectures.
|
||||||
|
- Individual Contributor License Agreement (ICLA) and Corporate
|
||||||
|
Contributor License Agreement (CCLA) no longer required to
|
||||||
|
contribute to the project.
|
||||||
|
|
||||||
|
### New Features in Embree 2.17.5
|
||||||
|
- Improved watertightness of robust mode.
|
||||||
|
- Fixed floating point exception in static variable initialization.
|
||||||
|
- Fixed AVX512 compile issue with GCC 6.1.1.
|
||||||
|
|
||||||
|
### New Features in Embree 2.17.4
|
||||||
|
- Fixed AVX512 compile issue with GCC 7.
|
||||||
|
- Fixed issue with non-thread-safe local static variable
|
||||||
|
initialization in VS2013.
|
||||||
|
- Fixed bug in the 4 and 8-wide packet intersection of instances with
|
||||||
|
multi-segment motion blur on AVX-512 architectures.
|
||||||
|
- Fixed bug in rtcOccluded4/8/16 when only AVX-512 ISA was enabled.
|
||||||
|
|
||||||
|
### New Features in Embree 2.17.3
|
||||||
|
- Fixed GCC compile warning in debug mode.
|
||||||
|
- Fixed bug of ribbon curve intersector when derivative was zero.
|
||||||
|
- Installing all static libraries when EMBREE_STATIC_LIB is enabled.
|
||||||
|
|
||||||
|
### New Features in Embree 2.17.2
|
||||||
|
- Made BVH build of curve geometry deterministic.
|
||||||
|
|
||||||
|
### New Features in Embree 2.17.1
|
||||||
|
- Improved performance of occlusion ray packets by up to 50%.
|
||||||
|
- Fixed detection of Clang for CMake 3 under MacOSX
|
||||||
|
- Fixed AVX code compilation issue with GCC 7 compiler caused by
|
||||||
|
explicit use of vzeroupper intrinsics.
|
||||||
|
- Fixed an issue where Clang address sanitizer reported an error in
|
||||||
|
the internal tasking system.
|
||||||
|
- Added fix to compile on 32 bit Linux distribution.
|
||||||
|
- Fixed some wrong relative include paths in Embree.
|
||||||
|
- Improved performance of robust single ray mode by 5%.
|
||||||
|
- Added EMBREE_INSTALL_DEPENDENCIES option (default OFF) to enable
|
||||||
|
installing of Embree dependencies.
|
||||||
|
- Fixed performance regression for occlusion ray streams.
|
||||||
|
- Reduced temporary memory requirements of BVH builder for curves and
|
||||||
|
line segments.
|
||||||
|
- Fixed performance regression for user geometries and packet ray tracing.
|
||||||
|
- Fixed bug where wrong closest hit was reported for very curvy hair segment.
|
||||||
|
|
||||||
|
### New Features in Embree 2.17.0
|
||||||
|
- Improved packet ray tracing performance for coherent rays by 10-60%
|
||||||
|
(requires RTC_INTERSECT_COHERENT flag).
|
||||||
|
- Improved ray tracing performance for incoherent rays on
|
||||||
|
AVX-512 architectures by 5%.
|
||||||
|
- Improved ray tracing performance for streams of incoherent rays
|
||||||
|
by 5-15%.
|
||||||
|
- Fixed tbb_debug.lib linking error under Windows.
|
||||||
|
- Fast coherent ray stream and packet code paths now also work in robust mode.
|
||||||
|
- Using less aggressive prefetching for large BVH nodes which
|
||||||
|
results in 1-2% higher ray tracing performance.
|
||||||
|
- Precompiled binaries have stack-protector enabled, except for
|
||||||
|
traversal kernels. BVH builders can be slightly slower due to this
|
||||||
|
change. If you want stack-protectors disabled please turn off
|
||||||
|
EMBREE_STACK_PROTECTOR in cmake and build the binaries yourself.
|
||||||
|
- When enabling ISAs individually, the 8-wide BVH was previously only
|
||||||
|
available when the AVX ISA was also selected. This issue is now
|
||||||
|
fixed, and one can enable only AVX2 and still get best
|
||||||
|
performance by using an 8-wide BVH.
|
||||||
|
- Fixed rtcOccluded1 and rtcOccluded1Ex API functions which were
|
||||||
|
broken in Intel® ISPC.
|
||||||
|
- Providing MSI installer for Windows.
|
||||||
|
|
||||||
|
### New Features in Embree 2.16.5
|
||||||
|
- Bugfix in the robust triangle intersector that rarely caused NaNs.
|
||||||
|
- Fixed bug in hybrid traversal kernel when BVH leaf was entered with no
|
||||||
|
active rays. This rarely caused crashes when used with instancing.
|
||||||
|
- Fixed bug introduced in Embree 2.16.2 which caused instancing not to
|
||||||
|
work properly when a width smaller than the native SIMD width was
|
||||||
|
used in ray packet mode.
|
||||||
|
- Fixed bug in the curve geometry intersector that caused rendering
|
||||||
|
artefacts for Bézier curves with p0=p1 and/or p2=p3.
|
||||||
|
- Fixed bug in the curve geometry intersector that caused hit results
|
||||||
|
with NaNs to be reported.
|
||||||
|
- Fixed masking bug that caused rare cracks in curve geometry.
|
||||||
|
- Enabled support for SSE2 in precompiled binaries again.
|
||||||
|
|
||||||
|
### New Features in Embree 2.16.4
|
||||||
|
- Bugfix in the ribbon intersector for hair primitives. Non-normalized
|
||||||
|
rays caused wrong intersection distance to be reported.
|
||||||
|
|
||||||
|
### New Features in Embree 2.16.3
|
||||||
|
- Increased accuracy for handling subdivision surfaces. This fixes
|
||||||
|
cracks when using displacement mapping but reduces performance
|
||||||
|
at irregular vertices.
|
||||||
|
- Fixed a bug where subdivision geometry was not properly updated
|
||||||
|
when modifying only the tessellation rate and vertex array.
|
||||||
|
|
||||||
|
### New Features in Embree 2.16.2
|
||||||
|
- Fixed bug that caused NULL ray query context in intersection
|
||||||
|
filter when instancing was used.
|
||||||
|
- Fixed an issue where uv's were outside the triangle (or quad) for
|
||||||
|
very small triangles (or quads). In robust mode we improved the uv
|
||||||
|
calculation to avoid that issue, in fast mode we accept that
|
||||||
|
inconsistency for better performance.
|
||||||
|
- Changed UV encoding for non-quad subdivision patches to
|
||||||
|
allow a subpatch UV range of `[-0.5,1.5[`. Using this new encoding
|
||||||
|
one can use finite differences to calculate derivatives if required.
|
||||||
|
Please adjust your code in case you rely on the old encoding.
|
||||||
|
|
||||||
|
### New Features in Embree 2.16.1
|
||||||
|
- Workaround for compile issues with Visual Studio 2017
|
||||||
|
- Fixed bug in subdiv code for static scenes when using tessellation
|
||||||
|
levels larger than 50.
|
||||||
|
- Fixed low performance when adding many geometries to a scene.
|
||||||
|
- Fixed high memory consumption issue when using instances in
|
||||||
|
dynamic scene (by disabling two level builder for user geometries
|
||||||
|
and instances).
|
||||||
|
|
||||||
|
### New Features in Embree 2.16.0
|
||||||
|
- Improved multi-segment motion blur support for scenes with
|
||||||
|
different number of time steps per mesh.
|
||||||
|
- New top level BVH builder that improves build times and BVH quality
|
||||||
|
of two-level BVHs.
|
||||||
|
- Added support to enable only a single ISA. Previously code was
|
||||||
|
always compiled for SSE2.
|
||||||
|
- Improved single ray tracing performance for incoherent rays on
|
||||||
|
AVX-512 architectures by 5-10%.
|
||||||
|
- Improved packet/hybrid ray tracing performance for incoherent rays
|
||||||
|
on AVX-512 architectures by 10-30%.
|
||||||
|
- Improved stream ray tracing performance for coherent rays in
|
||||||
|
structure-of-pointers layout by 40-70%.
|
||||||
|
- BVH builder for compact scenes of triangles and quads needs
|
||||||
|
essentially no temporary memory anymore. This doubles the
|
||||||
|
maximal scene size that can be rendered in compact mode.
|
||||||
|
- Triangles no longer store the geometry normal in fast/default mode
|
||||||
|
which reduces memory consumption by up to 20%.
|
||||||
|
- Compact mode now consistently uses BVH4, which reduces memory
|
||||||
|
consumption by up to 10%.
|
||||||
|
- Reduced memory consumption for small scenes (of 10k-100k primitives)
|
||||||
|
and dynamic scenes.
|
||||||
|
- Improved performance of user geometries and instances through BVH8
|
||||||
|
support.
|
||||||
|
- The API now supports specifying the geometry ID of a geometry at
|
||||||
|
construction time. This way matching the geometry ID used by
|
||||||
|
Embree and the application is simplified.
|
||||||
|
- Fixed a bug that would have caused a failure of the BVH builder
|
||||||
|
for dynamic scenes when run on a machine with more than 1000 threads.
|
||||||
|
- Fixed a bug that could have been triggered when reaching the maximal
|
||||||
|
number of mappings under Linux (`vm.max_map_count`). This could have
|
||||||
|
happened when creating a large number of small static scenes.
|
||||||
|
- Added huge page support for Windows and MacOSX (experimental).
|
||||||
|
- Added support for Visual Studio 2017.
|
||||||
|
- Removed support for Visual Studio 2012.
|
||||||
|
- Precompiled binaries now require a CPU supporting at least the
|
||||||
|
SSE4.2 ISA.
|
||||||
|
- We no longer provide precompiled binaries for 32-bit on Windows.
|
||||||
|
- Under Windows one now has to use the platform toolset option in
|
||||||
|
CMake to switch to Clang or the Intel® Compiler.
|
||||||
|
- Fixed a bug for subdivision meshes when using the incoherent scene
|
||||||
|
flag.
|
||||||
|
- Fixed a bug in the line geometry intersection, that caused reporting
|
||||||
|
an invalid line segment intersection with primID -1.
|
||||||
|
- Buffer stride for vertex buffers of different time steps of triangle
|
||||||
|
and quad meshes have to be identical now.
|
||||||
|
- Fixed a bug in the curve geometry intersection code when passed a
|
||||||
|
perfect cylinder.
|
||||||
|
|
||||||
|
### New Features in Embree 2.15.0
|
||||||
|
|
||||||
|
- Added `rtcCommitJoin` mode that allows thread to join a build
|
||||||
|
operation. When using the internal tasking system this allows
|
||||||
|
Embree to solely use the threads that called `rtcCommitJoin` to
|
||||||
|
build the scene, while previously also normal worker threads
|
||||||
|
participated in the build. You should no longer use `rtcCommit`
|
||||||
|
to join a build.
|
||||||
|
- Added `rtcDeviceSetErrorFunction2` API call, which sets an error
|
||||||
|
callback function which additionally gets passed a user provided
|
||||||
|
pointer (`rtcDeviceSetErrorFunction` is now deprecated).
|
||||||
|
- Added `rtcDeviceSetMemoryMonitorFunction2` API call, which sets a
|
||||||
|
memory monitor callback function which additionally gets passed a
|
||||||
|
user provided pointer. (`rtcDeviceSetMemoryMonitorFunction` is now
|
||||||
|
deprecated).
|
||||||
|
- Build performance for hair geometry improved by up to 2×.
|
||||||
|
- Standard BVH build performance increased by 5%.
|
||||||
|
- Added API extension to use internal Morton-code based builder, the
|
||||||
|
standard binned-SAH builder, and the spatial split-based SAH builder.
|
||||||
|
- Added support for BSpline hair and curves. Embree uses
|
||||||
|
either the Bézier or BSpline basis internally, and converts other
|
||||||
|
curves, which requires more memory during rendering. For reduced
|
||||||
|
memory consumption set the `EMBREE_NATIVE_SPLINE_BASIS` to the basis
|
||||||
|
your application uses (which is set to `BEZIER` by default).
|
||||||
|
- Setting the number of threads through `tbb::taskscheduler_init`
|
||||||
|
object on the application side is now working properly.
|
||||||
|
- Windows and Linux releases are built using AVX-512 support.
|
||||||
|
- Implemented hybrid traversal for hair and line segments for
|
||||||
|
improved ray packet performance.
|
||||||
|
- AVX-512 code compiles with Clang 4.0.0
|
||||||
|
- Fixed crash when ray packets were disabled in CMake.
|
||||||
|
|
||||||
|
### New Features in Embree 2.14.0
|
||||||
|
|
||||||
|
- Added `ignore_config_files` option to init flags that allows the
|
||||||
|
application to ignore Embree configuration files.
|
||||||
|
- Face-varying interpolation is now supported for subdivision surfaces.
|
||||||
|
- Up to 16 user vertex buffers are supported for vertex
|
||||||
|
attribute interpolation.
|
||||||
|
- Deprecated `rtcSetBoundaryMode` function, please use the new
|
||||||
|
`rtcSetSubdivisionMode` function.
|
||||||
|
- Added `RTC_SUBDIV_PIN_BOUNDARY` mode for handling boundaries of
|
||||||
|
subdivision meshes.
|
||||||
|
- Added `RTC_SUBDIV_PIN_ALL` mode to enforce linear interpolation
|
||||||
|
for subdivision meshes.
|
||||||
|
- Optimized object generation performance for dynamic scenes.
|
||||||
|
- Reduced memory consumption when using lots of small dynamic objects.
|
||||||
|
- Fixed bug for subdivision surfaces using low tessellation rates.
|
||||||
|
- Hair geometry now uses a new ribbon intersector that intersects with
|
||||||
|
ray-facing quads. The new intersector also returns the v-coordinate
|
||||||
|
of the hair intersection, and fixes artefacts at junction points
|
||||||
|
between segments, at the cost of a small performance hit.
|
||||||
|
- Added `rtcSetBuffer2` function, that additionally gets the number of
|
||||||
|
elements of a buffer. In dynamic scenes, this function allows one to
|
||||||
|
quickly change buffer sizes, making it possible to change the number
|
||||||
|
of primitives of a mesh or the number of crease features for
|
||||||
|
subdivision surfaces.
|
||||||
|
- Added simple 'viewer_anim' tutorial for rendering key
|
||||||
|
frame animations and 'buildbench' for measuring BVH (re-)build
|
||||||
|
performance for static and dynamic scenes.
|
||||||
|
- Added more AVX-512 optimizations for future architectures.
|
||||||
|
|
||||||
|
### New Features in Embree 2.13.0
|
||||||
|
|
||||||
|
- Improved performance for compact (but not robust) scenes.
|
||||||
|
- Added robust mode for motion blurred triangles and quads.
|
||||||
|
- Added fast dynamic mode for user geometries.
|
||||||
|
- Up to 20% faster BVH build performance on the second generation
|
||||||
|
Intel® Xeon Phi™ processor codenamed Knights Landing.
|
||||||
|
- Improved quality of the spatial split builder.
|
||||||
|
- Improved performance for coherent streams of ray packets (SOA
|
||||||
|
layout), e.g. for fast primary visibility.
|
||||||
|
- Various bug fixes in tessellation cache, quad-based spatial
|
||||||
|
split builder, etc.
|
||||||
|
|
||||||
|
### New Features in Embree 2.12.0
|
||||||
|
|
||||||
|
- Added support for multi-segment motion blur for all primitive types.
|
||||||
|
- API support for stream of pointers to single rays (`rtcIntersect1Mp`
|
||||||
|
and `rtcOccluded1Mp`)
|
||||||
|
- Improved BVH refitting performance for dynamic scenes.
|
||||||
|
- Improved high-quality mode for quads (added spatial split builder
|
||||||
|
for quads)
|
||||||
|
- Faster dynamic scenes for triangle and quad-based meshes on AVX2
|
||||||
|
enabled machines.
|
||||||
|
- Performance and correctness bugfix in optimization for streams of
|
||||||
|
coherent (single) rays.
|
||||||
|
- Fixed large memory consumption (issue introduced in Embree v2.11.0).
|
||||||
|
If you use Embree v2.11.0 please upgrade to Embree v2.12.0.
|
||||||
|
- Reduced memory consumption for dynamic scenes containing small
|
||||||
|
meshes.
|
||||||
|
- Added support to start and affinitize Intel® TBB worker threads by passing
|
||||||
|
"`start_threads=1,set_affinity=1`" to `rtcNewDevice`. These settings
|
||||||
|
are recommended on systems with a high thread count.
|
||||||
|
- `rtcInterpolate2` can now be called within a displacement shader.
|
||||||
|
- Added initial support for Microsoft's Parallel Pattern Library (PPL)
|
||||||
|
as tasking system alternative (for optimal performance Intel® TBB is
|
||||||
|
highly recommended).
|
||||||
|
- Updated to Intel® TBB 2017 which is released under the Apache v2.0 license.
|
||||||
|
- Dropped support for Visual Studio 2012 Win32 compiler. Visual Studio
|
||||||
|
2012 x64 is still supported.
|
||||||
|
|
||||||
|
### New Features in Embree 2.11.0
|
||||||
|
|
||||||
|
- Improved performance for streams of coherent (single) rays flagged
|
||||||
|
with `RTC_INTERSECT_COHERENT`. For such coherent ray streams, e.g.
|
||||||
|
primary rays, the performance typically improves by 1.3-2×.
|
||||||
|
- New spatial split BVH builder for triangles, which is 2-6× faster
|
||||||
|
than the previous version and more memory conservative.
|
||||||
|
- Improved performance and scalability of all standard BVH builders on
|
||||||
|
systems with large core counts.
|
||||||
|
- Fixed `rtcGetBounds` for motion blur scenes.
|
||||||
|
- Thread affinity is now on by default when running on the latest
|
||||||
|
Intel® Xeon Phi™ processor.
|
||||||
|
- Added AVX-512 support for future Intel® Xeon processors.
|
||||||
|
|
||||||
|
### New Features in Embree 2.10.0
|
||||||
|
|
||||||
|
- Added a new curve geometry which renders the sweep surface of a
|
||||||
|
circle along a Bézier curve.
|
||||||
|
- Intersection filters can update the `tfar` ray distance.
|
||||||
|
- Geometry types can get disabled at compile time.
|
||||||
|
- Modified and extended the ray stream API.
|
||||||
|
- Added new callback mechanism for the ray stream API.
|
||||||
|
- Improved ray stream performance (up to 5-10%).
|
||||||
|
- Up to 20% faster morton builder on machines with large core counts.
|
||||||
|
- Lots of optimizations for the second generation Intel® Xeon Phi™
|
||||||
|
processor codenamed Knights Landing.
|
||||||
|
- Added experimental support for compressed BVH nodes (reduces node
|
||||||
|
size to 56-62% of uncompressed size). Compression introduces a
|
||||||
|
typical performance overhead of ~10%.
|
||||||
|
- Bugfix in backface culling mode. We now properly cull the
|
||||||
|
backfaces and not the frontfaces.
|
||||||
|
- Feature freeze for the first generation Intel® Xeon Phi™ coprocessor
|
||||||
|
codenamed Knights Corner. We will still maintain and add bug fixes
|
||||||
|
to Embree v2.9.0, but Embree 2.10 and future versions will no longer
|
||||||
|
support it.
|
||||||
|
|
||||||
|
### New Features in Embree 2.9.0
|
||||||
|
|
||||||
|
- Improved shadow ray performance (10-100% depending on the scene).
|
||||||
|
- Added initial support for ray streams (10-30% higher performance
|
||||||
|
depending on ray coherence in the stream).
|
||||||
|
- Added support to calculate second order derivatives using the
|
||||||
|
`rtcInterpolate2` function.
|
||||||
|
- Changed the parametrization for triangular subdivision faces to
|
||||||
|
the same scheme used for pentagons.
|
||||||
|
- Added support to query the Embree configuration using the
|
||||||
|
`rtcDeviceGetParameter` function.
|
||||||
|
|
||||||
|
### New Features in Embree 2.8.1
|
||||||
|
|
||||||
|
- Added support for setting per geometry tessellation rate (supported
|
||||||
|
for subdivision and Bézier geometries).
|
||||||
|
- Added support for motion blurred instances.
|
||||||
|
|
||||||
|
### New Features in Embree 2.8.0
|
||||||
|
|
||||||
|
- Added support for line segment geometry.
|
||||||
|
- Added support for quad geometry (replaces triangle-pairs feature).
|
||||||
|
- Added support for linear motion blur of user geometries.
|
||||||
|
- Improved performance through AVX-512 optimizations.
|
||||||
|
- Improved performance of lazy scene build (when using Intel® TBB 4.4 update
|
||||||
|
2).
|
||||||
|
- Improved performance through huge page support under Linux.
|
||||||
|
|
||||||
|
### New Features in Embree 2.7.1
|
||||||
|
|
||||||
|
- Internal tasking system supports cancellation of build operations.
|
||||||
|
- Intel® ISPC mode for robust and compact scenes got significantly faster
|
||||||
|
(implemented hybrid traversal for bvh4.triangle4v and
|
||||||
|
bvh4.triangle4i).
|
||||||
|
- Hair rendering got faster as we fixed some issues with the SAH
|
||||||
|
heuristic cost factors.
|
||||||
|
- BVH8 got slightly faster for single ray traversal (improved sorting
|
||||||
|
when hitting more than 4 boxes).
|
||||||
|
- BVH build performance got up to 30% faster on CPUs with high core
|
||||||
|
counts (improved parallel partition code).
|
||||||
|
- High quality build mode is working properly again (spatial splits had
|
||||||
|
been deactivated in v2.7.0 due to some bug).
|
||||||
|
- Support for merging two adjacent triangles sharing a common edge
|
||||||
|
into a triangle-pair primitive (can reduce memory consumption and
|
||||||
|
BVH build times by up to 50% for mostly quad-based input meshes).
|
||||||
|
- Internal cleanups (reduced number of traversal kernels by more
|
||||||
|
templating).
|
||||||
|
- Reduced stack size requirements of BVH builders.
|
||||||
|
- Fixed crash for dynamic scenes, triggered by deleting all
|
||||||
|
geometries from the scene.
|
||||||
|
|
||||||
|
### New Features in Embree 2.7.0
|
||||||
|
|
||||||
|
- Added device concept to Embree to allow different components of an
|
||||||
|
application to use Embree without interfering with each other.
|
||||||
|
- Fixed memory leak in twolevel builder used for dynamic scenes.
|
||||||
|
- Fixed bug in tessellation cache that caused crashes for subdivision
|
||||||
|
surfaces.
|
||||||
|
- Fixed bug in internal task scheduler that caused deadlocks when
|
||||||
|
using `rtcCommitThread`.
|
||||||
|
- Improved hit-distance accuracy for thin triangles in robust mode.
|
||||||
|
- Added support to disable ray packet support in cmake.
|
||||||
|
|
||||||
|
### New Features in Embree 2.6.2
|
||||||
|
|
||||||
|
- Fixed bug triggered by instantiating motion blur geometry.
|
||||||
|
- Fixed bug in hit UV coordinates of static subdivision geometries.
|
||||||
|
- Performance improvements when only changing tessellation levels for
|
||||||
|
subdivision geometry per frame.
|
||||||
|
- Added ray packet intersectors for subdivision geometry, resulting in
|
||||||
|
improved performance for coherent rays.
|
||||||
|
- Reduced virtual address space usage for static geometries.
|
||||||
|
- Fixed some AVX2 code paths when compiling with GCC or Clang.
|
||||||
|
- Bugfix for subdiv patches with non-matching winding order.
|
||||||
|
- Bugfix in ISA detection of AVX-512.
|
||||||
|
|
||||||
|
### New Features in Embree 2.6.1
|
||||||
|
|
||||||
|
- Major performance improvements for ray tracing subdivision surfaces,
|
||||||
|
e.g. up to 2× faster for scenes where only the tessellation levels
|
||||||
|
are changing per frame, and up to 3× faster for scenes with lots of
|
||||||
|
crease features
|
||||||
|
- Initial support for architectures supporting the new 16-wide AVX-512
|
||||||
|
ISA
|
||||||
|
- Implemented intersection filter callback support for subdivision
|
||||||
|
surfaces
|
||||||
|
- Added `RTC_IGNORE_INVALID_RAYS` CMake option which makes the ray
|
||||||
|
intersectors more robust against full tree traversal caused by
|
||||||
|
invalid ray inputs (e.g. INF, NaN, etc)
|
||||||
|
|
||||||
|
### New Features in Embree 2.6.0
|
||||||
|
|
||||||
|
- Added `rtcInterpolate` function to interpolate per vertex
|
||||||
|
attributes
|
||||||
|
- Added `rtcSetBoundaryMode` function that can be used to select the
|
||||||
|
boundary handling for subdivision surfaces
|
||||||
|
- Fixed a traversal bug that caused rays with very small ray
|
||||||
|
direction components to miss geometry
|
||||||
|
- Performance improvements for the robust traversal mode
|
||||||
|
- Fixed deadlock when calling `rtcCommit` from multiple
|
||||||
|
threads on same scene
|
||||||
|
|
||||||
|
### New Features in Embree 2.5.1
|
||||||
|
|
||||||
|
- On dual socket workstations, the initial BVH build performance
|
||||||
|
almost doubled through a better memory allocation scheme
|
||||||
|
- Reduced memory usage for subdivision surface objects with crease
|
||||||
|
features
|
||||||
|
- `rtcCommit` performance is robust against unset "flush to zero" and
|
||||||
|
"denormals are zero" flags. However, enabling these flags in your
|
||||||
|
application is still recommended
|
||||||
|
- Reduced memory usage for subdivision surfaces with borders and
|
||||||
|
infinitely sharp creases
|
||||||
|
- Lots of internal cleanups and bug fixes for both Intel® Xeon® and
|
||||||
|
Intel® Xeon Phi™
|
||||||
|
|
||||||
|
### New Features in Embree 2.5.0
|
||||||
|
|
||||||
|
- Improved hierarchy build performance on both Intel Xeon and Intel
|
||||||
|
Xeon Phi
|
||||||
|
- Vastly improved tessellation cache for ray tracing subdivision
|
||||||
|
surfaces
|
||||||
|
- Added `rtcGetUserData` API call to query per geometry user pointer
|
||||||
|
set through `rtcSetUserData`
|
||||||
|
- Added support for memory monitor callback functions to track and
|
||||||
|
limit memory consumption
|
||||||
|
- Added support for progress monitor callback functions to track build
|
||||||
|
progress and cancel long build operations
|
||||||
|
- BVH builders can be used to build user defined hierarchies inside
|
||||||
|
the application (see tutorial [BVH Builder])
|
||||||
|
- Switched to Intel® TBB as default tasking system on Xeon to get even faster
|
||||||
|
hierarchy build times and better integration for applications that
|
||||||
|
also use Intel® TBB
|
||||||
|
- `rtcCommit` can get called from multiple Intel® TBB threads to join the
|
||||||
|
hierarchy build operations
|
||||||
|
|
||||||
|
### New Features in Embree 2.4
|
||||||
|
|
||||||
|
- Support for Catmull Clark subdivision surfaces (triangle/quad base
|
||||||
|
primitives)
|
||||||
|
- Support for vector displacements on Catmull Clark subdivision
|
||||||
|
surfaces
|
||||||
|
- Various bug fixes (e.g. 4-byte alignment of vertex buffers works)
|
||||||
|
|
||||||
|
### New Features in Embree 2.3.3
|
||||||
|
|
||||||
|
- BVH builders more robustly handle invalid input data (Intel Xeon
|
||||||
|
processor family)
|
||||||
|
- Motion blur support for hair geometry (Xeon)
|
||||||
|
- Improved motion blur performance for triangle geometry (Xeon)
|
||||||
|
- Improved robust ray tracing mode (Xeon)
|
||||||
|
- Added `rtcCommitThread` API call for easier integration into
|
||||||
|
existing tasking systems (Xeon and Intel Xeon Phi coprocessor)
|
||||||
|
- Added support for recording and replaying all
|
||||||
|
`rtcIntersect`/`rtcOccluded` calls (Xeon and Xeon Phi)
|
||||||
|
|
||||||
|
### New Features in Embree 2.3.2
|
||||||
|
|
||||||
|
- Improved mixed AABB/OBB-BVH for hair geometry (Xeon Phi)
|
||||||
|
- Reduced amount of pre-allocated memory for BVH builders (Xeon Phi)
|
||||||
|
- New 64-bit Morton code-based BVH builder (Xeon Phi)
|
||||||
|
- (Enhanced) Morton code-based BVH builders now use tree rotations to
|
||||||
|
improve BVH quality (Xeon Phi)
|
||||||
|
- Bug fixes (Xeon and Xeon Phi)
|
||||||
|
|
||||||
|
### New Features in Embree 2.3.1
|
||||||
|
|
||||||
|
- High quality BVH mode improves spatial splits which result in up to
|
||||||
|
30% performance improvement for some scenes (Xeon)
|
||||||
|
- Compile time enabled intersection filter functions do not reduce
|
||||||
|
performance if no intersection filter is used in the scene (Xeon and
|
||||||
|
Xeon Phi)
|
||||||
|
- Improved ray tracing performance for hair geometry by \>20% on Xeon
|
||||||
|
Phi. BVH for hair geometry requires 20% less memory
|
||||||
|
- BVH8 for AVX/AVX2 targets improves performance for single ray
|
||||||
|
tracing on Haswell by up to 12% and by up to 5% for hybrid (Xeon)
|
||||||
|
- Memory conservative BVH for Xeon Phi now uses BVH node quantization
|
||||||
|
to lower memory footprint (requires half the memory footprint of the
|
||||||
|
default BVH)
|
||||||
|
|
||||||
|
### New Features in Embree 2.3
|
||||||
|
|
||||||
|
- Support for ray tracing hair geometry (Xeon and Xeon Phi)
|
||||||
|
- Catching errors through error callback function
|
||||||
|
- Faster hybrid traversal (Xeon and Xeon Phi)
|
||||||
|
- New memory conservative BVH for Xeon Phi
|
||||||
|
- Faster Morton code-based builder on Xeon
|
||||||
|
- Faster binned-SAH builder on Xeon Phi
|
||||||
|
- Lots of code cleanups/simplifications/improvements (Xeon and Xeon
|
||||||
|
Phi)
|
||||||
|
|
||||||
|
### New Features in Embree 2.2
|
||||||
|
|
||||||
|
- Support for motion blur on Xeon Phi
|
||||||
|
- Support for intersection filter callback functions
|
||||||
|
- Support for buffer sharing with the application
|
||||||
|
- Lots of AVX2 optimizations, e.g. \~20% faster 8-wide hybrid
|
||||||
|
traversal
|
||||||
|
- Experimental support for 8-wide (AVX/AVX2) and 16-wide BVHs (Xeon
|
||||||
|
Phi)
|
||||||
|
|
||||||
|
### New Features in Embree 2.1
|
||||||
|
|
||||||
|
- New future proof API with a strong focus on supporting dynamic
|
||||||
|
scenes
|
||||||
|
- Lots of optimizations for 8-wide AVX2 (Haswell architecture)
|
||||||
|
- Automatic runtime code selection for SSE, AVX, and AVX2
|
||||||
|
- Support for user-defined geometry
|
||||||
|
- New and improved BVH builders:
|
||||||
|
- Fast adaptive Morton code-based builder (without SAH-based
|
||||||
|
top-level rebuild)
|
||||||
|
- Both the SAH and Morton code-based builders got faster (Xeon
|
||||||
|
Phi)
|
||||||
|
- New variant of the SAH-based builder using triangle pre-splits
|
||||||
|
(Xeon Phi)
|
||||||
|
|
||||||
|
### New Features in Embree 2.0
|
||||||
|
|
||||||
|
- Support for the Intel® Xeon Phi™ coprocessor platform
|
||||||
|
- Support for high-performance "packet" kernels on SSE, AVX, and Xeon
|
||||||
|
Phi
|
||||||
|
- Integration with the Intel® Implicit SPMD Program Compiler (Intel® ISPC)
|
||||||
|
- Instantiation and fast BVH reconstruction
|
||||||
|
- Example photo-realistic rendering engine for both C++ and Intel® ISPC
|
||||||
|
|
||||||
710
Framework/external/embree/CMakeLists.txt
vendored
Normal file
710
Framework/external/embree/CMakeLists.txt
vendored
Normal file
|
|
@ -0,0 +1,710 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Oldest CMake release this script is written against.
cmake_minimum_required(VERSION 3.10.0)

# Version components; every derived value below is assembled from these.
set(EMBREE_VERSION_MAJOR 4)
set(EMBREE_VERSION_MINOR 3)
set(EMBREE_VERSION_PATCH 1)
set(EMBREE_VERSION_NOTE "")

# Dotted version string (major.minor.patch).
set(EMBREE_VERSION "${EMBREE_VERSION_MAJOR}.${EMBREE_VERSION_MINOR}.${EMBREE_VERSION_PATCH}")
# Single-integer encoding of the version: 10000*major + 100*minor + patch.
math(EXPR EMBREE_VERSION_NUMBER
  "10000*${EMBREE_VERSION_MAJOR} + 100*${EMBREE_VERSION_MINOR} + ${EMBREE_VERSION_PATCH}")
set(CPACK_RPM_PACKAGE_RELEASE 1)

# The project name carries the major version number.
project(embree${EMBREE_VERSION_MAJOR})
set(EMBREE_PROJECT_COMPILATION ON)

# Provides cmake_dependent_option(), used for the SYCL switches further down.
include(CMakeDependentOption)
|
||||||
|
|
||||||
|
# On macOS the binaries are signed during install, because CMake modifies the
# RPATH of the binary at that point. The regular strip tool is remembered in
# EMBREE_STRIP and CMAKE_STRIP is redirected to a generated post-install script.
if(APPLE AND EMBREE_SIGN_FILE)
  set(EMBREE_STRIP ${CMAKE_STRIP})
  set(CMAKE_STRIP "${PROJECT_BINARY_DIR}/post_install_target.sh")
  configure_file(
    scripts/post_install_target.sh.in
    "${PROJECT_BINARY_DIR}/post_install_target.sh"
    @ONLY)
endif()
|
||||||
|
|
||||||
|
# Absolute path of the macOS entitlements file. It is captured here, relative
# to this CMakeLists.txt, instead of being derived from CMAKE_SOURCE_DIR inside
# the macro: CMAKE_SOURCE_DIR is the root of the outermost build and points at
# the wrong tree when Embree is added as a sub-project.
set(EMBREE_ENTITLEMENTS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/cmake/embree.entitlements")

# SIGN_TARGET(<target>)
#
# Attaches a POST_BUILD step to <target> that signs the built file. Does
# nothing unless EMBREE_SIGN_FILE is set.
#   target - name of the target whose output file should be signed.
macro(SIGN_TARGET target)
  if(EMBREE_SIGN_FILE)
    if(WIN32)
      # On Windows we sign and do not strip, as debug symbols are not included
      # in the binary.
      add_custom_command(TARGET ${target} POST_BUILD
        COMMAND ${EMBREE_SIGN_FILE} $<TARGET_FILE:${target}>)
    elseif(APPLE)
      # On macOS we strip and sign here for testing purposes but also during
      # install, as CMake modifies the binary during install.
      add_custom_command(TARGET ${target} POST_BUILD
        COMMAND ${EMBREE_STRIP} -x $<TARGET_FILE:${target}>
        COMMAND ${EMBREE_SIGN_FILE} -o runtime -e "${EMBREE_ENTITLEMENTS_FILE}" $<TARGET_FILE:${target}>)
    else()
      # On Linux signing of binaries is not supported and stripping is done
      # during install.
    endif()
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Store the current git revision in EMBREE_HASH. This only runs when the
# source directory contains a .git entry; without a usable git executable the
# hash is set to 0.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
  find_package(Git)
  if(NOT GIT_FOUND)
    set(EMBREE_HASH 0)
  else()
    execute_process(
      COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
      OUTPUT_VARIABLE EMBREE_HASH
      ERROR_QUIET
      OUTPUT_STRIP_TRAILING_WHITESPACE)
  endif()
endif()
|
||||||
|
|
||||||
|
# Switch every listed policy to NEW, skipping those the running CMake version
# does not know about.
if(COMMAND cmake_policy)
  foreach(embree_policy CMP0003 CMP0042 CMP0072 CMP0022 CMP0074 CMP0135)
    if(POLICY ${embree_policy})
      cmake_policy(SET ${embree_policy} NEW)
    endif()
  endforeach()
endif()
|
||||||
|
|
||||||
|
# Move rarely touched entries to the advanced section of the cache view ...
mark_as_advanced(
  CMAKE_BACKWARDS_COMPATIBILITY
  EXECUTABLE_OUTPUT_PATH
  LIBRARY_OUTPUT_PATH
  CMAKE_OSX_ARCHITECTURES
  CMAKE_OSX_DEPLOYMENT_TARGET
  CMAKE_OSX_SYSROOT)
# ... while keeping the C++ compiler entry in the regular section.
mark_as_advanced(CLEAR CMAKE_CXX_COMPILER)

# common/cmake is searched first when include() resolves a module name.
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/common/cmake" ${CMAKE_MODULE_PATH})

include(test)
|
||||||
|
|
||||||
|
# Documentation build switch, stored as an INTERNAL cache entry.
set(BUILD_DOC OFF CACHE INTERNAL "build documentation (internal only)")
if(BUILD_DOC)
  add_subdirectory(doc)
endif()

# Tutorials are part of the build unless switched off.
option(EMBREE_TUTORIALS "Enable to build Embree tutorials" ON)
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
# Embree configuration
|
||||||
|
##############################################################
|
||||||
|
|
||||||
|
# Offer the static/shared choice only where the platform supports shared
# libraries; otherwise the static build is forced.
get_property(SHARED_LIBS_SUPPORTED GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
if(NOT SHARED_LIBS_SUPPORTED)
  set(EMBREE_STATIC_LIB ON CACHE BOOL "Build Embree as a static library." FORCE)
  mark_as_advanced(EMBREE_STATIC_LIB)
else()
  option(EMBREE_STATIC_LIB "Build Embree as a static library.")
  mark_as_advanced(CLEAR EMBREE_STATIC_LIB)
endif()

# Translate the switch into the library type keyword; static builds also get
# the EMBREE_STATIC_LIB preprocessor definition.
if(EMBREE_STATIC_LIB)
  set(EMBREE_LIB_TYPE STATIC)
  add_definitions(-DEMBREE_STATIC_LIB)
else()
  set(EMBREE_LIB_TYPE SHARED)
endif()
|
||||||
|
|
||||||
|
# ISPC support is opt-in and always forced off for Emscripten builds.
option(EMBREE_ISPC_SUPPORT "Build Embree with support for ISPC applications." OFF)
if(EMSCRIPTEN)
  set(EMBREE_ISPC_SUPPORT OFF CACHE BOOL "Build Embree with support for ISPC applications." FORCE)
endif()

set(EMBREE_API_NAMESPACE "" CACHE STRING "C++ namespace to put API symbols into.")
set(EMBREE_LIBRARY_NAME "embree${EMBREE_VERSION_MAJOR}" CACHE STRING "Name of the embree library file (default is embree${EMBREE_VERSION_MAJOR})")

# A custom API namespace and ISPC support are mutually exclusive.
if(EMBREE_ISPC_SUPPORT AND EMBREE_API_NAMESPACE)
  message(FATAL_ERROR "You cannot enable ISPC when EMBREE_API_NAMESPACE is used.")
endif()
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
# Configurations (add configurations also to common/cmake/embree-config.cmake)
|
||||||
|
|
||||||
|
# Packaging switches (both shown only in the advanced cache view).
option(EMBREE_ZIP_MODE "Create Embree ZIP package" ON)
option(EMBREE_INSTALL_DEPENDENCIES "Install Embree dependencies in binary packages and install" OFF)
mark_as_advanced(EMBREE_ZIP_MODE)
mark_as_advanced(EMBREE_INSTALL_DEPENDENCIES)

# Diagnostic / hardening switches, both off by default.
option(EMBREE_STAT_COUNTERS "Enables statistic counters." OFF)
option(EMBREE_STACK_PROTECTOR "When enabled Embree compiles with stack protection against return address overrides." OFF)
|
||||||
|
|
||||||
|
# The SYCL switch is not offered on Apple platforms.
if(NOT APPLE)
  option(EMBREE_SYCL_SUPPORT "Enables SYCL GPU support." OFF)
endif()

# These switches are only selectable while EMBREE_SYCL_SUPPORT is on and are
# forced to OFF otherwise.
cmake_dependent_option(EMBREE_SYCL_LARGEGRF "Enables SYCL large GRF support." OFF "EMBREE_SYCL_SUPPORT" OFF)
cmake_dependent_option(EMBREE_SYCL_DBG "Enables DPC++ debug info." OFF "EMBREE_SYCL_SUPPORT" OFF)
cmake_dependent_option(EMBREE_SYCL_GEOMETRY_CALLBACK "Enabled geometry callbacks which are disabled by default for SYCL." OFF "EMBREE_SYCL_SUPPORT" OFF)

mark_as_advanced(
  EMBREE_SYCL_LARGEGRF
  EMBREE_SYCL_DBG
  EMBREE_SYCL_GEOMETRY_CALLBACK)

if(EMBREE_SYCL_GEOMETRY_CALLBACK)
  message(WARNING "Enabling EMBREE_SYCL_GEOMETRY_CALLBACK is experimental and may result in poor performance.")
endif()

if(EMBREE_SYCL_SUPPORT)
  # FIXME: only use define from rtcore_config.h
  add_definitions("-DEMBREE_SYCL_SUPPORT")
endif()
|
||||||
|
|
||||||
|
# Switches for the SYCL ray tracing back end.
#   EMBREE_SYCL_RT_SIMULATION             - selectable only with EMBREE_SYCL_SUPPORT
#   EMBREE_SYCL_RT_VALIDATION_API         - selectable only with EMBREE_SYCL_SUPPORT
#   EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS - selectable only with
#                                           EMBREE_SYCL_RT_VALIDATION_API (default ON),
#                                           forced to OFF otherwise
# All three are declared before the consistency check below so that the check
# always sees their final values, including on the very first configure run.
cmake_dependent_option(EMBREE_SYCL_RT_SIMULATION "Using hardware simulation" OFF "EMBREE_SYCL_SUPPORT" OFF)
cmake_dependent_option(EMBREE_SYCL_RT_VALIDATION_API "Use rt_validation API instead of IGC provided rt_production API" OFF "EMBREE_SYCL_SUPPORT" OFF)
cmake_dependent_option(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON "EMBREE_SYCL_RT_VALIDATION_API" OFF)

mark_as_advanced(EMBREE_SYCL_RT_SIMULATION)
mark_as_advanced(EMBREE_SYCL_RT_VALIDATION_API)
mark_as_advanced(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)

# Simulation needs the validation API switched ON *and* implicit dispatch
# globals switched OFF, so violating either requirement is an error. (The
# previous test combined the two with AND, a combination the dependent option
# above makes impossible, so it never triggered.)
if(EMBREE_SYCL_RT_SIMULATION AND
   (NOT EMBREE_SYCL_RT_VALIDATION_API OR EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS))
  message(FATAL_ERROR "Using EMBREE_SYCL_RT_SIMULATION requires EMBREE_SYCL_RT_VALIDATION_API=ON and EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS=OFF")
endif()

if(EMBREE_SYCL_RT_SIMULATION)
  add_definitions("-DEMBREE_SYCL_RT_SIMULATION")
endif()

if(EMBREE_SYCL_RT_VALIDATION_API)
  add_definitions("-DEMBREE_SYCL_RT_VALIDATION_API")
endif()

# With the validation API but without implicit dispatch globals, the
# EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS definition is added instead.
if(EMBREE_SYCL_RT_VALIDATION_API AND NOT EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
  add_definitions("-DEMBREE_SYCL_ALLOC_DISPATCH_GLOBALS")
endif()
|
||||||
|
|
||||||
|
# Ahead-of-time compilation target for SYCL builds.
if(EMBREE_SYCL_SUPPORT)
  set(EMBREE_SYCL_AOT_DEVICES "none" CACHE STRING "SYCL devices to use for AOT compilation")
  set_property(CACHE EMBREE_SYCL_AOT_DEVICES PROPERTY STRINGS none dg2 pvc XE_HPG_CORE XE_HPC_CORE)

  # The device revision is 0 except for dg2, which uses revision 4.
  if(EMBREE_SYCL_AOT_DEVICES STREQUAL "dg2")
    set(EMBREE_SYCL_AOT_DEVICE_REVISION 4) # FIXME: bug workaround should get removed
  else()
    set(EMBREE_SYCL_AOT_DEVICE_REVISION 0)
  endif()
endif()

# Level Zero RTAS builder, selectable only together with SYCL support.
cmake_dependent_option(EMBREE_SYCL_L0_RTAS_BUILDER "Enable Level Zero RTAS builder" OFF "EMBREE_SYCL_SUPPORT" OFF)
if(EMBREE_SYCL_L0_RTAS_BUILDER)
  add_definitions("-DEMBREE_SYCL_L0_RTAS_BUILDER")
endif()
|
||||||
|
|
||||||
|
# Ray handling and culling switches.
option(EMBREE_RAY_MASK "Enables ray mask support." ON)
option(EMBREE_BACKFACE_CULLING "Enables backface culling." OFF)
option(EMBREE_BACKFACE_CULLING_CURVES "Enables backface culling for curve primitives." OFF)
option(EMBREE_BACKFACE_CULLING_SPHERES "Enables backface culling for sphere primitives." OFF)

option(EMBREE_FILTER_FUNCTION "Enables filter functions." ON)
# FIXME: enable by default?
option(EMBREE_IGNORE_INVALID_RAYS "Ignores invalid rays." OFF)
option(EMBREE_COMPACT_POLYS "Enables double indexed poly layout." OFF)

# Geometry types compiled into the library.
option(EMBREE_GEOMETRY_TRIANGLE "Enables support for triangle geometries." ON)
option(EMBREE_GEOMETRY_QUAD "Enables support for quad geometries." ON)
option(EMBREE_GEOMETRY_CURVE "Enables support for curve geometries." ON)
option(EMBREE_GEOMETRY_SUBDIVISION "Enables support for subdiv geometries." ON)
option(EMBREE_GEOMETRY_USER "Enables support for user geometries." ON)
option(EMBREE_GEOMETRY_INSTANCE "Enables support for instances." ON)
option(EMBREE_GEOMETRY_INSTANCE_ARRAY "Enables support for instance arrays." ON)
set(EMBREE_MAX_INSTANCE_LEVEL_COUNT 1 CACHE STRING "Maximum number of instance levels.")

# Without any instancing support the level count has to stay at 1.
if(NOT EMBREE_GEOMETRY_INSTANCE
   AND NOT EMBREE_GEOMETRY_INSTANCE_ARRAY
   AND NOT (EMBREE_MAX_INSTANCE_LEVEL_COUNT EQUAL 1))
  message(FATAL_ERROR "EMBREE_MAX_INSTANCE_LEVEL_COUNT must be 1 when EMBREE_GEOMETRY_INSTANCE and EMBREE_GEOMETRY_INSTANCE_ARRAY are disabled")
endif()

option(EMBREE_GEOMETRY_GRID "Enables support for grid geometries." ON)
option(EMBREE_GEOMETRY_POINT "Enables support for point geometries." ON)

option(EMBREE_RAY_PACKETS "Enabled support for ray packets." ON)

# Self-intersection avoidance and min-width behaviour.
set(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0 CACHE STRING "Self intersection avoidance factor for flat curves. Specify floating point value in range 0 to inf.")
option(EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE "Enables self intersection avoidance for ray oriented discs." ON)
option(EMBREE_MIN_WIDTH "Enables min-width feature to enlarge curve and point thickness to pixel width." OFF)
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
# Platform detection and defaults
|
||||||
|
##############################################################
|
||||||
|
|
||||||
|
# Detect ARM targets and record the result in EMBREE_ARM. Apple builds count
# as ARM when either the system processor or CMAKE_OSX_ARCHITECTURES mentions
# arm64; elsewhere the system processor has to be aarch64 or ARM64.
if(APPLE
   AND CMAKE_SYSTEM_NAME STREQUAL "Darwin"
   AND (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_OSX_ARCHITECTURES MATCHES "arm64"))
  message(STATUS "Building for Apple silicon")
  set(EMBREE_ARM ON)
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")
  message(STATUS "Building for AArch64")
  set(EMBREE_ARM ON)
endif()
|
||||||
|
|
||||||
|
# Tasking back end selection. PPL is only listed as a choice on Windows.
set(EMBREE_TASKING_SYSTEM "INTERNAL" CACHE STRING "Selects tasking system")
#SET(EMBREE_TBB_COMPONENT "tbb" CACHE STRING "The TBB component/library name.")

if(WIN32)
  set_property(CACHE EMBREE_TASKING_SYSTEM PROPERTY STRINGS TBB INTERNAL PPL)
else()
  set_property(CACHE EMBREE_TASKING_SYSTEM PROPERTY STRINGS TBB INTERNAL)
endif()

# Map the cache value to one of TBB / PPL / INTERNAL; any other value falls
# back to INTERNAL.
if(EMBREE_TASKING_SYSTEM STREQUAL "TBB")
  set(embree_tasking_backend TBB)
elseif(EMBREE_TASKING_SYSTEM STREQUAL "PPL")
  set(embree_tasking_backend PPL)
else()
  set(embree_tasking_backend INTERNAL)
endif()

# Exactly one TASKING_<backend> variable ends up ON, and the matching
# definition is handed to both the C++ and the ISPC compilation.
set(TASKING_TBB OFF)
set(TASKING_PPL OFF)
set(TASKING_INTERNAL OFF)
set(TASKING_${embree_tasking_backend} ON)
add_definitions(-DTASKING_${embree_tasking_backend})
list(APPEND ISPC_DEFINITIONS -DTASKING_${embree_tasking_backend})
unset(embree_tasking_backend)
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
# Compiler
|
||||||
|
##############################################################
|
||||||
|
# Build configurations known to this project. RelWithAssert is only added for
# SYCL builds.
# TODO: fixme, define relwithassert also for icc, msvc, ... or remove when debug build with dpcpp is useable
set(CONFIGURATION_TYPES Debug Release RelWithDebInfo)
if(EMBREE_SYCL_SUPPORT)
  list(APPEND CONFIGURATION_TYPES RelWithAssert)
endif()
|
||||||
|
|
||||||
|
# Validate build type: a non-empty CMAKE_BUILD_TYPE has to be exactly one of
# the entries of CONFIGURATION_TYPES. The lookup is done per list element;
# a plain substring search over the joined list would also accept fragments
# such as "Rel" or "Info". MATCHED_CONFIG receives the element index, or -1
# when there is no match.
if(CMAKE_BUILD_TYPE)
  list(FIND CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE}" MATCHED_CONFIG)
  if(MATCHED_CONFIG EQUAL -1)
    message(FATAL_ERROR "CMAKE_BUILD_TYPE (${CMAKE_BUILD_TYPE}) allows only the following values: ${CONFIGURATION_TYPES}")
  endif()
endif()
|
||||||
|
|
||||||
|
message(DEBUG "CMAKE_GENERATOR_TOOLSET: ${CMAKE_GENERATOR_TOOLSET}")
message(DEBUG "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
message(DEBUG "CMAKE_CXX_COMPILER: ${CMAKE_CXX_COMPILER}")

# Pick the compiler-specific settings module from common/cmake.
#
# Every variable used as an operand of if() or string() below is quoted. An
# unquoted ${VAR} that expands to nothing disappears from the argument list,
# which turns the condition into a syntax error (for example when
# CMAKE_GENERATOR_TOOLSET is empty because a non-Visual-Studio generator is
# used on Windows).
if(WIN32)
  # Publish the supported configurations once; later runs keep user edits.
  if(NOT DEFAULT_CMAKE_CONFIGURATION_TYPES_SET)
    set(CMAKE_CONFIGURATION_TYPES "${CONFIGURATION_TYPES}" CACHE STRING "List of generated configurations." FORCE)
    set(DEFAULT_CMAKE_CONFIGURATION_TYPES_SET ON CACHE INTERNAL "Default CMake configuration types set.")
  endif()
  set_property(GLOBAL PROPERTY USE_FOLDERS ON)

  # Every branch except the DPC++ one switches SYCL support off.
  if(("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM") OR
     ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND "${CMAKE_CXX_COMPILER}" MATCHES "icx") OR
     ("${CMAKE_CXX_COMPILER}" MATCHES "dpcpp"))
    message("-- DPCPP compiler detected")
    include(dpcpp)
  elseif("${CMAKE_GENERATOR_TOOLSET}" MATCHES "^LLVM")
    set(EMBREE_SYCL_SUPPORT OFF)
    message("-- CLANG compiler detected")
    include(clang)
  elseif("${CMAKE_GENERATOR_TOOLSET}" MATCHES "^Intel")
    set(EMBREE_SYCL_SUPPORT OFF)
    message("-- Intel compiler detected")
    include(intel)
  elseif("${CMAKE_CXX_COMPILER_FRONTEND_VARIANT}" MATCHES "GNU")
    set(EMBREE_SYCL_SUPPORT OFF)
    message("-- GNU-like compiler detected")
    include(gnu)
  else()
    set(EMBREE_SYCL_SUPPORT OFF)
    if(EMBREE_ISA_AVX512)
      message(FATAL_ERROR "Microsoft Visual C++ Compiler does not support AVX512. Please use Intel Compiler or Clang.")
    endif()
    message("-- MSVC detected")
    include(msvc)
  endif()

else()
  if(CMAKE_CXX_COMPILER_WRAPPER STREQUAL "CrayPrgEnv")
    include(crayprgenv)
  else()
    get_filename_component(CXX_COMPILER_NAME "${CMAKE_CXX_COMPILER}" NAME)

    # Derive the module name from the lower-cased compiler id: AppleClang is
    # treated as clang, IntelLLVM as dpcpp, and a clang whose executable name
    # contains icpx or dpcpp is treated as dpcpp as well.
    string(TOLOWER "${CMAKE_CXX_COMPILER_ID}" _LOWER_CXX_COMPILER_ID)
    string(REPLACE "appleclang" "clang" _LOWER_CXX_COMPILER_ID "${_LOWER_CXX_COMPILER_ID}")
    string(REPLACE "intelllvm" "dpcpp" _LOWER_CXX_COMPILER_ID "${_LOWER_CXX_COMPILER_ID}")
    if("${_LOWER_CXX_COMPILER_ID}" MATCHES "clang" AND "${CXX_COMPILER_NAME}" MATCHES "icpx")
      string(REPLACE "clang" "dpcpp" _LOWER_CXX_COMPILER_ID "${_LOWER_CXX_COMPILER_ID}")
    endif()
    if("${_LOWER_CXX_COMPILER_ID}" MATCHES "clang" AND "${CXX_COMPILER_NAME}" MATCHES "dpcpp")
      string(REPLACE "clang" "dpcpp" _LOWER_CXX_COMPILER_ID "${_LOWER_CXX_COMPILER_ID}")
    endif()
    # SYCL builds always use the dpcpp module.
    if(EMBREE_SYCL_SUPPORT)
      set(_LOWER_CXX_COMPILER_ID "dpcpp")
    endif()
    string(TOUPPER "${_LOWER_CXX_COMPILER_ID}" _UPPER_CXX_COMPILER_ID)
    message("-- ${_UPPER_CXX_COMPILER_ID} detected")

    # A compiler without a matching module is rejected.
    include("${_LOWER_CXX_COMPILER_ID}" OPTIONAL RESULT_VARIABLE COMPILER_FOUND)
    if(NOT COMPILER_FOUND)
      message(FATAL_ERROR "Unsupported compiler: " ${CMAKE_CXX_COMPILER_ID})
    endif()
  endif()

  # Default to a Release build when no build type was requested.
  if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Specifies the build type." FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${CONFIGURATION_TYPES})
  endif()

  option(CMAKE_VERBOSE_MAKEFILE "Enables verbose mode.")
  mark_as_advanced(CLEAR CMAKE_VERBOSE_MAKEFILE)

endif()
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
# ISA configuration
|
||||||
|
##############################################################
|
||||||
|
|
||||||
|
# Compatibility with the old option name: when EMBREE_ISA_AVX512SKX is
# defined, its value replaces the cached EMBREE_ISA_AVX512 entry.
if(DEFINED EMBREE_ISA_AVX512SKX)
  unset(EMBREE_ISA_AVX512 CACHE)
  set(EMBREE_ISA_AVX512 ${EMBREE_ISA_AVX512SKX} CACHE BOOL "")
endif()
|
||||||
|
|
||||||
|
# Initial value of EMBREE_MAX_ISA: DEFAULT under the Cray programming
# environment, SSE2 for Emscripten and NONE everywhere else.
if(CMAKE_CXX_COMPILER_WRAPPER STREQUAL "CrayPrgEnv")
  set(embree_max_isa_initial "DEFAULT")
elseif(EMSCRIPTEN)
  set(embree_max_isa_initial "SSE2")
else()
  set(embree_max_isa_initial "NONE")
endif()
set(EMBREE_MAX_ISA "${embree_max_isa_initial}" CACHE STRING "Selects highest ISA to support.")
unset(embree_max_isa_initial)

# Choices offered for the cache entry depend on the target architecture.
if(EMBREE_ARM)
  set_property(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE NEON NEON2X)
else()
  set_property(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE SSE2 SSE4.2 AVX AVX2 AVX512 DEFAULT)
endif()
|
||||||
|
|
||||||
|
# Decide which EMBREE_ISA_* switches are on, depending on EMBREE_MAX_ISA:
#   NONE    - each ISA is an individual user option
#   DEFAULT - only the ISA reported by CHECK_ISA_DEFAULT is enabled
#   <name>  - the named ISA and every lower one are enabled
if(EMBREE_MAX_ISA STREQUAL "NONE")

  if(EMBREE_ARM)
    # Apple defaults to the double pumped variant, other ARM targets to NEON.
    if(APPLE)
      option(EMBREE_ISA_NEON "Enables NEON ISA." OFF)
      option(EMBREE_ISA_NEON2X "Enables NEON ISA double pumped." ON)
    else()
      option(EMBREE_ISA_NEON "Enables NEON ISA." ON)
      option(EMBREE_ISA_NEON2X "Enables NEON ISA double pumped." OFF)
    endif()
  else()
    # Compile check_isa.cpp with each flag set; the results become the
    # defaults of the corresponding AVX options below.
    foreach(embree_isa AVX AVX2 AVX512)
      try_compile(COMPILER_SUPPORTS_${embree_isa}
        "${CMAKE_BINARY_DIR}"
        "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp"
        COMPILE_DEFINITIONS ${FLAGS_${embree_isa}})
    endforeach()

    option(EMBREE_ISA_SSE2 "Enables SSE2 ISA." ON)
    option(EMBREE_ISA_SSE42 "Enables SSE4.2 ISA." ON)
    option(EMBREE_ISA_AVX "Enables AVX ISA." ${COMPILER_SUPPORTS_AVX})
    option(EMBREE_ISA_AVX2 "Enables AVX2 ISA." ${COMPILER_SUPPORTS_AVX2})
    # AVX512 stays off by default on Windows and Apple platforms.
    if(WIN32 OR APPLE)
      option(EMBREE_ISA_AVX512 "Enables AVX512 ISA." OFF)
    else()
      option(EMBREE_ISA_AVX512 "Enables AVX512 ISA." ${COMPILER_SUPPORTS_AVX512})
    endif()
    # Don't use OPTION, but still set them to OFF, so that embree-config.cmake
    # is consistent with its definitions.
    set(EMBREE_ISA_NEON OFF)
    set(EMBREE_ISA_NEON2X OFF)
  endif()

elseif(EMBREE_MAX_ISA STREQUAL "DEFAULT")

  # Drop cached per-ISA choices and start with everything off.
  foreach(embree_isa NEON NEON2X SSE2 SSE42 AVX AVX2 AVX512)
    unset(EMBREE_ISA_${embree_isa} CACHE)
  endforeach()
  foreach(embree_isa NEON NEON2X SSE2 SSE42 AVX AVX2 AVX512)
    set(EMBREE_ISA_${embree_isa} OFF)
  endforeach()

  message(STATUS "Detecting default ISA...")
  include(check_isa_default)
  check_isa_default(EMBREE_ISA_DEFAULT)
  message(STATUS "Detected default ISA: ${EMBREE_ISA_DEFAULT}")
  set(EMBREE_ISA_${EMBREE_ISA_DEFAULT} ON)

else()

  # Drop cached per-ISA choices; they are derived from EMBREE_MAX_ISA here.
  foreach(embree_isa NEON NEON2X SSE2 SSE42 AVX AVX2 AVX512)
    unset(EMBREE_ISA_${embree_isa} CACHE)
  endforeach()

  # Translate the ISA name into a numeric level.
  if(EMBREE_MAX_ISA STREQUAL "NEON")
    set(ISA 1)
  elseif(EMBREE_MAX_ISA STREQUAL "NEON2X")
    set(ISA 2)
  elseif(EMBREE_MAX_ISA STREQUAL "SSE2")
    set(ISA 1)
  elseif(EMBREE_MAX_ISA STREQUAL "SSE4.2")
    set(ISA 2)
  elseif(EMBREE_MAX_ISA STREQUAL "AVX")
    set(ISA 3)
  elseif(EMBREE_MAX_ISA STREQUAL "AVX2")
    set(ISA 4)
  elseif(EMBREE_MAX_ISA STREQUAL "AVX512")
    set(ISA 5)
  elseif(EMBREE_MAX_ISA STREQUAL "AVX512SKX") # just for compatibility
    set(ISA 5)
  else()
    message(FATAL_ERROR "Unsupported ISA specified: " ${EMBREE_MAX_ISA})
  endif()

  foreach(embree_isa NEON NEON2X SSE2 SSE42 AVX AVX2 AVX512)
    set(EMBREE_ISA_${embree_isa} OFF)
  endforeach()

  # Enable every ISA whose level does not exceed the requested one.
  if(EMBREE_ARM)
    if(ISA GREATER 0)
      set(EMBREE_ISA_NEON ON)
    endif()
    if(ISA GREATER 1)
      set(EMBREE_ISA_NEON2X ON)
    endif()
  else()
    if(ISA GREATER 0)
      set(EMBREE_ISA_SSE2 ON)
    endif()
    if(ISA GREATER 1)
      set(EMBREE_ISA_SSE42 ON)
    endif()
    if(ISA GREATER 2)
      set(EMBREE_ISA_AVX ON)
    endif()
    if(ISA GREATER 3)
      set(EMBREE_ISA_AVX2 ON)
    endif()
    if(ISA GREATER 4)
      set(EMBREE_ISA_AVX512 ON)
    endif()
  endif()

endif()
|
||||||
|
|
||||||
|
# SSE4.2 is always switched off under the Cray programming environment.
if(CMAKE_CXX_COMPILER_WRAPPER STREQUAL "CrayPrgEnv")
  set(EMBREE_ISA_SSE42 OFF)
endif()
|
||||||
|
|
||||||
|
# Static Apple builds: reject configurations that enable more than one ISA
# when the compiler is Clang 9.0.0 or newer.
if(APPLE AND EMBREE_STATIC_LIB)

  # count number of set ISAs
  set(NUMISA 0)
  foreach(embree_isa NEON NEON2X SSE2 SSE42 AVX AVX2 AVX512)
    if(EMBREE_ISA_${embree_isa})
      math(EXPR NUMISA "${NUMISA}+1")
    endif()
  endforeach()

  if(NUMISA GREATER 1)
    if(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
      if(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER "9.0.0" OR ${CMAKE_CXX_COMPILER_VERSION} VERSION_EQUAL "9.0.0")
        message(FATAL_ERROR "Using Embree as static library is not supported with AppleClang >= 9.0 when multiple ISAs are selected. Please either build a shared library or enable only one ISA.")
      endif()
    endif()
  endif()

endif()
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
# ISA configuration continued
|
||||||
|
##############################################################
|
||||||
|
|
||||||
|
# Numeric rank of each x86 ISA, lowest first.
set(SSE2   0)
set(SSE42  1)
set(AVX    2)
set(AVX2   3)
set(AVX512 4)

# Bookkeeping for the lowest enabled ISA; it is filled in by the per-ISA
# sections that follow. -1 means that no ISA has been seen yet.
unset(FLAGS_LOWEST)
set(ISA_LOWEST -1)
set(ISA_LOWEST_AVX 2)
|
||||||
|
|
||||||
|
# ISPC target for ARM builds: the 8-wide NEON target when NEON2X is enabled,
# otherwise the 4-wide one when NEON is enabled.
if(EMBREE_ARM)
  if(EMBREE_ISA_NEON2X)
    list(APPEND ISPC_TARGETS "neon-i32x8")
  elseif(EMBREE_ISA_NEON)
    list(APPEND ISPC_TARGETS "neon-i32x4")
  endif()
endif()

# Enabling NEON switches the SSE2 ISA flag on ...
if(EMBREE_ISA_NEON)
  set(EMBREE_ISA_SSE2 ON)
endif()

# ... while NEON2X leaves AVX2 as the only enabled flag among SSE2, SSE4.2,
# AVX and AVX2.
if(EMBREE_ISA_NEON2X)
  set(EMBREE_ISA_SSE2 OFF)
  set(EMBREE_ISA_SSE42 OFF)
  set(EMBREE_ISA_AVX OFF)
  set(EMBREE_ISA_AVX2 ON)
endif()
|
||||||
|
|
||||||
|
# For every enabled ISA, from lowest to highest:
#   * add the EMBREE_TARGET_<ISA> definition,
#   * on non-ARM builds add the matching ISPC target,
#   * while FLAGS_LOWEST is still unset or empty, record this ISA as the lowest.
# The order of the sections matters for the "lowest" bookkeeping.

if(EMBREE_ISA_SSE2)
  add_definitions(-DEMBREE_TARGET_SSE2)
  if(NOT EMBREE_ARM)
    list(APPEND ISPC_TARGETS "sse2")
  endif()
  if(NOT FLAGS_LOWEST)
    set(ISA_LOWEST ${SSE2})
    set(FLAGS_LOWEST ${FLAGS_SSE2})
  endif()
endif()

if(EMBREE_ISA_SSE42)
  add_definitions(-DEMBREE_TARGET_SSE42)
  if(NOT EMBREE_ARM)
    list(APPEND ISPC_TARGETS "sse4")
  endif()
  if(NOT FLAGS_LOWEST)
    set(ISA_LOWEST ${SSE42})
    set(FLAGS_LOWEST ${FLAGS_SSE42})
  endif()
endif()

# From AVX upwards the lowest AVX-class ISA is tracked as well.
if(EMBREE_ISA_AVX)
  add_definitions(-DEMBREE_TARGET_AVX)
  if(NOT EMBREE_ARM)
    list(APPEND ISPC_TARGETS "avx")
  endif()
  if(NOT FLAGS_LOWEST)
    set(ISA_LOWEST ${AVX})
    set(ISA_LOWEST_AVX ${AVX})
    set(FLAGS_LOWEST ${FLAGS_AVX})
  endif()
endif()

if(EMBREE_ISA_AVX2)
  add_definitions(-DEMBREE_TARGET_AVX2)
  if(NOT EMBREE_ARM)
    list(APPEND ISPC_TARGETS "avx2")
  endif()
  if(NOT FLAGS_LOWEST)
    set(ISA_LOWEST ${AVX2})
    set(ISA_LOWEST_AVX ${AVX2})
    set(FLAGS_LOWEST ${FLAGS_AVX2})
  endif()
endif()

if(EMBREE_ISA_AVX512)
  add_definitions(-DEMBREE_TARGET_AVX512)
  if(NOT EMBREE_ARM)
    list(APPEND ISPC_TARGETS "avx512skx-i32x16")
  endif()
  if(NOT FLAGS_LOWEST)
    set(ISA_LOWEST ${AVX512})
    set(ISA_LOWEST_AVX ${AVX512})
    set(FLAGS_LOWEST ${FLAGS_AVX512})
  endif()
endif()

# ISA_LOWEST still holding its initial value means nothing was enabled.
if(ISA_LOWEST EQUAL -1)
  message(FATAL_ERROR "You have to enable at least one ISA!")
endif()
|
||||||
|
|
||||||
|
# ISPC build support (module looked up through CMAKE_MODULE_PATH).
include(ispc)

##############################################################
# Create Binary Packages (uses above config options)
##############################################################
include(package)
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
# Create Config files
|
||||||
|
##############################################################
|
||||||
|
|
||||||
|
# Instantiate the *.in templates. Note that every output is written next to
# its template inside the source tree, not into the binary directory.
configure_file("${PROJECT_SOURCE_DIR}/kernels/config.h.in"
               "${PROJECT_SOURCE_DIR}/kernels/config.h")

include_directories(${PROJECT_BINARY_DIR})

configure_file("${PROJECT_SOURCE_DIR}/kernels/rtcore_config.h.in"
               "${PROJECT_SOURCE_DIR}/include/embree4/rtcore_config.h")

configure_file("${PROJECT_SOURCE_DIR}/kernels/hash.h.in"
               "${PROJECT_SOURCE_DIR}/kernels/hash.h")

# Symbol export maps for Linux and macOS.
configure_file("${PROJECT_SOURCE_DIR}/kernels/export.linux.map.in"
               "${PROJECT_SOURCE_DIR}/kernels/export.linux.map")
configure_file("${PROJECT_SOURCE_DIR}/kernels/export.macosx.map.in"
               "${PROJECT_SOURCE_DIR}/kernels/export.macosx.map")
|
||||||
|
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
# Output paths
|
||||||
|
##############################################################
|
||||||
|
# Executables, static archives and shared libraries all end up directly in
# this directory's binary dir.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")

##############################################################
# Directories to compile
##############################################################

add_subdirectory(common)
add_subdirectory(kernels)

# Tutorials are built only on request (EMBREE_TUTORIALS).
if(EMBREE_TUTORIALS)
  add_subdirectory(tutorials)
endif()
|
||||||
|
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
# Uninstall
|
||||||
|
##############################################################
|
||||||
|
|
||||||
|
# Provide an "uninstall" target unless one already exists. The target runs
# the uninstall script generated from common/cmake/uninstall.cmake.in.
# (The legacy IMMEDIATE keyword of configure_file is ignored by CMake and is
# therefore no longer passed.)
if(NOT TARGET uninstall)
  configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/common/cmake/uninstall.cmake.in"
    "${CMAKE_CURRENT_BINARY_DIR}/uninstall.cmake"
    @ONLY)

  add_custom_target(uninstall
    COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/uninstall.cmake")

  # Group the target with CMake's predefined targets in IDE folder views.
  set_property(TARGET uninstall PROPERTY FOLDER CMakePredefinedTargets)
endif()
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
# Has to be last
|
||||||
|
##############################################################
|
||||||
|
|
||||||
|
# Must stay the final statement of this file.
include(CPack)
|
||||||
103
Framework/external/embree/CMakePresets.json
vendored
Normal file
103
Framework/external/embree/CMakePresets.json
vendored
Normal file
|
|
@ -0,0 +1,103 @@
|
||||||
|
{
|
||||||
|
"version": 4,
|
||||||
|
"cmakeMinimumRequired": {
|
||||||
|
"major": 3,
|
||||||
|
"minor": 11,
|
||||||
|
"patch": 0
|
||||||
|
},
|
||||||
|
"include": [
|
||||||
|
"scripts/cmake-presets/os.json",
|
||||||
|
"scripts/cmake-presets/package.json",
|
||||||
|
"scripts/cmake-presets/compiler.json",
|
||||||
|
"scripts/cmake-presets/tbb.json",
|
||||||
|
"scripts/cmake-presets/continuous.json",
|
||||||
|
"scripts/cmake-presets/nightly.json",
|
||||||
|
"scripts/cmake-presets/release.json",
|
||||||
|
"scripts/cmake-presets/integrate.json",
|
||||||
|
"scripts/cmake-presets/performance.json"
|
||||||
|
],
|
||||||
|
"configurePresets": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "package-linux-icx-ispc1_19_0-tbb2021_9_0-perf",
|
||||||
|
"inherits": ["package-linux", "env", "icx", "ispc1_19_0", "tbb2021_9_0"],
|
||||||
|
"binaryDir": "${sourceDir}/build",
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_BUILD_TYPE": "Release",
|
||||||
|
"EMBREE_MAX_ISA": "AVX512",
|
||||||
|
"EMBREE_USE_GOOGLE_BENCHMARK": "ON",
|
||||||
|
"EMBREE_BUILD_GOOGLE_BENCHMARK_FROM_SOURCE": "ON"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "package-linux-icx-sycl-ispc1_19_0-tbb2021_9_0-perf",
|
||||||
|
"inherits": ["package-linux", "env", "icx", "ispc1_19_0", "tbb2021_9_0"],
|
||||||
|
"binaryDir": "${sourceDir}/build",
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_BUILD_TYPE": "Release",
|
||||||
|
"EMBREE_SYCL_SUPPORT": "ON",
|
||||||
|
"EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF",
|
||||||
|
"EMBREE_SYCL_AOT_DEVICES": "none",
|
||||||
|
"EMBREE_MAX_ISA": "AVX512",
|
||||||
|
"EMBREE_USE_GOOGLE_BENCHMARK": "ON",
|
||||||
|
"EMBREE_BUILD_GOOGLE_BENCHMARK_FROM_SOURCE": "ON"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "package-windows-v141-ispc1_19_0-tbb2021_9_0",
|
||||||
|
"inherits": ["package-windows", "env", "v141", "ispc1_19_0", "tbb2021_9_0"],
|
||||||
|
"binaryDir": "${sourceDir}/build",
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_BUILD_TYPE": "Release",
|
||||||
|
"EMBREE_MAX_ISA": "SSE2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "package-windows-v140-ispc1_19_0-tbb2021_9_0",
|
||||||
|
"inherits": ["package-windows", "env", "v140", "ispc1_19_0", "tbb2021_9_0"],
|
||||||
|
"binaryDir": "${sourceDir}/build",
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_BUILD_TYPE": "Release",
|
||||||
|
"EMBREE_MAX_ISA": "SSE2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "package-windows-icx-sycl-ispc1_19_0-tbb2021_9_0",
|
||||||
|
"inherits": ["package-windows", "env", "icx-windows", "ispc1_19_0", "tbb2021_9_0"],
|
||||||
|
"binaryDir": "${sourceDir}/build",
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_BUILD_TYPE": "Release",
|
||||||
|
"EMBREE_SYCL_SUPPORT": "ON",
|
||||||
|
"EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF",
|
||||||
|
"EMBREE_SYCL_AOT_DEVICES": "none",
|
||||||
|
"EMBREE_MAX_ISA": "AVX512"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "linux-coverity",
|
||||||
|
"inherits": ["package-linux", "env", "gcc", "tasking_internal"],
|
||||||
|
"binaryDir": "${sourceDir}/build",
|
||||||
|
"cacheVariables": {
|
||||||
|
"CMAKE_BUILD_TYPE": "Release",
|
||||||
|
"EMBREE_MAX_ISA": "SSE2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
6
Framework/external/embree/CTestConfig.cmake
vendored
Normal file
6
Framework/external/embree/CTestConfig.cmake
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Name of the project as seen by CTest.
set(CTEST_PROJECT_NAME Embree)
# NOTE(review): presumably pins the revision of the test models to use -
# confirm against the scripts that read TEST_MODELS_HASH.
set(TEST_MODELS_HASH "05b5a61035485d3090868f9abf5cc057d1e31101")
|
||||||
|
|
||||||
202
Framework/external/embree/LICENSE.txt
vendored
Normal file
202
Framework/external/embree/LICENSE.txt
vendored
Normal file
|
|
@ -0,0 +1,202 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
9926
Framework/external/embree/README.md
vendored
Normal file
9926
Framework/external/embree/README.md
vendored
Normal file
File diff suppressed because it is too large
Load diff
14
Framework/external/embree/SECURITY.md
vendored
Normal file
14
Framework/external/embree/SECURITY.md
vendored
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
Security Policy
|
||||||
|
===============
|
||||||
|
|
||||||
|
Intel is committed to rapidly addressing security vulnerabilities
|
||||||
|
affecting our customers and providing clear guidance on the solution,
|
||||||
|
impact, severity and mitigation.
|
||||||
|
|
||||||
|
Reporting a Vulnerability
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
Please [report any security vulnerabilities][guidelines] in this project
|
||||||
|
utilizing the [guidelines here][guidelines].
|
||||||
|
|
||||||
|
[guidelines]: https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html "Vulnerability Handling Guidelines"
|
||||||
8
Framework/external/embree/common/CMakeLists.txt
vendored
Normal file
8
Framework/external/embree/common/CMakeLists.txt
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Component directories of common/, added in this order.
add_subdirectory(sys)
add_subdirectory(math)
add_subdirectory(simd)
add_subdirectory(lexers)
add_subdirectory(tasking)
|
||||||
56
Framework/external/embree/common/algorithms/parallel_any_of.h
vendored
Normal file
56
Framework/external/embree/common/algorithms/parallel_any_of.h
vendored
Normal file
|
|
@ -0,0 +1,56 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include "parallel_reduce.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
|
||||||
|
template<typename Index, class UnaryPredicate>
|
||||||
|
__forceinline bool parallel_any_of (Index first, Index last, UnaryPredicate pred)
|
||||||
|
{
|
||||||
|
std::atomic_bool ret;
|
||||||
|
ret = false;
|
||||||
|
|
||||||
|
#if defined(TASKING_TBB)
|
||||||
|
#if TBB_INTERFACE_VERSION >= 12002
|
||||||
|
tbb::task_group_context context;
|
||||||
|
tbb::parallel_for(tbb::blocked_range<size_t>{first, last}, [&ret,pred,&context](const tbb::blocked_range<size_t>& r) {
|
||||||
|
if (context.is_group_execution_cancelled()) return;
|
||||||
|
for (size_t i = r.begin(); i != r.end(); ++i) {
|
||||||
|
if (pred(i)) {
|
||||||
|
ret = true;
|
||||||
|
context.cancel_group_execution();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
#else
|
||||||
|
tbb::parallel_for(tbb::blocked_range<size_t>{first, last}, [&ret,pred](const tbb::blocked_range<size_t>& r) {
|
||||||
|
if (tbb::task::self().is_cancelled()) return;
|
||||||
|
for (size_t i = r.begin(); i != r.end(); ++i) {
|
||||||
|
if (pred(i)) {
|
||||||
|
ret = true;
|
||||||
|
tbb::task::self().cancel_group_execution();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
ret = parallel_reduce (first, last, false, [pred](const range<size_t>& r)->bool {
|
||||||
|
bool localret = false;
|
||||||
|
for (auto i=r.begin(); i<r.end(); ++i) {
|
||||||
|
localret |= pred(i);
|
||||||
|
}
|
||||||
|
return localret;
|
||||||
|
},
|
||||||
|
std::bit_or<bool>()
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
93
Framework/external/embree/common/algorithms/parallel_filter.h
vendored
Normal file
93
Framework/external/embree/common/algorithms/parallel_filter.h
vendored
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "parallel_for.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! Compacts data[first,last) in place so that the elements accepted by the
 *  predicate come first, in their original relative order.
 *
 *  \param data       array to filter
 *  \param first      index of the first element to consider
 *  \param last       one past the last element to consider
 *  \param predicate  called as predicate(data[i]); elements for which it
 *                    returns true are kept
 *  \return index one past the last kept element; the content of the slots
 *          from that index up to last is left as it was after compaction */
template<typename Ty, typename Index, typename Predicate>
inline Index sequential_filter( Ty* data, const Index first, const Index last, const Predicate& predicate)
{
  Index writePos = first;
  Index readPos = first;
  while (readPos < last)
  {
    Ty& candidate = data[readPos];
    if (predicate(candidate)) {
      data[writePos] = candidate;
      ++writePos;
    }
    readPos++;
  }
  return writePos;
}
|
||||||
|
|
||||||
|
template<typename Ty, typename Index, typename Predicate>
|
||||||
|
inline Index parallel_filter( Ty* data, const Index begin, const Index end, const Index minStepSize, const Predicate& predicate)
|
||||||
|
{
|
||||||
|
/* sequential fallback */
|
||||||
|
if (end-begin <= minStepSize)
|
||||||
|
return sequential_filter(data,begin,end,predicate);
|
||||||
|
|
||||||
|
/* calculate number of tasks to use */
|
||||||
|
enum { MAX_TASKS = 64 };
|
||||||
|
const Index numThreads = TaskScheduler::threadCount();
|
||||||
|
const Index numBlocks = (end-begin+minStepSize-1)/minStepSize;
|
||||||
|
const Index taskCount = min(numThreads,numBlocks,(Index)MAX_TASKS);
|
||||||
|
|
||||||
|
/* filter blocks */
|
||||||
|
Index nused[MAX_TASKS];
|
||||||
|
Index nfree[MAX_TASKS];
|
||||||
|
parallel_for(taskCount, [&](const Index taskIndex)
|
||||||
|
{
|
||||||
|
const Index i0 = begin+(taskIndex+0)*(end-begin)/taskCount;
|
||||||
|
const Index i1 = begin+(taskIndex+1)*(end-begin)/taskCount;
|
||||||
|
const Index i2 = sequential_filter(data,i0,i1,predicate);
|
||||||
|
nused[taskIndex] = i2-i0;
|
||||||
|
nfree[taskIndex] = i1-i2;
|
||||||
|
});
|
||||||
|
|
||||||
|
/* calculate offsets */
|
||||||
|
Index sused=0;
|
||||||
|
Index sfree=0;
|
||||||
|
Index pfree[MAX_TASKS];
|
||||||
|
for (Index i=0; i<taskCount; i++)
|
||||||
|
{
|
||||||
|
sused+=nused[i];
|
||||||
|
Index cfree = nfree[i]; pfree[i] = sfree; sfree+=cfree;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* return if we did not filter out any element */
|
||||||
|
assert(sfree <= end-begin);
|
||||||
|
assert(sused <= end-begin);
|
||||||
|
if (sused == end-begin)
|
||||||
|
return end;
|
||||||
|
|
||||||
|
/* otherwise we have to copy misplaced elements around */
|
||||||
|
parallel_for(taskCount, [&](const Index taskIndex)
|
||||||
|
{
|
||||||
|
/* destination to write elements to */
|
||||||
|
Index dst = begin+(taskIndex+0)*(end-begin)/taskCount+nused[taskIndex];
|
||||||
|
Index dst_end = min(dst+nfree[taskIndex],begin+sused);
|
||||||
|
if (dst_end <= dst) return;
|
||||||
|
|
||||||
|
/* range of misplaced elements to copy to destination */
|
||||||
|
Index r0 = pfree[taskIndex];
|
||||||
|
Index r1 = r0+dst_end-dst;
|
||||||
|
|
||||||
|
/* find range in misplaced elements in back to front order */
|
||||||
|
Index k0=0;
|
||||||
|
for (Index i=taskCount-1; i>0; i--)
|
||||||
|
{
|
||||||
|
if (k0 > r1) break;
|
||||||
|
Index k1 = k0+nused[i];
|
||||||
|
Index src = begin+(i+0)*(end-begin)/taskCount+nused[i];
|
||||||
|
for (Index i=max(r0,k0); i<min(r1,k1); i++) {
|
||||||
|
Index isrc = src-i+k0-1;
|
||||||
|
assert(dst >= begin && dst < end);
|
||||||
|
assert(isrc >= begin && isrc < end);
|
||||||
|
data[dst++] = data[isrc];
|
||||||
|
}
|
||||||
|
k0 = k1;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return begin+sused;
|
||||||
|
}
|
||||||
|
}
|
||||||
161
Framework/external/embree/common/algorithms/parallel_for.h
vendored
Normal file
161
Framework/external/embree/common/algorithms/parallel_for.h
vendored
Normal file
|
|
@ -0,0 +1,161 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../tasking/taskscheduler.h"
|
||||||
|
#include "../sys/array.h"
|
||||||
|
#include "../math/emath.h"
|
||||||
|
#include "../math/range.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/* parallel_for without range */
|
||||||
|
template<typename Index, typename Func>
|
||||||
|
__forceinline void parallel_for( const Index N, const Func& func)
|
||||||
|
{
|
||||||
|
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
|
||||||
|
if (N) {
|
||||||
|
TaskScheduler::TaskGroupContext context;
|
||||||
|
TaskScheduler::spawn(Index(0),N,Index(1),[&] (const range<Index>& r) {
|
||||||
|
assert(r.size() == 1);
|
||||||
|
func(r.begin());
|
||||||
|
},&context);
|
||||||
|
TaskScheduler::wait();
|
||||||
|
if (context.cancellingException != nullptr) {
|
||||||
|
std::rethrow_exception(context.cancellingException);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif defined(TASKING_TBB)
|
||||||
|
#if TBB_INTERFACE_VERSION >= 12002
|
||||||
|
tbb::task_group_context context;
|
||||||
|
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||||
|
func(i);
|
||||||
|
},context);
|
||||||
|
if (context.is_group_execution_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
#else
|
||||||
|
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||||
|
func(i);
|
||||||
|
});
|
||||||
|
if (tbb::task::self().is_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#elif defined(TASKING_PPL)
|
||||||
|
concurrency::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||||
|
func(i);
|
||||||
|
});
|
||||||
|
#else
|
||||||
|
# error "no tasking system enabled"
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* parallel for with range and granularity */
|
||||||
|
template<typename Index, typename Func>
|
||||||
|
__forceinline void parallel_for( const Index first, const Index last, const Index minStepSize, const Func& func)
|
||||||
|
{
|
||||||
|
assert(first <= last);
|
||||||
|
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
|
||||||
|
TaskScheduler::TaskGroupContext context;
|
||||||
|
TaskScheduler::spawn(first,last,minStepSize,func,&context);
|
||||||
|
TaskScheduler::wait();
|
||||||
|
if (context.cancellingException != nullptr) {
|
||||||
|
std::rethrow_exception(context.cancellingException);
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined(TASKING_TBB)
|
||||||
|
#if TBB_INTERFACE_VERSION >= 12002
|
||||||
|
tbb::task_group_context context;
|
||||||
|
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
|
||||||
|
func(range<Index>(r.begin(),r.end()));
|
||||||
|
},context);
|
||||||
|
if (context.is_group_execution_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
#else
|
||||||
|
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
|
||||||
|
func(range<Index>(r.begin(),r.end()));
|
||||||
|
});
|
||||||
|
if (tbb::task::self().is_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#elif defined(TASKING_PPL)
|
||||||
|
concurrency::parallel_for(first, last, Index(1) /*minStepSize*/, [&](Index i) {
|
||||||
|
func(range<Index>(i,i+1));
|
||||||
|
});
|
||||||
|
|
||||||
|
#else
|
||||||
|
# error "no tasking system enabled"
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* parallel for with range */
|
||||||
|
template<typename Index, typename Func>
|
||||||
|
__forceinline void parallel_for( const Index first, const Index last, const Func& func)
|
||||||
|
{
|
||||||
|
assert(first <= last);
|
||||||
|
parallel_for(first,last,(Index)1,func);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION > 4001)
|
||||||
|
|
||||||
|
template<typename Index, typename Func>
|
||||||
|
__forceinline void parallel_for_static( const Index N, const Func& func)
|
||||||
|
{
|
||||||
|
#if TBB_INTERFACE_VERSION >= 12002
|
||||||
|
tbb::task_group_context context;
|
||||||
|
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||||
|
func(i);
|
||||||
|
},tbb::simple_partitioner(),context);
|
||||||
|
if (context.is_group_execution_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
#else
|
||||||
|
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||||
|
func(i);
|
||||||
|
},tbb::simple_partitioner());
|
||||||
|
if (tbb::task::self().is_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef tbb::affinity_partitioner affinity_partitioner;
|
||||||
|
|
||||||
|
template<typename Index, typename Func>
|
||||||
|
__forceinline void parallel_for_affinity( const Index N, const Func& func, tbb::affinity_partitioner& ap)
|
||||||
|
{
|
||||||
|
#if TBB_INTERFACE_VERSION >= 12002
|
||||||
|
tbb::task_group_context context;
|
||||||
|
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||||
|
func(i);
|
||||||
|
},ap,context);
|
||||||
|
if (context.is_group_execution_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
#else
|
||||||
|
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||||
|
func(i);
|
||||||
|
},ap);
|
||||||
|
if (tbb::task::self().is_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
template<typename Index, typename Func>
|
||||||
|
__forceinline void parallel_for_static( const Index N, const Func& func)
|
||||||
|
{
|
||||||
|
parallel_for(N,func);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct affinity_partitioner {
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Index, typename Func>
|
||||||
|
__forceinline void parallel_for_affinity( const Index N, const Func& func, affinity_partitioner& ap)
|
||||||
|
{
|
||||||
|
parallel_for(N,func);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
160
Framework/external/embree/common/algorithms/parallel_for_for.h
vendored
Normal file
160
Framework/external/embree/common/algorithms/parallel_for_for.h
vendored
Normal file
|
|
@ -0,0 +1,160 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "parallel_for.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<typename ArrayArray, typename Func>
|
||||||
|
__forceinline void sequential_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
|
||||||
|
{
|
||||||
|
size_t k=0;
|
||||||
|
for (size_t i=0; i!=array2.size(); ++i) {
|
||||||
|
const size_t N = array2[i]->size();
|
||||||
|
if (N) func(array2[i],range<size_t>(0,N),k);
|
||||||
|
k+=N;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class ParallelForForState
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
enum { MAX_TASKS = 64 };
|
||||||
|
|
||||||
|
__forceinline ParallelForForState ()
|
||||||
|
: taskCount(0) {}
|
||||||
|
|
||||||
|
template<typename ArrayArray>
|
||||||
|
__forceinline ParallelForForState (ArrayArray& array2, const size_t minStepSize) {
|
||||||
|
init(array2,minStepSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename SizeFunc>
|
||||||
|
__forceinline ParallelForForState (const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize) {
|
||||||
|
init(numArrays,getSize,minStepSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename SizeFunc>
|
||||||
|
__forceinline void init ( const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize )
|
||||||
|
{
|
||||||
|
/* first calculate total number of elements */
|
||||||
|
size_t N = 0;
|
||||||
|
for (size_t i=0; i<numArrays; i++) {
|
||||||
|
N += getSize(i);
|
||||||
|
}
|
||||||
|
this->N = N;
|
||||||
|
|
||||||
|
/* calculate number of tasks to use */
|
||||||
|
const size_t numThreads = TaskScheduler::threadCount();
|
||||||
|
const size_t numBlocks = (N+minStepSize-1)/minStepSize;
|
||||||
|
taskCount = max(size_t(1),min(numThreads,numBlocks,size_t(ParallelForForState::MAX_TASKS)));
|
||||||
|
|
||||||
|
/* calculate start (i,j) for each task */
|
||||||
|
size_t taskIndex = 0;
|
||||||
|
i0[taskIndex] = 0;
|
||||||
|
j0[taskIndex] = 0;
|
||||||
|
size_t k0 = (++taskIndex)*N/taskCount;
|
||||||
|
for (size_t i=0, k=0; taskIndex < taskCount; i++)
|
||||||
|
{
|
||||||
|
assert(i<numArrays);
|
||||||
|
size_t j=0, M = getSize(i);
|
||||||
|
while (j<M && k+M-j >= k0 && taskIndex < taskCount) {
|
||||||
|
assert(taskIndex<taskCount);
|
||||||
|
i0[taskIndex] = i;
|
||||||
|
j0[taskIndex] = j += k0-k;
|
||||||
|
k=k0;
|
||||||
|
k0 = (++taskIndex)*N/taskCount;
|
||||||
|
}
|
||||||
|
k+=M-j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename ArrayArray>
|
||||||
|
__forceinline void init ( ArrayArray& array2, const size_t minStepSize )
|
||||||
|
{
|
||||||
|
init(array2.size(),[&](size_t i) { return array2[i] ? array2[i]->size() : 0; },minStepSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline size_t size() const {
|
||||||
|
return N;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
size_t i0[MAX_TASKS];
|
||||||
|
size_t j0[MAX_TASKS];
|
||||||
|
size_t taskCount;
|
||||||
|
size_t N;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename ArrayArray, typename Func>
|
||||||
|
__forceinline void parallel_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
|
||||||
|
{
|
||||||
|
ParallelForForState state(array2,minStepSize);
|
||||||
|
|
||||||
|
parallel_for(state.taskCount, [&](const size_t taskIndex)
|
||||||
|
{
|
||||||
|
/* calculate range */
|
||||||
|
const size_t k0 = (taskIndex+0)*state.size()/state.taskCount;
|
||||||
|
const size_t k1 = (taskIndex+1)*state.size()/state.taskCount;
|
||||||
|
size_t i0 = state.i0[taskIndex];
|
||||||
|
size_t j0 = state.j0[taskIndex];
|
||||||
|
|
||||||
|
/* iterate over arrays */
|
||||||
|
size_t k=k0;
|
||||||
|
for (size_t i=i0; k<k1; i++) {
|
||||||
|
const size_t N = array2[i] ? array2[i]->size() : 0;
|
||||||
|
const size_t r0 = j0, r1 = min(N,r0+k1-k);
|
||||||
|
if (r1 > r0) func(array2[i],range<size_t>(r0,r1),k);
|
||||||
|
k+=r1-r0; j0 = 0;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename ArrayArray, typename Func>
|
||||||
|
__forceinline void parallel_for_for( ArrayArray& array2, const Func& func )
|
||||||
|
{
|
||||||
|
parallel_for_for(array2,1,func);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const size_t minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
|
||||||
|
{
|
||||||
|
ParallelForForState state(array2,minStepSize);
|
||||||
|
Value temp[ParallelForForState::MAX_TASKS];
|
||||||
|
|
||||||
|
for (size_t i=0; i<state.taskCount; i++)
|
||||||
|
temp[i] = identity;
|
||||||
|
|
||||||
|
parallel_for(state.taskCount, [&](const size_t taskIndex)
|
||||||
|
{
|
||||||
|
/* calculate range */
|
||||||
|
const size_t k0 = (taskIndex+0)*state.size()/state.taskCount;
|
||||||
|
const size_t k1 = (taskIndex+1)*state.size()/state.taskCount;
|
||||||
|
size_t i0 = state.i0[taskIndex];
|
||||||
|
size_t j0 = state.j0[taskIndex];
|
||||||
|
|
||||||
|
/* iterate over arrays */
|
||||||
|
size_t k=k0;
|
||||||
|
for (size_t i=i0; k<k1; i++) {
|
||||||
|
const size_t N = array2[i] ? array2[i]->size() : 0;
|
||||||
|
const size_t r0 = j0, r1 = min(N,r0+k1-k);
|
||||||
|
if (r1 > r0) temp[taskIndex] = reduction(temp[taskIndex],func(array2[i],range<size_t>(r0,r1),k));
|
||||||
|
k+=r1-r0; j0 = 0;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Value ret = identity;
|
||||||
|
for (size_t i=0; i<state.taskCount; i++)
|
||||||
|
ret = reduction(ret,temp[i]);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction)
|
||||||
|
{
|
||||||
|
return parallel_for_for_reduce(array2,1,identity,func,reduction);
|
||||||
|
}
|
||||||
|
}
|
||||||
142
Framework/external/embree/common/algorithms/parallel_for_for_prefix_sum.h
vendored
Normal file
142
Framework/external/embree/common/algorithms/parallel_for_for_prefix_sum.h
vendored
Normal file
|
|
@ -0,0 +1,142 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "parallel_for_for.h"
|
||||||
|
#include "parallel_prefix_sum.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<typename Value>
|
||||||
|
struct ParallelForForPrefixSumState : public ParallelForForState
|
||||||
|
{
|
||||||
|
__forceinline ParallelForForPrefixSumState () {}
|
||||||
|
|
||||||
|
template<typename ArrayArray>
|
||||||
|
__forceinline ParallelForForPrefixSumState (ArrayArray& array2, const size_t minStepSize)
|
||||||
|
: ParallelForForState(array2,minStepSize) {}
|
||||||
|
|
||||||
|
template<typename SizeFunc>
|
||||||
|
__forceinline ParallelForForPrefixSumState (size_t numArrays, const SizeFunc& getSize, const size_t minStepSize)
|
||||||
|
: ParallelForForState(numArrays,getSize,minStepSize) {}
|
||||||
|
|
||||||
|
ParallelPrefixSumState<Value> prefix_state;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_for_for_prefix_sum0_( ParallelForForPrefixSumState<Value>& state, Index minStepSize,
|
||||||
|
const SizeFunc& getSize, const Value& identity, const Func& func, const Reduction& reduction)
|
||||||
|
{
|
||||||
|
/* calculate number of tasks to use */
|
||||||
|
const size_t taskCount = state.taskCount;
|
||||||
|
|
||||||
|
/* perform parallel prefix sum */
|
||||||
|
parallel_for(taskCount, [&](const size_t taskIndex)
|
||||||
|
{
|
||||||
|
const size_t k0 = (taskIndex+0)*state.size()/taskCount;
|
||||||
|
const size_t k1 = (taskIndex+1)*state.size()/taskCount;
|
||||||
|
size_t i0 = state.i0[taskIndex];
|
||||||
|
size_t j0 = state.j0[taskIndex];
|
||||||
|
|
||||||
|
/* iterate over arrays */
|
||||||
|
size_t k=k0;
|
||||||
|
Value N=identity;
|
||||||
|
for (size_t i=i0; k<k1; i++) {
|
||||||
|
const size_t size = getSize(i);
|
||||||
|
const size_t r0 = j0, r1 = min(size,r0+k1-k);
|
||||||
|
if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k));
|
||||||
|
k+=r1-r0; j0 = 0;
|
||||||
|
}
|
||||||
|
state.prefix_state.counts[taskIndex] = N;
|
||||||
|
});
|
||||||
|
|
||||||
|
/* calculate prefix sum */
|
||||||
|
Value sum=identity;
|
||||||
|
for (size_t i=0; i<taskCount; i++)
|
||||||
|
{
|
||||||
|
const Value c = state.prefix_state.counts[i];
|
||||||
|
state.prefix_state.sums[i] = sum;
|
||||||
|
sum=reduction(sum,c);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_for_for_prefix_sum1_( ParallelForForPrefixSumState<Value>& state, Index minStepSize,
|
||||||
|
const SizeFunc& getSize,
|
||||||
|
const Value& identity, const Func& func, const Reduction& reduction)
|
||||||
|
{
|
||||||
|
/* calculate number of tasks to use */
|
||||||
|
const size_t taskCount = state.taskCount;
|
||||||
|
/* perform parallel prefix sum */
|
||||||
|
parallel_for(taskCount, [&](const size_t taskIndex)
|
||||||
|
{
|
||||||
|
const size_t k0 = (taskIndex+0)*state.size()/taskCount;
|
||||||
|
const size_t k1 = (taskIndex+1)*state.size()/taskCount;
|
||||||
|
size_t i0 = state.i0[taskIndex];
|
||||||
|
size_t j0 = state.j0[taskIndex];
|
||||||
|
|
||||||
|
/* iterate over arrays */
|
||||||
|
size_t k=k0;
|
||||||
|
Value N=identity;
|
||||||
|
for (size_t i=i0; k<k1; i++) {
|
||||||
|
const size_t size = getSize(i);
|
||||||
|
const size_t r0 = j0, r1 = min(size,r0+k1-k);
|
||||||
|
if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k,reduction(state.prefix_state.sums[taskIndex],N)));
|
||||||
|
k+=r1-r0; j0 = 0;
|
||||||
|
}
|
||||||
|
state.prefix_state.counts[taskIndex] = N;
|
||||||
|
});
|
||||||
|
|
||||||
|
/* calculate prefix sum */
|
||||||
|
Value sum=identity;
|
||||||
|
for (size_t i=0; i<taskCount; i++)
|
||||||
|
{
|
||||||
|
const Value c = state.prefix_state.counts[i];
|
||||||
|
state.prefix_state.sums[i] = sum;
|
||||||
|
sum=reduction(sum,c);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state,
|
||||||
|
ArrayArray& array2, Index minStepSize,
|
||||||
|
const Value& identity, const Func& func, const Reduction& reduction)
|
||||||
|
{
|
||||||
|
return parallel_for_for_prefix_sum0_(state,minStepSize,
|
||||||
|
[&](Index i) { return array2[i] ? array2[i]->size() : 0; },
|
||||||
|
identity,
|
||||||
|
[&](Index i, const range<Index>& r, Index k) { return func(array2[i], r, k, i); },
|
||||||
|
reduction);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state,
|
||||||
|
ArrayArray& array2, Index minStepSize,
|
||||||
|
const Value& identity, const Func& func, const Reduction& reduction)
|
||||||
|
{
|
||||||
|
return parallel_for_for_prefix_sum1_(state,minStepSize,
|
||||||
|
[&](Index i) { return array2[i] ? array2[i]->size() : 0; },
|
||||||
|
identity,
|
||||||
|
[&](Index i, const range<Index>& r, Index k, const Value& base) { return func(array2[i], r, k, i, base); },
|
||||||
|
reduction);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
|
||||||
|
const Value& identity, const Func& func, const Reduction& reduction)
|
||||||
|
{
|
||||||
|
return parallel_for_for_prefix_sum0(state,array2,size_t(1),identity,func,reduction);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
|
||||||
|
const Value& identity, const Func& func, const Reduction& reduction)
|
||||||
|
{
|
||||||
|
return parallel_for_for_prefix_sum1(state,array2,size_t(1),identity,func,reduction);
|
||||||
|
}
|
||||||
|
}
|
||||||
85
Framework/external/embree/common/algorithms/parallel_map.h
vendored
Normal file
85
Framework/external/embree/common/algorithms/parallel_map.h
vendored
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "parallel_sort.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! implementation of a key/value map with parallel construction */
|
||||||
|
template<typename Key, typename Val>
|
||||||
|
class parallel_map
|
||||||
|
{
|
||||||
|
/* key/value pair to build the map */
|
||||||
|
struct KeyValue
|
||||||
|
{
|
||||||
|
__forceinline KeyValue () {}
|
||||||
|
|
||||||
|
__forceinline KeyValue (const Key key, const Val val)
|
||||||
|
: key(key), val(val) {}
|
||||||
|
|
||||||
|
__forceinline operator Key() const {
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
Key key;
|
||||||
|
Val val;
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
/*! parallel map constructors */
|
||||||
|
parallel_map () {}
|
||||||
|
|
||||||
|
/*! construction from pair of vectors */
|
||||||
|
template<typename KeyVector, typename ValVector>
|
||||||
|
parallel_map (const KeyVector& keys, const ValVector& values) { init(keys,values); }
|
||||||
|
|
||||||
|
/*! initialized the parallel map from a vector with keys and values */
|
||||||
|
template<typename KeyVector, typename ValVector>
|
||||||
|
void init(const KeyVector& keys, const ValVector& values)
|
||||||
|
{
|
||||||
|
/* reserve sufficient space for all data */
|
||||||
|
assert(keys.size() == values.size());
|
||||||
|
vec.resize(keys.size());
|
||||||
|
|
||||||
|
/* generate key/value pairs */
|
||||||
|
parallel_for( size_t(0), keys.size(), size_t(4*4096), [&](const range<size_t>& r) {
|
||||||
|
for (size_t i=r.begin(); i<r.end(); i++)
|
||||||
|
vec[i] = KeyValue((Key)keys[i],values[i]);
|
||||||
|
});
|
||||||
|
|
||||||
|
/* perform parallel radix sort of the key/value pairs */
|
||||||
|
std::vector<KeyValue> temp(keys.size());
|
||||||
|
radix_sort<KeyValue,Key>(vec.data(),temp.data(),keys.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Returns a pointer to the value associated with the specified key. The pointer will be nullptr of the key is not contained in the map. */
|
||||||
|
__forceinline const Val* lookup(const Key& key) const
|
||||||
|
{
|
||||||
|
typename std::vector<KeyValue>::const_iterator i = std::lower_bound(vec.begin(), vec.end(), key);
|
||||||
|
if (i == vec.end()) return nullptr;
|
||||||
|
if (i->key != key) return nullptr;
|
||||||
|
return &i->val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! If the key is in the map, the function returns the value associated with the key, otherwise it returns the default value. */
|
||||||
|
__forceinline Val lookup(const Key& key, const Val& def) const
|
||||||
|
{
|
||||||
|
typename std::vector<KeyValue>::const_iterator i = std::lower_bound(vec.begin(), vec.end(), key);
|
||||||
|
if (i == vec.end()) return def;
|
||||||
|
if (i->key != key) return def;
|
||||||
|
return i->val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! clears all state */
|
||||||
|
void clear() {
|
||||||
|
vec.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<KeyValue> vec; //!< vector containing sorted elements
|
||||||
|
};
|
||||||
|
}
|
||||||
283
Framework/external/embree/common/algorithms/parallel_partition.h
vendored
Normal file
283
Framework/external/embree/common/algorithms/parallel_partition.h
vendored
Normal file
|
|
@ -0,0 +1,283 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "parallel_for.h"
|
||||||
|
#include "../math/range.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/* serial partitioning */
|
||||||
|
template<typename T, typename V, typename IsLeft, typename Reduction_T>
|
||||||
|
__forceinline size_t serial_partitioning(T* array,
|
||||||
|
const size_t begin,
|
||||||
|
const size_t end,
|
||||||
|
V& leftReduction,
|
||||||
|
V& rightReduction,
|
||||||
|
const IsLeft& is_left,
|
||||||
|
const Reduction_T& reduction_t)
|
||||||
|
{
|
||||||
|
T* l = array + begin;
|
||||||
|
T* r = array + end - 1;
|
||||||
|
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
/* *l < pivot */
|
||||||
|
while (likely(l <= r && is_left(*l) ))
|
||||||
|
{
|
||||||
|
//prefetchw(l+4); // FIXME: enable?
|
||||||
|
reduction_t(leftReduction,*l);
|
||||||
|
++l;
|
||||||
|
}
|
||||||
|
/* *r >= pivot) */
|
||||||
|
while (likely(l <= r && !is_left(*r)))
|
||||||
|
{
|
||||||
|
//prefetchw(r-4); FIXME: enable?
|
||||||
|
reduction_t(rightReduction,*r);
|
||||||
|
--r;
|
||||||
|
}
|
||||||
|
if (r<l) break;
|
||||||
|
|
||||||
|
reduction_t(leftReduction ,*r);
|
||||||
|
reduction_t(rightReduction,*l);
|
||||||
|
xchg(*l,*r);
|
||||||
|
l++; r--;
|
||||||
|
}
|
||||||
|
|
||||||
|
return l - array;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
|
||||||
|
class __aligned(64) parallel_partition_task
|
||||||
|
{
|
||||||
|
ALIGNED_CLASS_(64);
|
||||||
|
private:
|
||||||
|
|
||||||
|
static const size_t MAX_TASKS = 64;
|
||||||
|
|
||||||
|
T* array;
|
||||||
|
size_t N;
|
||||||
|
const IsLeft& is_left;
|
||||||
|
const Reduction_T& reduction_t;
|
||||||
|
const Reduction_V& reduction_v;
|
||||||
|
const Vi& identity;
|
||||||
|
|
||||||
|
size_t numTasks;
|
||||||
|
__aligned(64) size_t counter_start[MAX_TASKS+1];
|
||||||
|
__aligned(64) size_t counter_left[MAX_TASKS+1];
|
||||||
|
__aligned(64) range<ssize_t> leftMisplacedRanges[MAX_TASKS];
|
||||||
|
__aligned(64) range<ssize_t> rightMisplacedRanges[MAX_TASKS];
|
||||||
|
__aligned(64) V leftReductions[MAX_TASKS];
|
||||||
|
__aligned(64) V rightReductions[MAX_TASKS];
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
__forceinline parallel_partition_task(T* array,
|
||||||
|
const size_t N,
|
||||||
|
const Vi& identity,
|
||||||
|
const IsLeft& is_left,
|
||||||
|
const Reduction_T& reduction_t,
|
||||||
|
const Reduction_V& reduction_v,
|
||||||
|
const size_t BLOCK_SIZE)
|
||||||
|
|
||||||
|
: array(array), N(N), is_left(is_left), reduction_t(reduction_t), reduction_v(reduction_v), identity(identity),
|
||||||
|
numTasks(min((N+BLOCK_SIZE-1)/BLOCK_SIZE,min(TaskScheduler::threadCount(),MAX_TASKS))) {}
|
||||||
|
|
||||||
|
__forceinline const range<ssize_t>* findStartRange(size_t& index, const range<ssize_t>* const r, const size_t numRanges)
|
||||||
|
{
|
||||||
|
size_t i = 0;
|
||||||
|
while(index >= (size_t)r[i].size())
|
||||||
|
{
|
||||||
|
assert(i < numRanges);
|
||||||
|
index -= (size_t)r[i].size();
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return &r[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void swapItemsInMisplacedRanges(const size_t numLeftMisplacedRanges,
|
||||||
|
const size_t numRightMisplacedRanges,
|
||||||
|
const size_t startID,
|
||||||
|
const size_t endID)
|
||||||
|
{
|
||||||
|
size_t leftLocalIndex = startID;
|
||||||
|
size_t rightLocalIndex = startID;
|
||||||
|
const range<ssize_t>* l_range = findStartRange(leftLocalIndex,leftMisplacedRanges,numLeftMisplacedRanges);
|
||||||
|
const range<ssize_t>* r_range = findStartRange(rightLocalIndex,rightMisplacedRanges,numRightMisplacedRanges);
|
||||||
|
|
||||||
|
size_t l_left = l_range->size() - leftLocalIndex;
|
||||||
|
size_t r_left = r_range->size() - rightLocalIndex;
|
||||||
|
T *__restrict__ l = &array[l_range->begin() + leftLocalIndex];
|
||||||
|
T *__restrict__ r = &array[r_range->begin() + rightLocalIndex];
|
||||||
|
size_t size = endID - startID;
|
||||||
|
size_t items = min(size,min(l_left,r_left));
|
||||||
|
|
||||||
|
while (size)
|
||||||
|
{
|
||||||
|
if (unlikely(l_left == 0))
|
||||||
|
{
|
||||||
|
l_range++;
|
||||||
|
l_left = l_range->size();
|
||||||
|
l = &array[l_range->begin()];
|
||||||
|
items = min(size,min(l_left,r_left));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(r_left == 0))
|
||||||
|
{
|
||||||
|
r_range++;
|
||||||
|
r_left = r_range->size();
|
||||||
|
r = &array[r_range->begin()];
|
||||||
|
items = min(size,min(l_left,r_left));
|
||||||
|
}
|
||||||
|
|
||||||
|
size -= items;
|
||||||
|
l_left -= items;
|
||||||
|
r_left -= items;
|
||||||
|
|
||||||
|
while(items) {
|
||||||
|
items--;
|
||||||
|
xchg(*l++,*r++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline size_t partition(V& leftReduction, V& rightReduction)
|
||||||
|
{
|
||||||
|
/* partition the individual ranges for each task */
|
||||||
|
parallel_for(numTasks,[&] (const size_t taskID) {
|
||||||
|
const size_t startID = (taskID+0)*N/numTasks;
|
||||||
|
const size_t endID = (taskID+1)*N/numTasks;
|
||||||
|
V local_left(identity);
|
||||||
|
V local_right(identity);
|
||||||
|
const size_t mid = serial_partitioning(array,startID,endID,local_left,local_right,is_left,reduction_t);
|
||||||
|
counter_start[taskID] = startID;
|
||||||
|
counter_left [taskID] = mid-startID;
|
||||||
|
leftReductions[taskID] = local_left;
|
||||||
|
rightReductions[taskID] = local_right;
|
||||||
|
});
|
||||||
|
counter_start[numTasks] = N;
|
||||||
|
counter_left[numTasks] = 0;
|
||||||
|
|
||||||
|
/* finalize the reductions */
|
||||||
|
for (size_t i=0; i<numTasks; i++) {
|
||||||
|
reduction_v(leftReduction,leftReductions[i]);
|
||||||
|
reduction_v(rightReduction,rightReductions[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* calculate mid point for partitioning */
|
||||||
|
size_t mid = counter_left[0];
|
||||||
|
for (size_t i=1; i<numTasks; i++)
|
||||||
|
mid += counter_left[i];
|
||||||
|
const range<ssize_t> globalLeft (0,mid);
|
||||||
|
const range<ssize_t> globalRight(mid,N);
|
||||||
|
|
||||||
|
/* calculate all left and right ranges that are on the wrong global side */
|
||||||
|
size_t numMisplacedRangesLeft = 0;
|
||||||
|
size_t numMisplacedRangesRight = 0;
|
||||||
|
size_t numMisplacedItemsLeft MAYBE_UNUSED = 0;
|
||||||
|
size_t numMisplacedItemsRight MAYBE_UNUSED = 0;
|
||||||
|
|
||||||
|
for (size_t i=0; i<numTasks; i++)
|
||||||
|
{
|
||||||
|
const range<ssize_t> left_range (counter_start[i], counter_start[i] + counter_left[i]);
|
||||||
|
const range<ssize_t> right_range(counter_start[i] + counter_left[i], counter_start[i+1]);
|
||||||
|
const range<ssize_t> left_misplaced = globalLeft. intersect(right_range);
|
||||||
|
const range<ssize_t> right_misplaced = globalRight.intersect(left_range);
|
||||||
|
|
||||||
|
if (!left_misplaced.empty())
|
||||||
|
{
|
||||||
|
numMisplacedItemsLeft += left_misplaced.size();
|
||||||
|
leftMisplacedRanges[numMisplacedRangesLeft++] = left_misplaced;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!right_misplaced.empty())
|
||||||
|
{
|
||||||
|
numMisplacedItemsRight += right_misplaced.size();
|
||||||
|
rightMisplacedRanges[numMisplacedRangesRight++] = right_misplaced;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert( numMisplacedItemsLeft == numMisplacedItemsRight );
|
||||||
|
|
||||||
|
/* if no items are misplaced we are done */
|
||||||
|
if (numMisplacedItemsLeft == 0)
|
||||||
|
return mid;
|
||||||
|
|
||||||
|
/* otherwise we copy the items to the right place in parallel */
|
||||||
|
parallel_for(numTasks,[&] (const size_t taskID) {
|
||||||
|
const size_t startID = (taskID+0)*numMisplacedItemsLeft/numTasks;
|
||||||
|
const size_t endID = (taskID+1)*numMisplacedItemsLeft/numTasks;
|
||||||
|
swapItemsInMisplacedRanges(numMisplacedRangesLeft,numMisplacedRangesRight,startID,endID);
|
||||||
|
});
|
||||||
|
|
||||||
|
return mid;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
|
||||||
|
__noinline size_t parallel_partitioning(T* array,
|
||||||
|
const size_t begin,
|
||||||
|
const size_t end,
|
||||||
|
const Vi &identity,
|
||||||
|
V &leftReduction,
|
||||||
|
V &rightReduction,
|
||||||
|
const IsLeft& is_left,
|
||||||
|
const Reduction_T& reduction_t,
|
||||||
|
const Reduction_V& reduction_v,
|
||||||
|
size_t BLOCK_SIZE = 128)
|
||||||
|
{
|
||||||
|
/* fall back to single threaded partitioning for small N */
|
||||||
|
if (unlikely(end-begin < BLOCK_SIZE))
|
||||||
|
return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);
|
||||||
|
|
||||||
|
/* otherwise use parallel code */
|
||||||
|
else {
|
||||||
|
typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
|
||||||
|
std::unique_ptr<partition_task> p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
|
||||||
|
return begin+p->partition(leftReduction,rightReduction);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
|
||||||
|
__noinline size_t parallel_partitioning(T* array,
|
||||||
|
const size_t begin,
|
||||||
|
const size_t end,
|
||||||
|
const Vi &identity,
|
||||||
|
V &leftReduction,
|
||||||
|
V &rightReduction,
|
||||||
|
const IsLeft& is_left,
|
||||||
|
const Reduction_T& reduction_t,
|
||||||
|
const Reduction_V& reduction_v,
|
||||||
|
size_t BLOCK_SIZE,
|
||||||
|
size_t PARALLEL_THRESHOLD)
|
||||||
|
{
|
||||||
|
/* fall back to single threaded partitioning for small N */
|
||||||
|
if (unlikely(end-begin < PARALLEL_THRESHOLD))
|
||||||
|
return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);
|
||||||
|
|
||||||
|
/* otherwise use parallel code */
|
||||||
|
else {
|
||||||
|
typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
|
||||||
|
std::unique_ptr<partition_task> p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
|
||||||
|
return begin+p->partition(leftReduction,rightReduction);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*! Partitions array[begin,end) in place without computing any reductions and
 *  returns the index of the first element of the right part. The reduction
 *  callbacks passed on are empty and the reduction values are dummies. */
template<typename T, typename IsLeft>
inline size_t parallel_partitioning(T* array,
                                    const size_t begin,
                                    const size_t end,
                                    const IsLeft& is_left,
                                    size_t BLOCK_SIZE = 128)
{
  /* dummy reduction values, never modified by the empty callbacks below */
  size_t leftReduction = 0;
  size_t rightReduction = 0;

  auto ignoreElement = [] (size_t& t,const T& ref) { };
  auto ignoreValue   = [] (size_t& t0,size_t& t1) { };

  return parallel_partitioning(array,begin,end,0,leftReduction,rightReduction,is_left,
                               ignoreElement,ignoreValue,BLOCK_SIZE);
}
|
||||||
|
|
||||||
|
}
|
||||||
85
Framework/external/embree/common/algorithms/parallel_prefix_sum.h
vendored
Normal file
85
Framework/external/embree/common/algorithms/parallel_prefix_sum.h
vendored
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "parallel_for.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! Per-task storage shared between the passes of a parallel prefix sum. */
template<typename Value>
struct ParallelPrefixSumState
{
  /*! maximal number of tasks, i.e. the capacity of both tables */
  enum { MAX_TASKS = 64 };

  /*! value reduced by each task */
  Value counts[MAX_TASKS];

  /*! exclusive prefix sum of counts: reduction of the counts of all previous tasks */
  Value sums[MAX_TASKS];
};
|
||||||
|
|
||||||
|
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction)
|
||||||
|
{
|
||||||
|
/* calculate number of tasks to use */
|
||||||
|
const size_t numThreads = TaskScheduler::threadCount();
|
||||||
|
const size_t numBlocks = (last-first+minStepSize-1)/minStepSize;
|
||||||
|
const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS));
|
||||||
|
|
||||||
|
/* perform parallel prefix sum */
|
||||||
|
parallel_for(taskCount, [&](const size_t taskIndex)
|
||||||
|
{
|
||||||
|
const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount;
|
||||||
|
const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount;
|
||||||
|
state.counts[taskIndex] = func(range<size_t>(i0,i1),state.sums[taskIndex]);
|
||||||
|
});
|
||||||
|
|
||||||
|
/* calculate prefix sum */
|
||||||
|
Value sum=identity;
|
||||||
|
for (size_t i=0; i<taskCount; i++)
|
||||||
|
{
|
||||||
|
const Value c = state.counts[i];
|
||||||
|
state.sums[i] = sum;
|
||||||
|
sum=reduction(sum,c);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! parallel calculation of prefix sums */
|
||||||
|
template<typename SrcArray, typename DstArray, typename Value, typename Add>
|
||||||
|
__forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096)
|
||||||
|
{
|
||||||
|
/* perform single threaded prefix operation for small N */
|
||||||
|
if (N < SINGLE_THREAD_THRESHOLD)
|
||||||
|
{
|
||||||
|
Value sum=identity;
|
||||||
|
for (size_t i=0; i<N; sum=add(sum,src[i++])) dst[i] = sum;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* perform parallel prefix operation for large N */
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ParallelPrefixSumState<Value> state;
|
||||||
|
|
||||||
|
/* initial run just sets up start values for subtasks */
|
||||||
|
parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
|
||||||
|
|
||||||
|
Value s = identity;
|
||||||
|
for (size_t i=r.begin(); i<r.end(); i++) s = add(s,src[i]);
|
||||||
|
return s;
|
||||||
|
|
||||||
|
}, add);
|
||||||
|
|
||||||
|
/* final run calculates prefix sum */
|
||||||
|
return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
|
||||||
|
|
||||||
|
Value s = identity;
|
||||||
|
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||||
|
dst[i] = add(sum,s);
|
||||||
|
s = add(s,src[i]);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
|
||||||
|
}, add);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
146
Framework/external/embree/common/algorithms/parallel_reduce.h
vendored
Normal file
146
Framework/external/embree/common/algorithms/parallel_reduce.h
vendored
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "parallel_for.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value sequential_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
|
||||||
|
{
|
||||||
|
return func(range<Index>(first,last));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value sequential_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
|
||||||
|
{
|
||||||
|
return func(range<Index>(first,last));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__noinline Value parallel_reduce_internal( Index taskCount, const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
|
||||||
|
{
|
||||||
|
const Index maxTasks = 512;
|
||||||
|
const Index threadCount = (Index) TaskScheduler::threadCount();
|
||||||
|
taskCount = min(taskCount,threadCount,maxTasks);
|
||||||
|
|
||||||
|
/* parallel invocation of all tasks */
|
||||||
|
dynamic_large_stack_array(Value,values,taskCount,8192); // consumes at most 8192 bytes on the stack
|
||||||
|
parallel_for(taskCount, [&](const Index taskIndex) {
|
||||||
|
const Index k0 = first+(taskIndex+0)*(last-first)/taskCount;
|
||||||
|
const Index k1 = first+(taskIndex+1)*(last-first)/taskCount;
|
||||||
|
values[taskIndex] = func(range<Index>(k0,k1));
|
||||||
|
});
|
||||||
|
|
||||||
|
/* perform reduction over all tasks */
|
||||||
|
Value v = identity;
|
||||||
|
for (Index i=0; i<taskCount; i++) v = reduction(v,values[i]);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
|
||||||
|
{
|
||||||
|
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
|
||||||
|
|
||||||
|
/* fast path for small number of iterations */
|
||||||
|
Index taskCount = (last-first+minStepSize-1)/minStepSize;
|
||||||
|
if (likely(taskCount == 1)) {
|
||||||
|
return func(range<Index>(first,last));
|
||||||
|
}
|
||||||
|
return parallel_reduce_internal(taskCount,first,last,minStepSize,identity,func,reduction);
|
||||||
|
|
||||||
|
#elif defined(TASKING_TBB)
|
||||||
|
#if TBB_INTERFACE_VERSION >= 12002
|
||||||
|
tbb::task_group_context context;
|
||||||
|
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
|
||||||
|
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
|
||||||
|
reduction,context);
|
||||||
|
if (context.is_group_execution_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
return v;
|
||||||
|
#else
|
||||||
|
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
|
||||||
|
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
|
||||||
|
reduction);
|
||||||
|
if (tbb::task::self().is_cancelled())
|
||||||
|
throw std::runtime_error("task cancelled");
|
||||||
|
return v;
|
||||||
|
#endif
|
||||||
|
#else // TASKING_PPL
|
||||||
|
struct AlignedValue
|
||||||
|
{
|
||||||
|
char storage[__alignof(Value)+sizeof(Value)];
|
||||||
|
static uintptr_t alignUp(uintptr_t p, size_t a) { return p + (~(p - 1) % a); };
|
||||||
|
Value* getValuePtr() { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
|
||||||
|
const Value* getValuePtr() const { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
|
||||||
|
AlignedValue(const Value& v) { new(getValuePtr()) Value(v); }
|
||||||
|
AlignedValue(const AlignedValue& v) { new(getValuePtr()) Value(*v.getValuePtr()); }
|
||||||
|
AlignedValue(const AlignedValue&& v) { new(getValuePtr()) Value(*v.getValuePtr()); };
|
||||||
|
AlignedValue& operator = (const AlignedValue& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
|
||||||
|
AlignedValue& operator = (const AlignedValue&& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
|
||||||
|
operator Value() const { return *getValuePtr(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Iterator_Index
|
||||||
|
{
|
||||||
|
Index v;
|
||||||
|
typedef std::forward_iterator_tag iterator_category;
|
||||||
|
typedef AlignedValue value_type;
|
||||||
|
typedef Index difference_type;
|
||||||
|
typedef Index distance_type;
|
||||||
|
typedef AlignedValue* pointer;
|
||||||
|
typedef AlignedValue& reference;
|
||||||
|
__forceinline Iterator_Index() {}
|
||||||
|
__forceinline Iterator_Index(Index v) : v(v) {}
|
||||||
|
__forceinline bool operator== (Iterator_Index other) { return v == other.v; }
|
||||||
|
__forceinline bool operator!= (Iterator_Index other) { return v != other.v; }
|
||||||
|
__forceinline Iterator_Index operator++() { return Iterator_Index(++v); }
|
||||||
|
__forceinline Iterator_Index operator++(int) { return Iterator_Index(v++); }
|
||||||
|
};
|
||||||
|
|
||||||
|
auto range_reduction = [&](Iterator_Index begin, Iterator_Index end, const AlignedValue& start) {
|
||||||
|
assert(begin.v < end.v);
|
||||||
|
return reduction(start, func(range<Index>(begin.v, end.v)));
|
||||||
|
};
|
||||||
|
const Value v = concurrency::parallel_reduce(Iterator_Index(first), Iterator_Index(last), AlignedValue(identity), range_reduction, reduction);
|
||||||
|
return v;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
|
||||||
|
{
|
||||||
|
if (likely(last-first < parallel_threshold)) {
|
||||||
|
return func(range<Index>(first,last));
|
||||||
|
} else {
|
||||||
|
return parallel_reduce(first,last,minStepSize,identity,func,reduction);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_reduce( const range<Index> range, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
|
||||||
|
{
|
||||||
|
return parallel_reduce(range.begin(),range.end(),minStepSize,parallel_threshold,identity,func,reduction);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
|
||||||
|
{
|
||||||
|
auto funcr = [&] ( const range<Index> r ) {
|
||||||
|
Value v = identity;
|
||||||
|
for (Index i=r.begin(); i<r.end(); i++)
|
||||||
|
v = reduction(v,func(i));
|
||||||
|
return v;
|
||||||
|
};
|
||||||
|
return parallel_reduce(first,last,Index(1),identity,funcr,reduction);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||||
|
__forceinline Value parallel_reduce( const range<Index> range, const Value& identity, const Func& func, const Reduction& reduction )
|
||||||
|
{
|
||||||
|
return parallel_reduce(range.begin(),range.end(),Index(1),identity,func,reduction);
|
||||||
|
}
|
||||||
|
}
|
||||||
52
Framework/external/embree/common/algorithms/parallel_set.h
vendored
Normal file
52
Framework/external/embree/common/algorithms/parallel_set.h
vendored
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "parallel_sort.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/* implementation of a set of values with parallel construction */
|
||||||
|
template<typename T>
|
||||||
|
class parallel_set
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
/*! default constructor for the parallel set */
|
||||||
|
parallel_set () {}
|
||||||
|
|
||||||
|
/*! construction from vector */
|
||||||
|
template<typename Vector>
|
||||||
|
parallel_set (const Vector& in) { init(in); }
|
||||||
|
|
||||||
|
/*! initialized the parallel set from a vector */
|
||||||
|
template<typename Vector>
|
||||||
|
void init(const Vector& in)
|
||||||
|
{
|
||||||
|
/* copy data to internal vector */
|
||||||
|
vec.resize(in.size());
|
||||||
|
parallel_for( size_t(0), in.size(), size_t(4*4096), [&](const range<size_t>& r) {
|
||||||
|
for (size_t i=r.begin(); i<r.end(); i++)
|
||||||
|
vec[i] = in[i];
|
||||||
|
});
|
||||||
|
|
||||||
|
/* sort the data */
|
||||||
|
std::vector<T> temp(in.size());
|
||||||
|
radix_sort<T>(vec.data(),temp.data(),vec.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! tests if some element is in the set */
|
||||||
|
__forceinline bool lookup(const T& elt) const {
|
||||||
|
return std::binary_search(vec.begin(), vec.end(), elt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! clears all state */
|
||||||
|
void clear() {
|
||||||
|
vec.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<T> vec; //!< vector containing sorted elements
|
||||||
|
};
|
||||||
|
}
|
||||||
454
Framework/external/embree/common/algorithms/parallel_sort.h
vendored
Normal file
454
Framework/external/embree/common/algorithms/parallel_sort.h
vendored
Normal file
|
|
@ -0,0 +1,454 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../simd/simd.h"
|
||||||
|
#include "parallel_for.h"
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<class T>
|
||||||
|
__forceinline void insertionsort_ascending(T *__restrict__ array, const size_t length)
|
||||||
|
{
|
||||||
|
for(size_t i = 1;i<length;++i)
|
||||||
|
{
|
||||||
|
T v = array[i];
|
||||||
|
size_t j = i;
|
||||||
|
while(j > 0 && v < array[j-1])
|
||||||
|
{
|
||||||
|
array[j] = array[j-1];
|
||||||
|
--j;
|
||||||
|
}
|
||||||
|
array[j] = v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
__forceinline void insertionsort_decending(T *__restrict__ array, const size_t length)
|
||||||
|
{
|
||||||
|
for(size_t i = 1;i<length;++i)
|
||||||
|
{
|
||||||
|
T v = array[i];
|
||||||
|
size_t j = i;
|
||||||
|
while(j > 0 && v > array[j-1])
|
||||||
|
{
|
||||||
|
array[j] = array[j-1];
|
||||||
|
--j;
|
||||||
|
}
|
||||||
|
array[j] = v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
void quicksort_ascending(T *__restrict__ t,
|
||||||
|
const ssize_t begin,
|
||||||
|
const ssize_t end)
|
||||||
|
{
|
||||||
|
if (likely(begin < end))
|
||||||
|
{
|
||||||
|
const T pivotvalue = t[begin];
|
||||||
|
ssize_t left = begin - 1;
|
||||||
|
ssize_t right = end + 1;
|
||||||
|
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
while (t[--right] > pivotvalue);
|
||||||
|
while (t[++left] < pivotvalue);
|
||||||
|
|
||||||
|
if (left >= right) break;
|
||||||
|
|
||||||
|
const T temp = t[right];
|
||||||
|
t[right] = t[left];
|
||||||
|
t[left] = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int pivot = right;
|
||||||
|
quicksort_ascending(t, begin, pivot);
|
||||||
|
quicksort_ascending(t, pivot + 1, end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
void quicksort_decending(T *__restrict__ t,
|
||||||
|
const ssize_t begin,
|
||||||
|
const ssize_t end)
|
||||||
|
{
|
||||||
|
if (likely(begin < end))
|
||||||
|
{
|
||||||
|
const T pivotvalue = t[begin];
|
||||||
|
ssize_t left = begin - 1;
|
||||||
|
ssize_t right = end + 1;
|
||||||
|
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
while (t[--right] < pivotvalue);
|
||||||
|
while (t[++left] > pivotvalue);
|
||||||
|
|
||||||
|
if (left >= right) break;
|
||||||
|
|
||||||
|
const T temp = t[right];
|
||||||
|
t[right] = t[left];
|
||||||
|
t[left] = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int pivot = right;
|
||||||
|
quicksort_decending(t, begin, pivot);
|
||||||
|
quicksort_decending(t, pivot + 1, end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class T, ssize_t THRESHOLD>
|
||||||
|
void quicksort_insertionsort_ascending(T *__restrict__ t,
|
||||||
|
const ssize_t begin,
|
||||||
|
const ssize_t end)
|
||||||
|
{
|
||||||
|
if (likely(begin < end))
|
||||||
|
{
|
||||||
|
const ssize_t size = end-begin+1;
|
||||||
|
if (likely(size <= THRESHOLD))
|
||||||
|
{
|
||||||
|
insertionsort_ascending<T>(&t[begin],size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const T pivotvalue = t[begin];
|
||||||
|
ssize_t left = begin - 1;
|
||||||
|
ssize_t right = end + 1;
|
||||||
|
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
while (t[--right] > pivotvalue);
|
||||||
|
while (t[++left] < pivotvalue);
|
||||||
|
|
||||||
|
if (left >= right) break;
|
||||||
|
|
||||||
|
const T temp = t[right];
|
||||||
|
t[right] = t[left];
|
||||||
|
t[left] = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
const ssize_t pivot = right;
|
||||||
|
quicksort_insertionsort_ascending<T,THRESHOLD>(t, begin, pivot);
|
||||||
|
quicksort_insertionsort_ascending<T,THRESHOLD>(t, pivot + 1, end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class T, ssize_t THRESHOLD>
|
||||||
|
void quicksort_insertionsort_decending(T *__restrict__ t,
|
||||||
|
const ssize_t begin,
|
||||||
|
const ssize_t end)
|
||||||
|
{
|
||||||
|
if (likely(begin < end))
|
||||||
|
{
|
||||||
|
const ssize_t size = end-begin+1;
|
||||||
|
if (likely(size <= THRESHOLD))
|
||||||
|
{
|
||||||
|
insertionsort_decending<T>(&t[begin],size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
|
||||||
|
const T pivotvalue = t[begin];
|
||||||
|
ssize_t left = begin - 1;
|
||||||
|
ssize_t right = end + 1;
|
||||||
|
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
while (t[--right] < pivotvalue);
|
||||||
|
while (t[++left] > pivotvalue);
|
||||||
|
|
||||||
|
if (left >= right) break;
|
||||||
|
|
||||||
|
const T temp = t[right];
|
||||||
|
t[right] = t[left];
|
||||||
|
t[left] = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
const ssize_t pivot = right;
|
||||||
|
quicksort_insertionsort_decending<T,THRESHOLD>(t, begin, pivot);
|
||||||
|
quicksort_insertionsort_decending<T,THRESHOLD>(t, pivot + 1, end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static void radixsort32(T* const morton, const size_t num, const unsigned int shift = 3*8)
|
||||||
|
{
|
||||||
|
static const unsigned int BITS = 8;
|
||||||
|
static const unsigned int BUCKETS = (1 << BITS);
|
||||||
|
static const unsigned int CMP_SORT_THRESHOLD = 16;
|
||||||
|
|
||||||
|
__aligned(64) unsigned int count[BUCKETS];
|
||||||
|
|
||||||
|
/* clear buckets */
|
||||||
|
for (size_t i=0;i<BUCKETS;i++) count[i] = 0;
|
||||||
|
|
||||||
|
/* count buckets */
|
||||||
|
#if defined(__INTEL_COMPILER)
|
||||||
|
#pragma nounroll
|
||||||
|
#endif
|
||||||
|
for (size_t i=0;i<num;i++)
|
||||||
|
count[(unsigned(morton[i]) >> shift) & (BUCKETS-1)]++;
|
||||||
|
|
||||||
|
/* prefix sums */
|
||||||
|
__aligned(64) unsigned int head[BUCKETS];
|
||||||
|
__aligned(64) unsigned int tail[BUCKETS];
|
||||||
|
|
||||||
|
head[0] = 0;
|
||||||
|
for (size_t i=1; i<BUCKETS; i++)
|
||||||
|
head[i] = head[i-1] + count[i-1];
|
||||||
|
|
||||||
|
for (size_t i=0; i<BUCKETS-1; i++)
|
||||||
|
tail[i] = head[i+1];
|
||||||
|
|
||||||
|
tail[BUCKETS-1] = head[BUCKETS-1] + count[BUCKETS-1];
|
||||||
|
|
||||||
|
assert(tail[BUCKETS-1] == head[BUCKETS-1] + count[BUCKETS-1]);
|
||||||
|
assert(tail[BUCKETS-1] == num);
|
||||||
|
|
||||||
|
/* in-place swap */
|
||||||
|
for (size_t i=0;i<BUCKETS;i++)
|
||||||
|
{
|
||||||
|
/* process bucket */
|
||||||
|
while(head[i] < tail[i])
|
||||||
|
{
|
||||||
|
T v = morton[head[i]];
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
const size_t b = (unsigned(v) >> shift) & (BUCKETS-1);
|
||||||
|
if (b == i) break;
|
||||||
|
std::swap(v,morton[head[b]++]);
|
||||||
|
}
|
||||||
|
assert((unsigned(v) >> shift & (BUCKETS-1)) == i);
|
||||||
|
morton[head[i]++] = v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (shift == 0) return;
|
||||||
|
|
||||||
|
size_t offset = 0;
|
||||||
|
for (size_t i=0;i<BUCKETS;i++)
|
||||||
|
if (count[i])
|
||||||
|
{
|
||||||
|
|
||||||
|
for (size_t j=offset;j<offset+count[i]-1;j++)
|
||||||
|
assert(((unsigned(morton[j]) >> shift) & (BUCKETS-1)) == i);
|
||||||
|
|
||||||
|
if (unlikely(count[i] < CMP_SORT_THRESHOLD))
|
||||||
|
insertionsort_ascending(morton + offset, count[i]);
|
||||||
|
else
|
||||||
|
radixsort32(morton + offset, count[i], shift-BITS);
|
||||||
|
|
||||||
|
for (size_t j=offset;j<offset+count[i]-1;j++)
|
||||||
|
assert(morton[j] <= morton[j+1]);
|
||||||
|
|
||||||
|
offset += count[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Ty, typename Key>
|
||||||
|
class ParallelRadixSort
|
||||||
|
{
|
||||||
|
static const size_t MAX_TASKS = 64;
|
||||||
|
static const size_t BITS = 8;
|
||||||
|
static const size_t BUCKETS = (1 << BITS);
|
||||||
|
typedef unsigned int TyRadixCount[BUCKETS];
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static bool compare(const T& v0, const T& v1) {
|
||||||
|
return (Key)v0 < (Key)v1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
ParallelRadixSort (const ParallelRadixSort& other) DELETED; // do not implement
|
||||||
|
ParallelRadixSort& operator= (const ParallelRadixSort& other) DELETED; // do not implement
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
ParallelRadixSort (Ty* const src, Ty* const tmp, const size_t N)
|
||||||
|
: radixCount(nullptr), src(src), tmp(tmp), N(N) {}
|
||||||
|
|
||||||
|
void sort(const size_t blockSize)
|
||||||
|
{
|
||||||
|
assert(blockSize > 0);
|
||||||
|
|
||||||
|
/* perform single threaded sort for small N */
|
||||||
|
if (N<=blockSize) // handles also special case of 0!
|
||||||
|
{
|
||||||
|
/* do inplace sort inside destination array */
|
||||||
|
std::sort(src,src+N,compare<Ty>);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* perform parallel sort for large N */
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const size_t numThreads = min((N+blockSize-1)/blockSize,TaskScheduler::threadCount(),size_t(MAX_TASKS));
|
||||||
|
tbbRadixSort(numThreads);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
~ParallelRadixSort()
|
||||||
|
{
|
||||||
|
alignedFree(radixCount);
|
||||||
|
radixCount = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
void tbbRadixIteration0(const Key shift,
|
||||||
|
const Ty* __restrict const src,
|
||||||
|
Ty* __restrict const dst,
|
||||||
|
const size_t threadIndex, const size_t threadCount)
|
||||||
|
{
|
||||||
|
const size_t startID = (threadIndex+0)*N/threadCount;
|
||||||
|
const size_t endID = (threadIndex+1)*N/threadCount;
|
||||||
|
|
||||||
|
/* mask to extract some number of bits */
|
||||||
|
const Key mask = BUCKETS-1;
|
||||||
|
|
||||||
|
/* count how many items go into the buckets */
|
||||||
|
for (size_t i=0; i<BUCKETS; i++)
|
||||||
|
radixCount[threadIndex][i] = 0;
|
||||||
|
|
||||||
|
/* iterate over src array and count buckets */
|
||||||
|
unsigned int * __restrict const count = radixCount[threadIndex];
|
||||||
|
#if defined(__INTEL_COMPILER)
|
||||||
|
#pragma nounroll
|
||||||
|
#endif
|
||||||
|
for (size_t i=startID; i<endID; i++) {
|
||||||
|
#if defined(__64BIT__)
|
||||||
|
const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
|
||||||
|
#else
|
||||||
|
const Key index = ((Key)src[i] >> shift) & mask;
|
||||||
|
#endif
|
||||||
|
count[index]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void tbbRadixIteration1(const Key shift,
|
||||||
|
const Ty* __restrict const src,
|
||||||
|
Ty* __restrict const dst,
|
||||||
|
const size_t threadIndex, const size_t threadCount)
|
||||||
|
{
|
||||||
|
const size_t startID = (threadIndex+0)*N/threadCount;
|
||||||
|
const size_t endID = (threadIndex+1)*N/threadCount;
|
||||||
|
|
||||||
|
/* mask to extract some number of bits */
|
||||||
|
const Key mask = BUCKETS-1;
|
||||||
|
|
||||||
|
/* calculate total number of items for each bucket */
|
||||||
|
__aligned(64) unsigned int total[BUCKETS];
|
||||||
|
/*
|
||||||
|
for (size_t i=0; i<BUCKETS; i++)
|
||||||
|
total[i] = 0;
|
||||||
|
*/
|
||||||
|
for (size_t i=0; i<BUCKETS; i+=VSIZEX)
|
||||||
|
vintx::store(&total[i], zero);
|
||||||
|
|
||||||
|
for (size_t i=0; i<threadCount; i++)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
for (size_t j=0; j<BUCKETS; j++)
|
||||||
|
total[j] += radixCount[i][j];
|
||||||
|
*/
|
||||||
|
for (size_t j=0; j<BUCKETS; j+=VSIZEX)
|
||||||
|
vintx::store(&total[j], vintx::load(&total[j]) + vintx::load(&radixCount[i][j]));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* calculate start offset of each bucket */
|
||||||
|
__aligned(64) unsigned int offset[BUCKETS];
|
||||||
|
offset[0] = 0;
|
||||||
|
for (size_t i=1; i<BUCKETS; i++)
|
||||||
|
offset[i] = offset[i-1] + total[i-1];
|
||||||
|
|
||||||
|
/* calculate start offset of each bucket for this thread */
|
||||||
|
for (size_t i=0; i<threadIndex; i++)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
for (size_t j=0; j<BUCKETS; j++)
|
||||||
|
offset[j] += radixCount[i][j];
|
||||||
|
*/
|
||||||
|
for (size_t j=0; j<BUCKETS; j+=VSIZEX)
|
||||||
|
vintx::store(&offset[j], vintx::load(&offset[j]) + vintx::load(&radixCount[i][j]));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* copy items into their buckets */
|
||||||
|
#if defined(__INTEL_COMPILER)
|
||||||
|
#pragma nounroll
|
||||||
|
#endif
|
||||||
|
for (size_t i=startID; i<endID; i++) {
|
||||||
|
const Ty elt = src[i];
|
||||||
|
#if defined(__64BIT__)
|
||||||
|
const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
|
||||||
|
#else
|
||||||
|
const size_t index = ((Key)src[i] >> shift) & mask;
|
||||||
|
#endif
|
||||||
|
dst[offset[index]++] = elt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void tbbRadixIteration(const Key shift, const bool last,
|
||||||
|
const Ty* __restrict src, Ty* __restrict dst,
|
||||||
|
const size_t numTasks)
|
||||||
|
{
|
||||||
|
affinity_partitioner ap;
|
||||||
|
parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration0(shift,src,dst,taskIndex,numTasks); },ap);
|
||||||
|
parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration1(shift,src,dst,taskIndex,numTasks); },ap);
|
||||||
|
}
|
||||||
|
|
||||||
|
void tbbRadixSort(const size_t numTasks)
|
||||||
|
{
|
||||||
|
radixCount = (TyRadixCount*) alignedMalloc(MAX_TASKS*sizeof(TyRadixCount),64);
|
||||||
|
|
||||||
|
if (sizeof(Key) == sizeof(uint32_t)) {
|
||||||
|
tbbRadixIteration(0*BITS,0,src,tmp,numTasks);
|
||||||
|
tbbRadixIteration(1*BITS,0,tmp,src,numTasks);
|
||||||
|
tbbRadixIteration(2*BITS,0,src,tmp,numTasks);
|
||||||
|
tbbRadixIteration(3*BITS,1,tmp,src,numTasks);
|
||||||
|
}
|
||||||
|
else if (sizeof(Key) == sizeof(uint64_t))
|
||||||
|
{
|
||||||
|
tbbRadixIteration(0*BITS,0,src,tmp,numTasks);
|
||||||
|
tbbRadixIteration(1*BITS,0,tmp,src,numTasks);
|
||||||
|
tbbRadixIteration(2*BITS,0,src,tmp,numTasks);
|
||||||
|
tbbRadixIteration(3*BITS,0,tmp,src,numTasks);
|
||||||
|
tbbRadixIteration(4*BITS,0,src,tmp,numTasks);
|
||||||
|
tbbRadixIteration(5*BITS,0,tmp,src,numTasks);
|
||||||
|
tbbRadixIteration(6*BITS,0,src,tmp,numTasks);
|
||||||
|
tbbRadixIteration(7*BITS,1,tmp,src,numTasks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
TyRadixCount* radixCount;
|
||||||
|
Ty* const src;
|
||||||
|
Ty* const tmp;
|
||||||
|
const size_t N;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Ty>
|
||||||
|
void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
|
||||||
|
{
|
||||||
|
ParallelRadixSort<Ty,Ty>(src,tmp,N).sort(blockSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Ty, typename Key>
|
||||||
|
void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
|
||||||
|
{
|
||||||
|
ParallelRadixSort<Ty,Key>(src,tmp,N).sort(blockSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Ty>
|
||||||
|
void radix_sort_u32(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) {
|
||||||
|
radix_sort<Ty,uint32_t>(src,tmp,N,blockSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Ty>
|
||||||
|
void radix_sort_u64(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) {
|
||||||
|
radix_sort<Ty,uint64_t>(src,tmp,N,blockSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
75
Framework/external/embree/common/cmake/FindOpenImageIO.cmake
vendored
Normal file
75
Framework/external/embree/common/cmake/FindOpenImageIO.cmake
vendored
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Find module for OpenImageIO.
#
# Input:  OPENIMAGEIO_ROOT (CMake variable), or the environment variables
#         OPENIMAGEIO_ROOT / OPENIMAGEIOROOT.
# Output: OPENIMAGEIO_INCLUDE_DIR / OPENIMAGEIO_LIBRARY (cache), and on
#         success OPENIMAGEIO_INCLUDE_DIRS / OPENIMAGEIO_LIBRARIES.

# Fall back to the environment when OPENIMAGEIO_ROOT was not given.
if (NOT OPENIMAGEIO_ROOT)
  set(OPENIMAGEIO_ROOT $ENV{OPENIMAGEIO_ROOT})
endif()
if (NOT OPENIMAGEIO_ROOT)
  set(OPENIMAGEIO_ROOT $ENV{OPENIMAGEIOROOT})
endif()

# detect changed OPENIMAGEIO_ROOT
# Both operands are expanded and quoted so that the comparison is made on the
# values; with bare names if() compares the literal names whenever a variable
# is undefined.
if (NOT "${OPENIMAGEIO_ROOT}" STREQUAL "${OPENIMAGEIO_ROOT_LAST}")
  unset(OPENIMAGEIO_INCLUDE_DIR CACHE)
  unset(OPENIMAGEIO_LIBRARY CACHE)
endif()

# Toolchain-specific library directory suffix (used as lib-<suffix> below).
set(OPENIMAGEIO_LIB_SUFFIX "")
if (WIN32)
  if (MSVC14)
    set(OPENIMAGEIO_LIB_SUFFIX "vc2015")
  elseif (MSVC12)
    set(OPENIMAGEIO_LIB_SUFFIX "vc2013")
  elseif (MSVC11)
    set(OPENIMAGEIO_LIB_SUFFIX "vc2012")
  elseif (MINGW)
    # the same suffix is used for 32-bit and 64-bit MinGW builds
    set(OPENIMAGEIO_LIB_SUFFIX "mingw-w64")
  endif()
endif()

find_path(OPENIMAGEIO_ROOT include/OpenImageIO/imageio.h
  DOC "Root of OpenImageIO installation"
  HINTS ${OPENIMAGEIO_ROOT}
  PATHS
    "${PROJECT_SOURCE_DIR}/oiio"
    /usr/local
    /usr
    /
)

# The header is only searched below the root determined above.
find_path(OPENIMAGEIO_INCLUDE_DIR OpenImageIO/imageio.h PATHS ${OPENIMAGEIO_ROOT}/include NO_DEFAULT_PATH)
set(OPENIMAGEIO_HINTS
  HINTS
    ${OPENIMAGEIO_ROOT}
  PATH_SUFFIXES
    /lib
    /lib64
    /lib-${OPENIMAGEIO_LIB_SUFFIX}
)
set(OPENIMAGEIO_PATHS PATHS /usr/lib /usr/lib64 /lib /lib64)
find_library(OPENIMAGEIO_LIBRARY OpenImageIO ${OPENIMAGEIO_HINTS} ${OPENIMAGEIO_PATHS})

set(OPENIMAGEIO_ROOT_LAST ${OPENIMAGEIO_ROOT} CACHE INTERNAL "Last value of OPENIMAGEIO_ROOT to detect changes")

set(OPENIMAGEIO_ERROR_MESSAGE "OpenImageIO not found in your environment. You can 1) install
via your OS package manager, or 2) install it
somewhere on your machine and point OPENIMAGEIO_ROOT to it.")

include(FindPackageHandleStandardArgs)
# The message is quoted so that it is always passed as a single argument.
find_package_handle_standard_args(OpenImageIO
  "${OPENIMAGEIO_ERROR_MESSAGE}"
  OPENIMAGEIO_INCLUDE_DIR OPENIMAGEIO_LIBRARY
)

if (OPENIMAGEIO_FOUND)
  set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
  set(OPENIMAGEIO_LIBRARIES ${OPENIMAGEIO_LIBRARY})
endif()

mark_as_advanced(OPENIMAGEIO_INCLUDE_DIR)
mark_as_advanced(OPENIMAGEIO_LIBRARY)
|
||||||
11
Framework/external/embree/common/cmake/FindPNG.cmake
vendored
Normal file
11
Framework/external/embree/common/cmake/FindPNG.cmake
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Find module for PNG: searches for png.h and the png library, then reports
# the result through find_package_handle_standard_args.
find_path(PNG_INCLUDE_DIR NAMES png.h)
find_library(PNG_LIBRARIES NAMES png)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PNG DEFAULT_MSG PNG_INCLUDE_DIR PNG_LIBRARIES)

# keep both cache entries out of the default cache view
mark_as_advanced(PNG_INCLUDE_DIR PNG_LIBRARIES)
|
||||||
482
Framework/external/embree/common/cmake/FindTBB.cmake
vendored
Normal file
482
Framework/external/embree/common/cmake/FindTBB.cmake
vendored
Normal file
|
|
@ -0,0 +1,482 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# This script will attempt to find TBB and set up a TBB target.
|
||||||
|
#
|
||||||
|
# The user may specify a version and lists of required and optional components:
|
||||||
|
#
|
||||||
|
# find_package(TBB 2017.0 EXACT REQUIRED
|
||||||
|
# tbb tbbmalloc
|
||||||
|
# OPTIONAL_COMPONENTS tbbmalloc_proxy
|
||||||
|
# QUIET)
|
||||||
|
#
|
||||||
|
# If this target exists already, the script will attempt to re-use it, but fail
|
||||||
|
# if version or components do not match the user-specified requirements.
|
||||||
|
#
|
||||||
|
# If all the required component targets (e.g. TBB::tbb) exist, the script will
|
||||||
|
# attempt to create a target TBB and link existing component targets to it.
|
||||||
|
# It will fail if the component target version does not match the user-specified
|
||||||
|
# requirements.
|
||||||
|
#
|
||||||
|
# The user may specify the following variables to help the search process:
|
||||||
|
# - TBB_ROOT
|
||||||
|
# - TBB_INCLUDE_DIR
|
||||||
|
#
|
||||||
|
# After the script has run successfully, there is a target TBB, as well as
|
||||||
|
# component targets TBB::<COMPONENT>, e.g. TBB::tbbmalloc.
|
||||||
|
#
|
||||||
|
# The targets will attempt to link to release versions of TBB in release mode,
|
||||||
|
# and debug versions in debug mode.
|
||||||
|
#
|
||||||
|
# In addition to the targets, the script defines:
|
||||||
|
#
|
||||||
|
# TBB_FOUND
|
||||||
|
# TBB_INCLUDE_DIRS
|
||||||
|
#
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# INTERFACE libraries are used below; they are only available in CMake 3.x.
cmake_minimum_required(VERSION 3.1)

# Header name and include sub-directory used to locate the TBB root and
# include directories automatically.
set(_TBB_HEADER "tbb/tbb.h")
set(_TBB_INCLUDE_SUBDIR "include")

# Seed the TBB_ROOT cache entry: a TBB_ROOT value that is already set is used
# as the default, otherwise the TBB_ROOT environment variable is consulted.
if (NOT TBB_ROOT)
  set(TBB_ROOT "$ENV{TBB_ROOT}")
endif()

set(TBB_ROOT "${TBB_ROOT}" CACHE PATH "The root path of TBB.")
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# Error messages that respect the user's wishes about peace and quiet.
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Print a STATUS message built from all arguments, unless find_package() was
# invoked with QUIET (TBB_FIND_QUIETLY).
function(rk_tbb_status)
  if (TBB_FIND_QUIETLY)
    return()
  endif()
  message(STATUS "${ARGV}")
endfunction()
|
||||||
|
|
||||||
|
# Emit a WARNING built from all arguments, unless find_package() was invoked
# with QUIET (TBB_FIND_QUIETLY).
function(rk_tbb_warning)
  if (TBB_FIND_QUIETLY)
    return()
  endif()
  message(WARNING "${ARGV}")
endfunction()
|
||||||
|
|
||||||
|
# Report a failure and leave the calling scope.
#
# With REQUIRED (TBB_FIND_REQUIRED) the message is a FATAL_ERROR, otherwise it
# is forwarded to rk_tbb_warning(). This has to stay a macro: the trailing
# return() is meant to exit whatever scope invoked rk_tbb_error().
macro(rk_tbb_error)
  if (NOT TBB_FIND_REQUIRED)
    rk_tbb_warning("${ARGV}")
  else()
    message(FATAL_ERROR "${ARGV}")
  endif()
  return()
endmacro()
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# Extract a list of required and optional components.
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Split the requested components into _REQUIRED_COMPONENTS and
# _OPTIONAL_COMPONENTS.
#
# This is a macro so that both lists are created in the caller's scope.
# When no components were requested, "tbb" is required and the remaining
# known components are optional.
macro(rk_tbb_list_components)
  # cmake provides the TBB_FIND_COMPONENTS and TBB_FIND_REQUIRED_<C>
  # variables based on the invocation of find_package.
  #
  # The expansion is quoted on purpose: an unquoted, *undefined*
  # TBB_FIND_COMPONENTS would be compared as the literal string
  # "TBB_FIND_COMPONENTS" and the defaults below would never be selected.
  if ("${TBB_FIND_COMPONENTS}" STREQUAL "")
    set(_REQUIRED_COMPONENTS "tbb")
    set(_OPTIONAL_COMPONENTS "tbbmalloc"
                             "tbbmalloc_proxy"
                             "tbbbind"
                             "tbbpreview")
  else()
    set(_REQUIRED_COMPONENTS "")
    set(_OPTIONAL_COMPONENTS "")
    foreach (C IN LISTS TBB_FIND_COMPONENTS)
      # Let if() dereference the variable itself instead of expanding it
      # twice; an undefined TBB_FIND_REQUIRED_<C> is then simply false.
      if (TBB_FIND_REQUIRED_${C})
        list(APPEND _REQUIRED_COMPONENTS ${C})
      else()
        list(APPEND _OPTIONAL_COMPONENTS ${C})
      endif()
    endforeach()
  endif()

  rk_tbb_status("Looking for TBB components ${_REQUIRED_COMPONENTS}"
                " (${_OPTIONAL_COMPONENTS})")
endmacro()
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# List components that are available, and check if any REQUIRED components
|
||||||
|
# are missing.
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Populate _TBB_AVAILABLE_COMPONENTS with every requested component (required
# first, then optional) that already has a TBB::<component> target, and
# _TBB_MISSING_COMPONENTS with every required component that does not.
# Macro: both lists are written to the caller's scope.
macro(rk_tbb_check_components)
  set(_TBB_AVAILABLE_COMPONENTS "")
  set(_TBB_MISSING_COMPONENTS "")

  foreach (C IN LISTS _REQUIRED_COMPONENTS _OPTIONAL_COMPONENTS)
    if (TARGET TBB::${C})
      list(APPEND _TBB_AVAILABLE_COMPONENTS ${C})
    endif()
  endforeach()

  # Only required components can be "missing".
  foreach (C IN LISTS _REQUIRED_COMPONENTS)
    if (NOT TARGET TBB::${C})
      list(APPEND _TBB_MISSING_COMPONENTS ${C})
    endif()
  endforeach()
endmacro()
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# Check the version of the TBB root we found.
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Read the TBB version from the headers below TBB_INCLUDE_DIR and compare it
# against the version requested through find_package().
#
# Sets TBB_VERSION_MAJOR, TBB_VERSION_MINOR, TBB_VERSION and
# TBB_VERSION_STRING in the caller's scope. On failure rk_tbb_error() is
# invoked, which returns from the calling scope (this is a macro).
macro(rk_tbb_check_version)
  # Pick the header that carries the version macros.
  if(EXISTS "${TBB_INCLUDE_DIR}/oneapi/tbb/version.h")
    set(_TBB_VERSION_HEADER "oneapi/tbb/version.h")
  elseif(EXISTS "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h")
    set(_TBB_VERSION_HEADER "tbb/tbb_stddef.h")
  elseif(EXISTS "${TBB_INCLUDE_DIR}/tbb/version.h")
    set(_TBB_VERSION_HEADER "tbb/version.h")
  else()
    rk_tbb_error("Missing TBB version information. Could not find "
      "oneapi/tbb/version.h, tbb/tbb_stddef.h or tbb/version.h "
      "in ${TBB_INCLUDE_DIR}")
  endif()

  # Extract the version we found in our root.
  file(READ "${TBB_INCLUDE_DIR}/${_TBB_VERSION_HEADER}" VERSION_HEADER_CONTENT)
  string(REGEX MATCH "#define TBB_VERSION_MAJOR ([0-9]+)" DUMMY "${VERSION_HEADER_CONTENT}")
  set(TBB_VERSION_MAJOR ${CMAKE_MATCH_1})
  string(REGEX MATCH "#define TBB_VERSION_MINOR ([0-9]+)" DUMMY "${VERSION_HEADER_CONTENT}")
  set(TBB_VERSION_MINOR ${CMAKE_MATCH_1})
  set(TBB_VERSION "${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR}")
  set(TBB_VERSION_STRING "${TBB_VERSION}")

  # If the user provided information about required versions, check them!
  if (TBB_FIND_VERSION)
    # TBB_FIND_VERSION_EXACT is tested by name: expanding it with ${} turns
    # the condition into a syntax error whenever the variable is empty.
    if (TBB_FIND_VERSION_EXACT AND NOT
        "${TBB_VERSION}" VERSION_EQUAL "${TBB_FIND_VERSION}")
      rk_tbb_error("Requested exact TBB version ${TBB_FIND_VERSION},"
        " but found ${TBB_VERSION}")
    elseif("${TBB_VERSION}" VERSION_LESS "${TBB_FIND_VERSION}")
      rk_tbb_error("Requested minimum TBB version ${TBB_FIND_VERSION},"
        " but found ${TBB_VERSION}")
    endif()
  endif()

  rk_tbb_status("Found TBB version ${TBB_VERSION} at ${TBB_ROOT}")
endmacro()
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# Reuse existing targets.
|
||||||
|
# NOTE: This must be a macro, as we rely on return() to exit this script.
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Reuse TBB::<component> targets that already exist in the project.
#
# If every required component already has a target, the version is checked,
# an umbrella TBB INTERFACE target is created (if needed) and the find module
# returns successfully. If only some targets exist, an error is reported.
# If nothing exists, the macro falls through so the caller can run a regular
# search.
#
# NOTE: This must be a macro, as we rely on return() to exit this script.
macro(rk_tbb_reuse_existing_target_components)
  rk_tbb_check_components()

  # Require at least one existing component target. Without this guard a
  # request that lists only optional components (so nothing can be "missing")
  # would enter this branch with an empty list and fail in list(GET) below.
  if (_TBB_MISSING_COMPONENTS STREQUAL "" AND
      NOT _TBB_AVAILABLE_COMPONENTS STREQUAL "")
    rk_tbb_status("Found existing TBB component targets: ${_TBB_AVAILABLE_COMPONENTS}")

    # Get TBB_INCLUDE_DIR if not already set to check for the version of the
    # existing component targets (making the assumption that they all have
    # the same version)
    if (NOT TBB_INCLUDE_DIR)
      list(GET _TBB_AVAILABLE_COMPONENTS 0 first_target)
      get_target_property(TBB_INCLUDE_DIR TBB::${first_target} INTERFACE_INCLUDE_DIRECTORIES)
      foreach(TGT IN LISTS _TBB_AVAILABLE_COMPONENTS)
        get_target_property(_TGT_INCLUDE_DIR TBB::${TGT} INTERFACE_INCLUDE_DIRECTORIES)
        if (NOT _TGT_INCLUDE_DIR STREQUAL "${TBB_INCLUDE_DIR}")
          rk_tbb_error("Existing TBB component targets have inconsistent include directories.")
        endif()
      endforeach()
    endif()

    find_path(TBB_INCLUDE_DIR
      NAMES "${_TBB_HEADER}"
      PATHS "${TBB_INCLUDE_DIRS}")

    # Extract TBB_ROOT from the include path so that rk_tbb_check_version
    # prints the correct tbb location. Only the trailing include sub-directory
    # is stripped; a plain string(REPLACE) would also remove "/include" from
    # the middle of the path (e.g. ".../includes/tbb/include").
    string(REGEX REPLACE "/${_TBB_INCLUDE_SUBDIR}/?$" "" TBB_ROOT "${TBB_INCLUDE_DIR}")
    rk_tbb_check_version()

    # Add target TBB and link all available components
    if (NOT TARGET TBB)
      add_library(TBB INTERFACE)
      foreach(C IN LISTS _TBB_AVAILABLE_COMPONENTS)
        target_link_libraries(TBB INTERFACE TBB::${C})
      endforeach()
    endif()
    set(TBB_FOUND TRUE)
    set(TBB_INCLUDE_DIRS "${TBB_INCLUDE_DIR}")
    return()
  elseif ((TARGET TBB) OR (NOT _TBB_AVAILABLE_COMPONENTS STREQUAL ""))
    rk_tbb_error("Ignoring existing TBB targets because required components are missing: ${_TBB_MISSING_COMPONENTS}")
  endif()
endmacro()
|
||||||
|
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# Find the root directory if a manual override is not specified.
|
||||||
|
# Sets TBB_ROOT in the parent scope, but does not check for failure.
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Search for the TBB root directory unless TBB_ROOT already holds a value.
# The result is stored by find_path() in TBB_ROOT; failure is not checked
# here.
function(rk_tbb_find_root)
  # A root was supplied manually: nothing to do.
  if (TBB_ROOT AND NOT TBB_ROOT STREQUAL "")
    return()
  endif()

  set(_search_hints "")
  set(_search_paths "")

  if (NOT WIN32)
    set(_search_hints "/usr/local")
    set(_search_paths
      "${PROJECT_SOURCE_DIR}/tbb"
      "/opt/intel/composerxe/tbb"
      "/opt/intel/compilers_and_libraries/tbb"
      "/opt/intel/compilers_and_libraries/linux/tbb"
      "/opt/intel/tbb")
  else()
    # workaround for parentheses in variable name / CMP0053
    set(_pf86_name "PROGRAMFILES(x86)")
    set(_pf32 "$ENV{${_pf86_name}}")
    if (NOT _pf32)
      set(_pf32 "$ENV{PROGRAMFILES}")
    endif()
    if (NOT _pf32)
      set(_pf32 "C:/Program Files (x86)")
    endif()
    set(_search_paths
      "${PROJECT_SOURCE_DIR}/../tbb"
      "${_pf32}/IntelSWTools/compilers_and_libraries/windows/tbb"
      "${_pf32}/Intel/Composer XE/tbb"
      "${_pf32}/Intel/compilers_and_libraries/windows/tbb")
  endif()

  # Mark TBB_ROOT as not found before the search, then look for the
  # directory that contains <include subdir>/<tbb header>.
  set(TBB_ROOT "TBB_ROOT-NOTFOUND")
  find_path(TBB_ROOT
    NAMES "${_TBB_INCLUDE_SUBDIR}/${_TBB_HEADER}"
    HINTS ${_search_hints}
    PATHS ${_search_paths}
    NO_PACKAGE_ROOT_PATH)
endfunction()
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# Find the include directory if a manual override is not specified.
|
||||||
|
# Assumes TBB_ROOT to be set.
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Locate the directory that contains the TBB header (_TBB_HEADER), hinting at
# the include sub-directory below TBB_ROOT. The result is stored by
# find_path() in TBB_INCLUDE_DIR.
function(rk_tbb_find_include_directory)
  set(_include_hint "${TBB_ROOT}/${_TBB_INCLUDE_SUBDIR}")
  find_path(TBB_INCLUDE_DIR
    NAMES "${_TBB_HEADER}"
    HINTS "${_include_hint}"
    NO_PACKAGE_ROOT_PATH)
endfunction()
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# Find a specific library and create a target for it.
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Find one TBB library for one build configuration.
#
#   COMPONENT_NAME  component to look for, e.g. tbb or tbbmalloc
#   BUILD_CONFIG    RELEASE or DEBUG (DEBUG searches for <name>_debug)
#
# Results are stored in the cache:
#   <COMPONENT_NAME>_LIBRARY_<BUILD_CONFIG>  library / import library
#   <COMPONENT_NAME>_DLL_<BUILD_CONFIG>      matching DLL (Windows only)
function(rk_tbb_find_library COMPONENT_NAME BUILD_CONFIG)
  set(LIB_VAR "${COMPONENT_NAME}_LIBRARY_${BUILD_CONFIG}")
  set(BIN_DIR_VAR "${COMPONENT_NAME}_BIN_DIR_${BUILD_CONFIG}")
  set(DLL_VAR "${COMPONENT_NAME}_DLL_${BUILD_CONFIG}")
  if (BUILD_CONFIG STREQUAL "DEBUG")
    set(LIB_NAME "${COMPONENT_NAME}_debug")
  else()
    set(LIB_NAME "${COMPONENT_NAME}")
  endif()

  unset(LIB_PATHS)

  if (WIN32)
    if(CMAKE_SIZEOF_VOID_P EQUAL 8)
      set(TBB_ARCH intel64)
    else()
      set(TBB_ARCH ia32)
    endif()

    if(MSVC10)
      set(TBB_VCVER vc10)
    elseif(MSVC11)
      set(TBB_VCVER vc11)
    elseif(MSVC12)
      set(TBB_VCVER vc12)
    else()
      set(TBB_VCVER vc14)
    endif()

    set(LIB_PATHS
      ${TBB_ROOT}/lib/${TBB_ARCH}/${TBB_VCVER}
      ${TBB_ROOT}/lib
    )

    # On Windows, also search the DLL so that the client may install it.
    set(DLL_NAMES "${LIB_NAME}.dll")

    # lib name with version suffix to handle oneTBB tbb12.dll. Only the tbb
    # component has such a name; nothing is added for other components so
    # that no bogus ".dll" name ends up in the search.
    if ("${COMPONENT_NAME}" STREQUAL "tbb")
      if (BUILD_CONFIG STREQUAL "DEBUG")
        list(APPEND DLL_NAMES "tbb12_debug.dll")
      else()
        list(APPEND DLL_NAMES "tbb12.dll")
      endif()
    endif()

    # find_file() stores its result in the cache and skips the search when a
    # result is already cached. BIN_FILE is shared by every component and
    # configuration, so it has to be cleared first; otherwise all later calls
    # would silently reuse the first DLL that was found.
    unset(BIN_FILE CACHE)
    find_file(BIN_FILE
      NAMES ${DLL_NAMES}
      PATHS
        "${TBB_ROOT}/bin/${TBB_ARCH}/${TBB_VCVER}"
        "${TBB_ROOT}/bin"
        "${TBB_ROOT}/redist/${TBB_ARCH}/${TBB_VCVER}"
        "${TBB_ROOT}/../redist/${TBB_ARCH}/tbb/${TBB_VCVER}"
        "${TBB_ROOT}/../redist/${TBB_ARCH}_win/tbb/${TBB_VCVER}"
      NO_DEFAULT_PATH)
    get_filename_component(${BIN_DIR_VAR} "${BIN_FILE}" DIRECTORY)
    set(${DLL_VAR} "${BIN_FILE}" CACHE PATH "${COMPONENT_NAME} ${BUILD_CONFIG} dll path")
    # Do not leave the scratch entry behind for the next call.
    unset(BIN_FILE CACHE)
  elseif(APPLE)
    set(LIB_PATHS ${TBB_ROOT}/lib)
  else()
    # Prefer the compiler-specific directories, newest name first.
    file(GLOB LIB_PATHS "${TBB_ROOT}/lib/intel64/gcc*")
    list(REVERSE LIB_PATHS)
    list(APPEND LIB_PATHS
      ${TBB_ROOT}/lib
      ${TBB_ROOT}/lib/x86_64-linux-gnu
      ${TBB_ROOT}/lib64
      ${TBB_ROOT}/libx86_64-linux-gnu)
  endif()

  # We prefer finding the versioned file on Unix so that the library path
  # variable will not point to a symlink. This makes installing TBB as a
  # dependency easier.
  if (UNIX)
    set(LIB_NAME lib${LIB_NAME}.so.2 ${LIB_NAME})
  endif()

  find_library(${LIB_VAR}
    NAMES ${LIB_NAME}
    PATHS ${LIB_PATHS}
    NO_DEFAULT_PATH)

  # Hide this variable if we found something, otherwise display it for
  # easy override.
  if(${LIB_VAR})
    mark_as_advanced(${LIB_VAR})
  endif()
  if(${BIN_DIR_VAR})
    mark_as_advanced(${BIN_DIR_VAR})
  endif()
  if(${DLL_VAR})
    mark_as_advanced(${DLL_VAR})
  endif()
endfunction()
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
# Find the given component.
|
||||||
|
# This macro attempts to find both release and debug versions, and falls back
|
||||||
|
# appropriately if only one can be found.
|
||||||
|
# On success, it creates a target ${TARGET}::${COMPONENT_NAME} and links
|
||||||
|
# it to the overall ${TARGET}.
|
||||||
|
#
|
||||||
|
# For more information on the variables set here, see
|
||||||
|
# https://cmake.org/cmake/help/v3.17/manual/cmake-developer.7.html#a-sample-find-module
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Find the release and debug libraries of one component and, if at least one
# of them exists, create the imported target TBB::<COMPONENT_NAME> and link it
# to the umbrella target TBB.
function(rk_tbb_find_and_link_component COMPONENT_NAME)
  set(_target "TBB::${COMPONENT_NAME}")

  rk_tbb_find_library("${COMPONENT_NAME}" RELEASE)
  rk_tbb_find_library("${COMPONENT_NAME}" DEBUG)

  # Nothing found for this component: do not create a target.
  if (NOT (${COMPONENT_NAME}_LIBRARY_RELEASE OR ${COMPONENT_NAME}_LIBRARY_DEBUG))
    return()
  endif()

  # Note: We *must* use SHARED here rather than UNKNOWN as our
  # IMPORTED_NO_SONAME trick a few lines down does not work with
  # UNKNOWN.
  add_library(${_target} SHARED IMPORTED)

  # Register every configuration for which a library was found.
  foreach(_cfg IN ITEMS RELEASE DEBUG)
    if (${COMPONENT_NAME}_LIBRARY_${_cfg})
      set_property(TARGET ${_target} APPEND PROPERTY
        IMPORTED_CONFIGURATIONS ${_cfg})
      if (WIN32)
        # Windows: the DLL is the location, the .lib is the import library.
        set_target_properties(${_target} PROPERTIES
          IMPORTED_LOCATION_${_cfg} "${${COMPONENT_NAME}_DLL_${_cfg}}"
          IMPORTED_IMPLIB_${_cfg} "${${COMPONENT_NAME}_LIBRARY_${_cfg}}")
      else()
        set_target_properties(${_target} PROPERTIES
          IMPORTED_LOCATION_${_cfg} "${${COMPONENT_NAME}_LIBRARY_${_cfg}}")
      endif()
    endif()
  endforeach()

  set_target_properties(${_target} PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${TBB_INCLUDE_DIR}"
    INTERFACE_COMPILE_DEFINITIONS "__TBB_NO_IMPLICIT_LINKAGE=1"
  )

  if (NOT WIN32)
    # Note: IMPORTED_NO_SONAME must be set or cmake will attempt
    #       to link to the full path of libtbb.so. Instead, we
    #       rely on the linker to find libtbb.so.2.
    set_target_properties(${_target} PROPERTIES
      IMPORTED_NO_SONAME TRUE
    )
  endif()

  target_link_libraries(TBB INTERFACE ${_target})
endfunction()
|
||||||
|
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
# Driver of the find module. The order of the calls is important: several of
# the macros create variables that later steps consume, and every
# rk_tbb_error() leaves this script through return().

# 1) Determine the requested components, then try to reuse existing targets
#    (which returns from this script on success).
rk_tbb_list_components()
rk_tbb_reuse_existing_target_components()

# 2) Locate the TBB root directory.
rk_tbb_find_root()
if (NOT EXISTS "${TBB_ROOT}")
  rk_tbb_error("Unable to find root directory ${TBB_ROOT}")
endif()
mark_as_advanced(TBB_ROOT) # Hide, we found something.

# 3) Locate the include directory.
rk_tbb_find_include_directory()
if (NOT EXISTS "${TBB_INCLUDE_DIR}")
  rk_tbb_error("Unable to find include directory ${TBB_INCLUDE_DIR}")
endif()
mark_as_advanced(TBB_INCLUDE_DIR) # Hide, we found something.

# 4) Validate the version found in the headers.
rk_tbb_check_version()

# 5) Create the umbrella target and attach every component that can be found.
add_library(TBB INTERFACE)

foreach(C IN LISTS _REQUIRED_COMPONENTS _OPTIONAL_COMPONENTS)
  rk_tbb_find_and_link_component(${C})
endforeach()

# 6) All required components must have produced a target by now.
rk_tbb_check_components()
if (_TBB_MISSING_COMPONENTS)
  rk_tbb_error("Cannot find required components: "
               "${_TBB_MISSING_COMPONENTS}")
endif()

set(TBB_FOUND TRUE)
set(TBB_INCLUDE_DIRS "${TBB_INCLUDE_DIR}")
|
||||||
13
Framework/external/embree/common/cmake/check_arm_neon.cpp
vendored
Normal file
13
Framework/external/embree/common/cmake/check_arm_neon.cpp
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
// Copyright 2009-2020 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#if !defined(__ARM_NEON)
|
||||||
|
#error "No ARM Neon support"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
// Compile probe: builds a vector of four 1s and returns their horizontal sum
// through NEON intrinsics.
int main()
{
  const int32x4_t ones = vdupq_n_s32(1);
  return vaddvq_s32(ones);
}
|
||||||
20
Framework/external/embree/common/cmake/check_globals.cmake
vendored
Normal file
20
Framework/external/embree/common/cmake/check_globals.cmake
vendored
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Warn about global variables placed in .bss of the object file `file`.
# NOTE(review): `file` is not set in this script; presumably the caller passes
# it in (e.g. -Dfile=<object>) -- confirm against the invoking build rule.

# The check relies on objdump; it is skipped on Windows and macOS.
if (WIN32 OR APPLE)
  return()
endif()

# Dump the demangled symbol table and split it into one list entry per line.
# The expansions are quoted so that an empty objdump result (tool missing,
# unreadable file) yields an empty list instead of a string() argument error.
execute_process(COMMAND objdump -C -t "${file}" OUTPUT_VARIABLE output)
string(REPLACE "\n" ";" output "${output}")

foreach (line ${output})
  if ("${line}" MATCHES "O .bss")
    if (NOT "${line}" MATCHES "std::__ioinit" AND           # this is caused by iostream initialization and is likely also ok
        NOT "${line}" MATCHES "\\(\\)::" AND                # this matches a static inside a function which is fine
        NOT "${line}" MATCHES "function_local_static_" AND  # static variable inside a function (explicitly named)
        NOT "${line}" MATCHES "__\\$U")                     # ICC generated locks for static variable inside a function
      message(WARNING "\nProblematic global variable in non-SSE code:\n" ${line})
    endif()
  endif()
endforeach()
|
||||||
36
Framework/external/embree/common/cmake/check_isa.cpp
vendored
Normal file
36
Framework/external/embree/common/cmake/check_isa.cpp
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
// ======================================================================== //
|
||||||
|
// Copyright 2017 Kitware, Inc. //
|
||||||
|
// //
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); //
|
||||||
|
// you may not use this file except in compliance with the License. //
|
||||||
|
// You may obtain a copy of the License at //
|
||||||
|
// //
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0 //
|
||||||
|
// //
|
||||||
|
// Unless required by applicable law or agreed to in writing, software //
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS, //
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
|
||||||
|
// See the License for the specific language governing permissions and //
|
||||||
|
// limitations under the License. //
|
||||||
|
// ======================================================================== //
|
||||||
|
|
||||||
|
#if \
|
||||||
|
defined(__AVX512F__) && defined(__AVX512CD__) && \
|
||||||
|
defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
|
||||||
|
char const *info_isa = "ISA" ":" "AVX512";
|
||||||
|
#elif defined(__AVX2__)
|
||||||
|
char const *info_isa = "ISA" ":" "AVX2";
|
||||||
|
#elif defined(__AVX__)
|
||||||
|
char const *info_isa = "ISA" ":" "AVX";
|
||||||
|
#elif defined(__SSE4_2__)
|
||||||
|
char const *info_isa = "ISA" ":" "SSE42";
|
||||||
|
#else // defined(__SSE2__)
|
||||||
|
char const *info_isa = "ISA" ":" "SSE2";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
int require = 0;
|
||||||
|
require += info_isa[argc];
|
||||||
|
return require;
|
||||||
|
}
|
||||||
43
Framework/external/embree/common/cmake/check_isa_default.cmake
vendored
Normal file
43
Framework/external/embree/common/cmake/check_isa_default.cmake
vendored
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
## ======================================================================== ##
|
||||||
|
## Copyright 2017 Kitware, Inc. ##
|
||||||
|
## ##
|
||||||
|
## Licensed under the Apache License, Version 2.0 (the "License"); ##
|
||||||
|
## you may not use this file except in compliance with the License. ##
|
||||||
|
## You may obtain a copy of the License at ##
|
||||||
|
## ##
|
||||||
|
## http://www.apache.org/licenses/LICENSE-2.0 ##
|
||||||
|
## ##
|
||||||
|
## Unless required by applicable law or agreed to in writing, software ##
|
||||||
|
## distributed under the License is distributed on an "AS IS" BASIS, ##
|
||||||
|
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ##
|
||||||
|
## See the License for the specific language governing permissions and ##
|
||||||
|
## limitations under the License. ##
|
||||||
|
## ======================================================================== ##
|
||||||
|
|
||||||
|
SET(CHECK_ISA_DIR "${CMAKE_CURRENT_LIST_DIR}")
|
||||||
|
# Determine the default ISA of the active compiler and store its name in
# the variable named by OUTVAR (in the caller's scope).
#
#   OUTVAR  name of the result variable; receives "NEON", "SSE2" (also used
#           when the probe does not compile) or the name embedded in the
#           compiled check_isa.cpp probe.
function(CHECK_ISA_DEFAULT OUTVAR)

  # Both probe sources live next to this file. CHECK_ISA_DIR is used for each
  # of them so the check does not depend on which project() is active when
  # the function is called.
  try_compile(COMPILER_SUPPORTS_ARM_NEON "${CMAKE_BINARY_DIR}" "${CHECK_ISA_DIR}/check_arm_neon.cpp")
  if (COMPILER_SUPPORTS_ARM_NEON)
    set(ISA_DEFAULT "NEON")
    set(${OUTVAR} ${ISA_DEFAULT} PARENT_SCOPE)
    return()
  endif()

  # Compile the ISA probe and keep the resulting binary for inspection.
  set(ISA_DEFAULT_BIN "${CMAKE_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/check_isa_default.bin")
  set(SRC "${CHECK_ISA_DIR}/check_isa.cpp")
  try_compile(ISA_DEFAULT_COMPILE
    "${CMAKE_BINARY_DIR}"
    "${SRC}"
    COPY_FILE "${ISA_DEFAULT_BIN}"
  )
  if (NOT ISA_DEFAULT_COMPILE)
    set(ISA_DEFAULT "SSE2")
    set(${OUTVAR} ${ISA_DEFAULT} PARENT_SCOPE)
    return()
  endif()

  # The probe embeds a string of the form "ISA:<name>"; extract <name>.
  # The path is quoted so build directories containing spaces or semicolons
  # are handled.
  file(STRINGS "${ISA_DEFAULT_BIN}" ISA_DEFAULT REGEX "^ISA:")
  string(REPLACE "ISA:" "" ISA_DEFAULT "${ISA_DEFAULT}")
  set(${OUTVAR} ${ISA_DEFAULT} PARENT_SCOPE)
endfunction()
|
||||||
24
Framework/external/embree/common/cmake/check_stack_frame_size.cmake
vendored
Normal file
24
Framework/external/embree/common/cmake/check_stack_frame_size.cmake
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Warn about functions whose name contains "recurse" and that reserve more
# than 4096 bytes of stack, based on the disassembly of `file`.
# NOTE(review): `file` is not set in this script; presumably the caller passes
# it in (e.g. -Dfile=<object>) -- confirm against the invoking build rule.

# The check relies on objdump; it is skipped on Windows and macOS.
if (WIN32 OR APPLE)
  return()
endif()

# Disassemble and split into one list entry per line. The expansions are
# quoted so that an empty objdump result yields an empty list instead of a
# string() argument error.
execute_process(COMMAND objdump -d "${file}" OUTPUT_VARIABLE output)
string(REPLACE "\n" ";" output "${output}")

set(lastfunc "")
foreach (line ${output})
  # Remember the most recent "<symbol>:" label.
  if ("${line}" MATCHES "^.*<([^>]*)>:$")
    set(lastfunc ${CMAKE_MATCH_1})
  endif()
  # "sub $N,%rsp" carries the immediate subtracted from the stack pointer.
  if ("${line}" MATCHES ".*sub[ ]+[$]([^,]*),%rsp.*")
    set(bytes ${CMAKE_MATCH_1})
    if ("${bytes}" GREATER "4096")
      if ("${lastfunc}" MATCHES ".*recurse.*")
        message(WARNING "Large stack space requirement: ${lastfunc} size: ${bytes}")
      endif()
    endif()
  endif()
endforeach()
|
||||||
146
Framework/external/embree/common/cmake/clang.cmake
vendored
Normal file
146
Framework/external/embree/common/cmake/clang.cmake
vendored
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# _SET_IF_EMPTY(<VAR> <VALUE>)
#
# Assigns <VALUE> to the variable named <VAR> unless that variable already
# evaluates to true in an if() test, so a value set beforehand is kept.
# Being a macro, the assignment takes place in the caller's scope.
macro(_SET_IF_EMPTY VAR VALUE)
  if(NOT ${VAR})
    set(${VAR} "${VALUE}")
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Per-ISA compiler flags.
if(NOT EMBREE_ARM)
  # Machine options per ISA level; values that are already set are kept
  # (see _SET_IF_EMPTY).
  _SET_IF_EMPTY(FLAGS_SSE2   "-msse -msse2 -mno-sse4.2")
  _SET_IF_EMPTY(FLAGS_SSE42  "-msse4.2")
  _SET_IF_EMPTY(FLAGS_AVX    "-mavx")
  _SET_IF_EMPTY(FLAGS_AVX2   "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
  _SET_IF_EMPTY(FLAGS_AVX512 "-march=skx")
else()
  # With EMBREE_ARM the ISA levels are expressed as preprocessor definitions
  # only, and they are assigned unconditionally.
  set(FLAGS_SSE2  "-D__SSE__ -D__SSE2__")
  set(FLAGS_SSE42 "-D__SSE4_2__ -D__SSE4_1__")
  set(FLAGS_AVX   "-D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
  set(FLAGS_AVX2  "-D__AVX2__ -D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
endif()
|
||||||
|
|
||||||
|
# Global compile and link flags for Clang. The WIN32 branch uses MSVC-style
# switches, the other branch GCC-style ones. Both branches define the
# DISABLE_STACK_PROTECTOR_FOR_FILE(<file>) macro.
if(WIN32)

  # Flags shared by the Debug, Release and RelWithDebInfo configurations.
  # /MP (compile source files in parallel) is not enabled.
  set(COMMON_CXX_FLAGS "")
  string(APPEND COMMON_CXX_FLAGS " /EHsc")                     # catch C++ exceptions only and extern "C" functions never throw a C++ exception
  string(APPEND COMMON_CXX_FLAGS " /GR")                       # enable runtime type information (on by default)
  string(APPEND COMMON_CXX_FLAGS " -Xclang -fcxx-exceptions")  # enable C++ exceptions in Clang
  string(APPEND COMMON_CXX_FLAGS " /w")                        # disable all warnings
  string(APPEND COMMON_CXX_FLAGS " /Gy")                       # package individual functions
  if(EMBREE_STACK_PROTECTOR)
    string(APPEND COMMON_CXX_FLAGS " /GS")                     # protects against return address overrides
  else()
    string(APPEND COMMON_CXX_FLAGS " /GS-")                    # do not protect against return address overrides
  endif()

  # DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
  # Compiles <file> with /GS- when EMBREE_STACK_PROTECTOR is enabled;
  # does nothing otherwise.
  macro(DISABLE_STACK_PROTECTOR_FOR_FILE file)
    if(EMBREE_STACK_PROTECTOR)
      set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "/GS-")
    endif()
  endmacro()

  # Debug
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " ${COMMON_CXX_FLAGS}")
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " /DDEBUG")               # enables assertions
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " /DTBB_USE_DEBUG")       # configures TBB in debug mode
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " /Ox")                   # enable full optimizations
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " /Oi")                   # inline intrinsic functions
  string(APPEND CMAKE_EXE_LINKER_FLAGS_DEBUG " /DEBUG")         # generate debug information
  string(APPEND CMAKE_SHARED_LINKER_FLAGS_DEBUG " /DEBUG")      # generate debug information

  # Release
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " ${COMMON_CXX_FLAGS}")
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " /Ox")                 # enable full optimizations
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " /Oi")                 # inline intrinsic functions

  # RelWithDebInfo
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " ${COMMON_CXX_FLAGS}")
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " /Ox")          # enable full optimizations
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " /Oi")          # inline intrinsic functions
  string(APPEND CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO " /DEBUG")     # generate debug information
  string(APPEND CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO " /DEBUG")  # generate debug information

  # Hardening options passed to the linker for executables and shared libraries.
  set(SECURE_LINKER_FLAGS "")
  string(APPEND SECURE_LINKER_FLAGS " /NXCompat")     # compatible with data execution prevention (on by default)
  string(APPEND SECURE_LINKER_FLAGS " /DynamicBase")  # random rebase of executable at load time
  if(CMAKE_SIZEOF_VOID_P EQUAL 4)
    string(APPEND SECURE_LINKER_FLAGS " /SafeSEH")    # invoke known exception handlers (Win32 only, x64 exception handlers are safe by design)
  endif()
  string(APPEND CMAKE_EXE_LINKER_FLAGS " ${SECURE_LINKER_FLAGS}")
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${SECURE_LINKER_FLAGS}")

  include(msvc_post)

else()

  option(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)
  option(EMBREE_ADDRESS_SANITIZER "Enabled CLANG address sanitizer." OFF)
  if(EMBREE_IGNORE_CMAKE_CXX_FLAGS)
    set(CMAKE_CXX_FLAGS "")
  endif()

  string(APPEND CMAKE_CXX_FLAGS " -Wall")                        # enables most warnings
  string(APPEND CMAKE_CXX_FLAGS " -Wformat -Wformat-security")   # enables string format vulnerability warnings
  string(APPEND CMAKE_CXX_FLAGS " -fsigned-char")                # treat char as signed on all processors, including ARM
  if(NOT APPLE)
    string(APPEND CMAKE_CXX_FLAGS " -fPIE")                      # enables support for more secure position independent execution
  endif()
  string(APPEND CMAKE_CXX_FLAGS " -fPIC")                        # generate position independent code suitable for shared libraries
  string(APPEND CMAKE_C_FLAGS " -fPIC")                          # generate position independent code suitable for shared libraries
  string(APPEND CMAKE_CXX_FLAGS " -std=c++11")                   # enables C++11 features
  string(APPEND CMAKE_CXX_FLAGS " -fvisibility=hidden")          # makes all symbols hidden by default
  string(APPEND CMAKE_CXX_FLAGS " -fvisibility-inlines-hidden")  # makes all inline symbols hidden by default
  string(APPEND CMAKE_CXX_FLAGS " -fno-strict-aliasing")         # disables strict aliasing rules
  string(APPEND CMAKE_CXX_FLAGS " -fno-tree-vectorize")          # disable auto vectorizer
  string(APPEND CMAKE_CXX_FLAGS " -D_FORTIFY_SOURCE=2")          # perform extra security checks for some standard library calls
  if(EMBREE_STACK_PROTECTOR)
    string(APPEND CMAKE_CXX_FLAGS " -fstack-protector")          # protects against return address overrides
  endif()

  # DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
  # Compiles <file> with -fno-stack-protector when EMBREE_STACK_PROTECTOR is
  # enabled; does nothing otherwise.
  macro(DISABLE_STACK_PROTECTOR_FOR_FILE file)
    if(EMBREE_STACK_PROTECTOR)
      set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "-fno-stack-protector")
    endif()
  endmacro()

  if(EMBREE_ADDRESS_SANITIZER)
    string(APPEND CMAKE_CXX_FLAGS " -fsanitize=address -fsanitize-address-use-after-scope -fno-omit-frame-pointer -fno-optimize-sibling-calls")
  endif()

  if(EMSCRIPTEN)
    string(APPEND CMAKE_CXX_FLAGS " -fexceptions")  # enable exceptions
    string(APPEND CMAKE_CXX_FLAGS " -pthread")      # enable threads
    string(APPEND CMAKE_CXX_FLAGS " -msimd128")     # enable SIMD intrinsics
  endif()

  # The per-configuration flags are rebuilt from scratch; previous values are
  # discarded.
  set(CMAKE_CXX_FLAGS_DEBUG "")
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -g")              # generate debug information
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -DDEBUG")         # enable assertions
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -DTBB_USE_DEBUG") # configure TBB in debug mode
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O3")             # enable full optimizations

  set(CMAKE_CXX_FLAGS_RELEASE "")
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " -DNDEBUG")      # disable assertions
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O3")           # enable full optimizations

  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "")
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -g")       # generate debug information
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -DNDEBUG") # disable assertions
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -O3")      # enable full optimizations

  if(APPLE)
    string(APPEND CMAKE_CXX_FLAGS " -mmacosx-version-min=10.7")  # makes sure code runs on older MacOSX versions
    string(APPEND CMAKE_CXX_FLAGS " -stdlib=libc++")             # link against libc++ which supports C++11 features
  elseif(NOT EMBREE_ADDRESS_SANITIZER)  # for address sanitizer this causes link errors
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--no-undefined")  # issues link error for undefined symbols in shared library
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -pie")                   # enables position independent execution for executable
    if(NOT EMSCRIPTEN)
      string(APPEND CMAKE_SHARED_LINKER_FLAGS " -z relro -z now")   # re-arranges data sections to increase security
      string(APPEND CMAKE_SHARED_LINKER_FLAGS " -z noexecstack")    # we do not need an executable stack
      string(APPEND CMAKE_EXE_LINKER_FLAGS " -z relro -z now")      # re-arranges data sections to increase security
      string(APPEND CMAKE_EXE_LINKER_FLAGS " -z noexecstack")       # we do not need an executable stack
    endif()
  endif()

endif()
|
||||||
23
Framework/external/embree/common/cmake/crayprgenv.cmake
vendored
Normal file
23
Framework/external/embree/common/cmake/crayprgenv.cmake
vendored
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
## ======================================================================== ##
|
||||||
|
## Copyright 2017 Kitware, Inc. ##
|
||||||
|
## ##
|
||||||
|
## Licensed under the Apache License, Version 2.0 (the "License"); ##
|
||||||
|
## you may not use this file except in compliance with the License. ##
|
||||||
|
## You may obtain a copy of the License at ##
|
||||||
|
## ##
|
||||||
|
## http://www.apache.org/licenses/LICENSE-2.0 ##
|
||||||
|
## ##
|
||||||
|
## Unless required by applicable law or agreed to in writing, software ##
|
||||||
|
## distributed under the License is distributed on an "AS IS" BASIS, ##
|
||||||
|
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ##
|
||||||
|
## See the License for the specific language governing permissions and ##
|
||||||
|
## limitations under the License. ##
|
||||||
|
## ======================================================================== ##
|
||||||
|
# Per-ISA flag values, expressed as -target-cpu selections. SSE4.2 has no
# entry of its own and is marked NOT_SUPPORTED.
set(FLAGS_SSE2   "-target-cpu=x86_64")
set(FLAGS_SSE42  "NOT_SUPPORTED")
set(FLAGS_AVX    "-target-cpu=sandybridge")
set(FLAGS_AVX2   "-target-cpu=haswell")
set(FLAGS_AVX512 "-target-cpu=x86-skylake")

# Pull in "<lower-case compiler id>.cmake" from this directory when such a
# file exists; OPTIONAL makes a missing file a no-op.
string(TOLOWER "${CMAKE_CXX_COMPILER_ID}" _lower_compiler_id)
include("${CMAKE_CURRENT_LIST_DIR}/${_lower_compiler_id}.cmake" OPTIONAL)
|
||||||
4
Framework/external/embree/common/cmake/create_isa_dummy_file.cmake
vendored
Normal file
4
Framework/external/embree/common/cmake/create_isa_dummy_file.cmake
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Writes a single-line source file to `dst` that #includes `src`.
# Neither variable is set in this script (presumably both are passed by the
# caller with -D... -- TODO confirm).
# `dst` is quoted so that a path containing ';' stays one file name instead of
# being split into a file name plus extra content arguments.
file(WRITE "${dst}" "#include \"${src}\"\n")
|
||||||
0
Framework/external/embree/common/cmake/dep_level_zero.cmake
vendored
Normal file
0
Framework/external/embree/common/cmake/dep_level_zero.cmake
vendored
Normal file
292
Framework/external/embree/common/cmake/dpcpp.cmake
vendored
Normal file
292
Framework/external/embree/common/cmake/dpcpp.cmake
vendored
Normal file
|
|
@ -0,0 +1,292 @@
|
||||||
|
## Copyright 2009-2022 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# _SET_IF_EMPTY(<VAR> <VALUE>)
#
# Gives the variable named <VAR> the value <VALUE>, but only when the variable
# does not already evaluate to true in an if() test. As a macro it assigns in
# the scope of the caller.
macro(_SET_IF_EMPTY VAR VALUE)
  if(NOT ${VAR})
    set(${VAR} "${VALUE}")
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Default machine options per ISA level; values that are already set are kept
# (see _SET_IF_EMPTY).
_SET_IF_EMPTY(FLAGS_SSE2   "-msse2")
_SET_IF_EMPTY(FLAGS_SSE42  "-msse4.2")
_SET_IF_EMPTY(FLAGS_AVX    "-mavx")
_SET_IF_EMPTY(FLAGS_AVX2   "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
_SET_IF_EMPTY(FLAGS_AVX512 "-march=skx")

# Outside Windows the incoming CMAKE_CXX_FLAGS are discarded unless the user
# switches EMBREE_IGNORE_CMAKE_CXX_FLAGS off.
if(NOT WIN32)
  option(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)
  if(EMBREE_IGNORE_CMAKE_CXX_FLAGS)
    set(CMAKE_CXX_FLAGS "")
  endif()
endif()
|
||||||
|
|
||||||
|
# Classify the C++ compiler by the file name of CMAKE_CXX_COMPILER.
#
# Outputs:
#   SYCL_COMPILER_DIR  - directory containing the compiler executable
#   SYCL_COMPILER_NAME - executable name without directory and extension
#   SYCL_ONEAPI        - TRUE unless the executable is named "clang++"
#   SYCL_ONEAPI_ICX    - TRUE for "icx"/"icpx", FALSE for any other name that
#                        is not "clang++"; left untouched for "clang++"
# The compiler path is quoted so that an empty value yields empty results
# instead of an argument-count error.
get_filename_component(SYCL_COMPILER_DIR "${CMAKE_CXX_COMPILER}" PATH)
get_filename_component(SYCL_COMPILER_NAME "${CMAKE_CXX_COMPILER}" NAME_WE)

if(NOT SYCL_COMPILER_NAME STREQUAL "clang++")
  set(SYCL_ONEAPI TRUE)
  if(SYCL_COMPILER_NAME STREQUAL "icx" OR SYCL_COMPILER_NAME STREQUAL "icpx")
    set(SYCL_ONEAPI_ICX TRUE)
  else()
    set(SYCL_ONEAPI_ICX FALSE)
  endif()

  # Snapshot the current flags. The expansions are quoted on purpose: with an
  # empty value, an unquoted set(<var> ${...}) degenerates into set(<var>),
  # which *unsets* the variable. On restore that removed the normal
  # CMAKE_CXX_FLAGS variable and let the cache entry of the same name show
  # through again, undoing an earlier set(CMAKE_CXX_FLAGS "").
  set(STORE_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  set(STORE_CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS}")
  if(NOT EMBREE_SYCL_SUPPORT)
    # if EMBREE_SYCL_SUPPORT is off we don't want the -fsycl flags
    # (nothing between the snapshot and this point changes the flags in this
    # file, so this currently re-assigns the same values)
    set(CMAKE_CXX_FLAGS "${STORE_CMAKE_CXX_FLAGS}")
    set(CMAKE_CXX_LINK_FLAGS "${STORE_CMAKE_CXX_LINK_FLAGS}")
  endif()
else()
  set(SYCL_ONEAPI FALSE)
  add_definitions(-D__INTEL_LLVM_COMPILER)
endif()
|
||||||
|
|
||||||
|
# Flags for building with SYCL enabled. Produces, among others:
#   CMAKE_CXX_FLAGS_SYCL  - compile flags for SYCL sources
#   CMAKE_LINK_FLAGS_SYCL - link flags for SYCL targets
#   SYCL_LIB_NAME         - base name of the SYCL runtime library
# and extends CMAKE_CXX_FLAGS for all sources.
# Reads SYCL_ONEAPI_ICX and SYCL_COMPILER_DIR, which are set outside this block.
if(EMBREE_SYCL_SUPPORT)

  string(APPEND CMAKE_CXX_FLAGS " -fno-sycl") # makes dpcpp compiler compatible with clang++

  set(CMAKE_CXX_FLAGS_SYCL "-fsycl -fsycl-unnamed-lambda -Xclang -fsycl-allow-func-ptr")
  string(APPEND CMAKE_CXX_FLAGS_SYCL " -Wno-mismatched-tags -Wno-pessimizing-move -Wno-reorder -Wno-unneeded-internal-declaration -Wno-delete-non-abstract-non-virtual-dtor -Wno-dangling-field -Wno-unknown-pragmas -Wno-logical-op-parentheses")

  if(SYCL_ONEAPI_ICX AND WIN32)
    string(APPEND CMAKE_CXX_FLAGS_SYCL " /debug:none") # FIXME: debug information generation takes forever in SYCL
    string(APPEND CMAKE_CXX_FLAGS_SYCL " /DNDEBUG")    # FIXME: assertion still not working in SYCL
  else()
    string(APPEND CMAKE_CXX_FLAGS_SYCL " -g0")              # FIXME: debug information generation takes forever in SYCL
    string(APPEND CMAKE_CXX_FLAGS_SYCL " -UDEBUG -DNDEBUG") # FIXME: assertion still not working in SYCL
  endif()

  string(APPEND CMAKE_CXX_FLAGS " -Wno-bitwise-instead-of-logical")      # disables "use of bitwise '&' with boolean operands" warning
  string(APPEND CMAKE_CXX_FLAGS_SYCL " -Wno-bitwise-instead-of-logical") # disables "use of bitwise '&' with boolean operands" warning

  if(WIN32)
    # Locate the SYCL import library next to the compiler; its base name is
    # sycl[<one or two digits>][d], with the "d" suffix for Debug builds.
    set(SYCL_COMPILER_LIB_DIR "${SYCL_COMPILER_DIR}/../lib")
    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
      file(GLOB SYCL_LIB RELATIVE ${SYCL_COMPILER_LIB_DIR}
        ${SYCL_COMPILER_LIB_DIR}/sycld.lib
        ${SYCL_COMPILER_LIB_DIR}/sycl[0-9]d.lib
        ${SYCL_COMPILER_LIB_DIR}/sycl[0-9][0-9]d.lib)
    else()
      file(GLOB SYCL_LIB RELATIVE ${SYCL_COMPILER_LIB_DIR}
        ${SYCL_COMPILER_LIB_DIR}/sycl.lib
        ${SYCL_COMPILER_LIB_DIR}/sycl[0-9].lib
        ${SYCL_COMPILER_LIB_DIR}/sycl[0-9][0-9].lib)
    endif()
    # get_filename_component() below needs exactly one file name. Zero or
    # several matches used to fail there with an unrelated-looking argument
    # error, so report the actual problem instead.
    list(LENGTH SYCL_LIB _sycl_lib_count)
    if(NOT _sycl_lib_count EQUAL 1)
      message(FATAL_ERROR "Expected exactly one SYCL import library in '${SYCL_COMPILER_LIB_DIR}', found: '${SYCL_LIB}'")
    endif()
    get_filename_component(SYCL_LIB_NAME ${SYCL_LIB} NAME_WE)
  else()
    set(SYCL_LIB_NAME "sycl")
  endif()

  set(CMAKE_LINK_FLAGS_SYCL "-fsycl")

  #LIST(APPEND CMAKE_IGC_OPTIONS "EnableOCLNoInlineAttr=0") # enabled __noinline
  #LIST(APPEND CMAKE_IGC_OPTIONS "ControlKernelTotalSize=0")
  #LIST(APPEND CMAKE_IGC_OPTIONS "SubroutineThreshold=110000") # Minimal kernel size to enable subroutines
  #LIST(APPEND CMAKE_IGC_OPTIONS "EnableUnmaskedFunctions=1") # enables unmasked functions
  #LIST(APPEND CMAKE_IGC_OPTIONS "ByPassAllocaSizeHeuristic=64") # puts small arrays into registers
  #LIST(APPEND CMAKE_IGC_OPTIONS "EnableIndirectCallOptimization=0") # Enables inlining indirect calls by comparing function addresses
  #LIST(APPEND CMAKE_IGC_OPTIONS "FunctionControl=0") # 0 = default, 1 = inline, 2 = subroutine, 3 = stackcall, 4 = keep indirect calls
  #LIST(APPEND CMAKE_IGC_OPTIONS "forceGlobalRA=1") # "force global register allocator
  #LIST(APPEND CMAKE_IGC_OPTIONS "TotalGRFNum=128") # Total GRF used for register allocation
  #LIST(APPEND CMAKE_IGC_OPTIONS "GRFNumToUse=64") # "Set the number of general registers to use (64 to totalGRFNum)
  #LIST(APPEND CMAKE_IGC_OPTIONS "ReplaceIndirectCallWithJmpi=1") # Replace indirect call with jmpi instruction (HW WA)
  #LIST(APPEND CMAKE_IGC_OPTIONS "DisableUniformAnalysis=1") # Setting this to 1/true adds a compiler switch to disable uniform_analysis
  #LIST(APPEND CMAKE_IGC_OPTIONS "DisableLoopUnroll=1") # Setting this to 1/true adds a compiler switch to disable loop unrolling
  #LIST(APPEND CMAKE_IGC_OPTIONS "EnableStatelessToStatefull=0") # Enable Stateless To Statefull transformation for global and constant address space in OpenCL kernels
  #LIST(APPEND CMAKE_IGC_OPTIONS "EnableRecursionOpenCL=1") # Enable recursion with OpenCL user functions
  #LIST(APPEND CMAKE_IGC_OPTIONS "EnableAdvMemOpt=0") # Enable advanced memory optimization
  #LIST(APPEND CMAKE_IGC_OPTIONS "UniformMemOptLimit=512") # "Limit of uniform memory optimization in bits
  #LIST(APPEND CMAKE_IGC_OPTIONS "EnablePreemption=0") # Enable generating preeemptable code (SKL+)
  #LIST(APPEND CMAKE_IGC_OPTIONS "AllowSubroutineAndInirectdCalls=1") # Allow subroutine in the presence of indirect calls
  #LIST(APPEND CMAKE_IGC_OPTIONS "AllocaRAPressureThreshold=0") # The threshold for the register pressure potential (this reduces amount of spilling!)
  #LIST(APPEND CMAKE_IGC_OPTIONS "AssumeInt64Support=0") # Architecture with partial int64 still promote uniform arrays to registers
  list(APPEND CMAKE_IGC_OPTIONS "VISAOptions=-scratchAllocForStackInKB 128 ") # this works around some IGC bug in spill compression

  if(CMAKE_BUILD_TYPE STREQUAL "Debug") # to allow printf inside indirectly callable function
    list(APPEND CMAKE_IGC_OPTIONS "ForceInlineStackCallWithImplArg=0")
    list(APPEND CMAKE_IGC_OPTIONS "EnableGlobalStateBuffer=1")
  endif()

  # The IGC options are handed over as one comma-separated string.
  string(REPLACE ";" "," CMAKE_IGC_OPTIONS "${CMAKE_IGC_OPTIONS}")

  if(EMBREE_SYCL_AOT_DEVICE_REVISION GREATER 0)
    string(APPEND CMAKE_OCL_OPTIONS " -revision_id ${EMBREE_SYCL_AOT_DEVICE_REVISION}") # Enable this to override the stepping/RevId
  endif()

  string(APPEND CMAKE_OCL_OPTIONS " -cl-intel-greater-than-4GB-buffer-required") # enables support for buffers larger than 4GB
  if(EMBREE_SYCL_LARGEGRF)
    string(APPEND CMAKE_OCL_OPTIONS " -internal_options -cl-intel-256-GRF-per-thread") # large GRF mode
  endif()
  string(APPEND CMAKE_OCL_OTHER_OPTIONS " -cl-intel-force-global-mem-allocation -cl-intel-no-local-to-generic")
  #SET(CMAKE_OCL_OTHER_OPTIONS "${CMAKE_OCL_OTHER_OPTIONS} -cl-intel-private-memory-minimal-size-per-thread 8192")

  # spir64 is always targeted; spir64_gen is added unless
  # EMBREE_SYCL_AOT_DEVICES is "none".
  if(EMBREE_SYCL_AOT_DEVICES STREQUAL "none")
    set(CMAKE_CXX_FLAGS_SYCL_AOT "-fsycl-targets=spir64")
  else()
    set(CMAKE_CXX_FLAGS_SYCL_AOT "-fsycl-targets=spir64,spir64_gen")
  endif()

  set(CMAKE_LINK_FLAGS_SYCL_AOT "${CMAKE_CXX_FLAGS_SYCL_AOT} -Xsycl-target-backend=spir64 \"${CMAKE_OCL_OPTIONS} -options \\\"${CMAKE_OCL_OTHER_OPTIONS} -igc_opts='${CMAKE_IGC_OPTIONS}'\\\"\"")

  if(NOT EMBREE_SYCL_AOT_DEVICES STREQUAL "none")
    string(APPEND CMAKE_LINK_FLAGS_SYCL_AOT " -Xsycl-target-backend=spir64_gen \"-device ${EMBREE_SYCL_AOT_DEVICES} ${CMAKE_OCL_OPTIONS} -options \\\"${CMAKE_OCL_OTHER_OPTIONS} -igc_opts='${CMAKE_IGC_OPTIONS}'\\\"\"")
  endif()

  # NOTE(review): this replaces the -fsycl-targets compile flags chosen above
  # (the link flags were already composed from them) -- confirm this is intended.
  if(EMBREE_SYCL_DBG)
    set(CMAKE_CXX_FLAGS_SYCL_AOT "-g")
  endif()

  string(APPEND CMAKE_CXX_FLAGS_SYCL " ${CMAKE_CXX_FLAGS_SYCL_AOT}")
  string(APPEND CMAKE_LINK_FLAGS_SYCL " ${CMAKE_LINK_FLAGS_SYCL_AOT}")

  string(APPEND CMAKE_CXX_FLAGS " -Wno-pessimizing-move") # disabled: warning: moving a temporary object prevents copy elision [-Wpessimizing-move]

  # The compiler version is referenced by variable name rather than expanded
  # with ${...}: an empty version then evaluates to false instead of producing
  # a malformed if() expression.
  if(SYCL_ONEAPI_ICX AND WIN32)
    if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 2024.0)
      string(APPEND CMAKE_CXX_FLAGS " -I\"${SYCL_COMPILER_DIR}/../opt/compiler/include/sycl\" -I\"${SYCL_COMPILER_DIR}/../opt/compiler/include/sycl/sycl\"") # disable warning from SYCL header
    endif()
    string(APPEND CMAKE_CXX_FLAGS " -I\"${SYCL_COMPILER_DIR}/../include/sycl\" -I\"${SYCL_COMPILER_DIR}/../include/\"") # disable warning from SYCL header
    string(APPEND CMAKE_CXX_FLAGS " -Qstd=c++17")
  else()
    string(APPEND CMAKE_CXX_FLAGS " -std=c++17")
    if(SYCL_ONEAPI_ICX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 2024.0)
      string(APPEND CMAKE_CXX_FLAGS " -isystem \"${SYCL_COMPILER_DIR}/../opt/compiler/include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../opt/compiler/include/sycl/sycl\"") # disable warning from SYCL header
    endif()
    string(APPEND CMAKE_CXX_FLAGS " -isystem \"${SYCL_COMPILER_DIR}/../include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../include/\"") # disable warning from SYCL header
  endif()

  # enable C++17 features
  # (the same flag was already appended just above; it is kept so the
  # resulting flag string stays unchanged)
  if(SYCL_ONEAPI_ICX AND WIN32)
    string(APPEND CMAKE_CXX_FLAGS " -Qstd=c++17")
  else()
    string(APPEND CMAKE_CXX_FLAGS " -std=c++17")
  endif()
endif()
|
||||||
|
|
||||||
|
# Extra compile/link flags applied only when SYCL_ONEAPI_ICX is set
# (compiler executable named icx or icpx).
if(SYCL_ONEAPI_ICX)
  if(WIN32)
    foreach(_flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
      string(APPEND ${_flags_var} " /Qno-intel-lib")
      string(APPEND ${_flags_var} " /Qimf-use-svml:false")
    endforeach()
    foreach(_flags_var CMAKE_CXX_LINK_FLAGS CMAKE_C_LINK_FLAGS)
      string(APPEND ${_flags_var} " /Qno-intel-lib")
    endforeach()
    foreach(_flags_var CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
      string(APPEND ${_flags_var} " /Qoption,link,/DEPENDENTLOADFLAG:0x2000")
    endforeach()
  else()
    foreach(_flags_var CMAKE_CXX_LINK_FLAGS CMAKE_C_LINK_FLAGS)
      string(APPEND ${_flags_var} " -static-intel")
    endforeach()
    # -fimf-use-svml=false is not passed here (it was disabled in the
    # original as commented-out SET() lines for CMAKE_CXX_FLAGS/CMAKE_C_FLAGS).
    if(NOT EMBREE_SYCL_SUPPORT)
      foreach(_flags_var CMAKE_CXX_LINK_FLAGS CMAKE_C_LINK_FLAGS CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
        string(APPEND ${_flags_var} " -no-intel-lib")
      endforeach()
    endif()
  endif()
endif()
|
||||||
|
|
||||||
|
# Adds the stack protector switch in the spelling the active compiler driver
# understands; nothing is added when EMBREE_STACK_PROTECTOR is off.
if(EMBREE_STACK_PROTECTOR AND SYCL_ONEAPI_ICX AND WIN32)
  string(APPEND CMAKE_CXX_FLAGS " /GS")                # protects against return address overrides
elseif(EMBREE_STACK_PROTECTOR)
  string(APPEND CMAKE_CXX_FLAGS " -fstack-protector")  # protects against return address overrides
endif()
|
||||||
|
# DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
#
# When EMBREE_STACK_PROTECTOR is enabled, sets the COMPILE_FLAGS source
# property of <file> to the switch that turns the stack protector off:
# "/GS-" for icx/icpx on Windows, "-fno-stack-protector" otherwise.
# Does nothing when EMBREE_STACK_PROTECTOR is off.
macro(DISABLE_STACK_PROTECTOR_FOR_FILE file)
  if(EMBREE_STACK_PROTECTOR AND SYCL_ONEAPI_ICX AND WIN32)
    set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "/GS-")
  elseif(EMBREE_STACK_PROTECTOR)
    set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "-fno-stack-protector")
  endif()
endmacro()
|
||||||
|
|
||||||
|
# General C++ compile flags: MSVC-style switches for icx/icpx on Windows,
# GCC-style switches in every other case.
if(SYCL_ONEAPI_ICX AND WIN32)
  string(APPEND CMAKE_CXX_FLAGS " /fp:precise")                # makes dpcpp compiler compatible with clang++
  string(APPEND CMAKE_CXX_FLAGS " /EHsc")                      # catch C++ exceptions only and extern "C" functions never throw a C++ exception
  string(APPEND CMAKE_CXX_FLAGS " /GR")                        # enable runtime type information (on by default)
  string(APPEND CMAKE_CXX_FLAGS " -Xclang -fcxx-exceptions")   # enable C++ exceptions in Clang
  string(APPEND CMAKE_CXX_FLAGS " /Gy")                        # package individual functions
else()
  string(APPEND CMAKE_CXX_FLAGS " -fvisibility=hidden")          # makes all symbols hidden by default
  string(APPEND CMAKE_CXX_FLAGS " -fvisibility-inlines-hidden")  # makes all inline symbols hidden by default
  string(APPEND CMAKE_CXX_FLAGS " -fno-strict-aliasing")         # disables strict aliasing rules
  string(APPEND CMAKE_CXX_FLAGS " -fno-tree-vectorize")          # disable auto vectorizer
  string(APPEND CMAKE_CXX_FLAGS " -D_FORTIFY_SOURCE=2")          # perform extra security checks for some standard library calls
  string(APPEND CMAKE_CXX_FLAGS " -fsigned-char")                # treat char as signed on all processors, including ARM
  string(APPEND CMAKE_CXX_FLAGS " -Wall")                        # enables most warnings
  string(APPEND CMAKE_CXX_FLAGS " -Wformat -Wformat-security")   # enables string format vulnerability warnings
  string(APPEND CMAKE_CXX_FLAGS " -ffp-model=precise")           # makes dpcpp compiler compatible with clang++
endif()
|
||||||
|
|
||||||
|
# Platform-specific tail of the flag setup: language standard selection and
# msvc_post on Windows; PIC, sanitizer, per-configuration and hardening flags
# everywhere else.
if(WIN32)

  if(NOT EMBREE_SYCL_SUPPORT)
    # C++14 from MSVC_VERSION 1916 on, C++11 otherwise. MSVC_VERSION is
    # referenced by name rather than expanded with ${...}, so an unset value
    # evaluates to false instead of producing a malformed if() expression.
    if(MSVC_VERSION VERSION_GREATER_EQUAL 1916)
      set(_embree_cxx_std "c++14")
    else()
      set(_embree_cxx_std "c++11")
    endif()

    if(SYCL_ONEAPI_ICX)
      string(APPEND CMAKE_CXX_FLAGS " -Qstd=${_embree_cxx_std}")
      string(APPEND CMAKE_CXX_FLAGS " /Oi")
    else()
      string(APPEND CMAKE_CXX_FLAGS " -std=${_embree_cxx_std}")
    endif()
    unset(_embree_cxx_std)
  endif()

  include(msvc_post)

  # workaround for file encoding problems of kernels/embree.rc found here https://gitlab.kitware.com/cmake/cmake/-/issues/18311
  set(CMAKE_NINJA_CMCLDEPS_RC OFF)

else()

  string(APPEND CMAKE_CXX_FLAGS " -fPIE") # enables support for more secure position independent execution
  string(APPEND CMAKE_CXX_FLAGS " -fPIC") # generate position independent code suitable for shared libraries
  string(APPEND CMAKE_C_FLAGS " -fPIC")   # generate position independent code suitable for shared libraries

  option(EMBREE_ADDRESS_SANITIZER "Enabled CLANG address sanitizer." OFF)

  if(EMBREE_ADDRESS_SANITIZER)
    string(APPEND CMAKE_CXX_FLAGS " -fsanitize=address -fsanitize-address-use-after-scope -fno-omit-frame-pointer -fno-optimize-sibling-calls")
  endif()

  # The per-configuration flags are rebuilt from scratch; previous values are
  # discarded.
  set(CMAKE_CXX_FLAGS_DEBUG "")
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -g")              # generate debug information
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -DDEBUG")         # enable assertions
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -DTBB_USE_DEBUG") # configure TBB in debug mode
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O3")             # enable full optimizations

  set(CMAKE_CXX_FLAGS_RELEASE "")
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " -DNDEBUG")      # disable assertions
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O3")           # enable full optimizations

  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "")
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -g")       # generate debug information
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -DNDEBUG") # disable assertions
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -O3")      # enable full optimizations

  if(EMBREE_SYCL_SUPPORT)
    set(CMAKE_CXX_FLAGS_RELWITHASSERT "")
    string(APPEND CMAKE_CXX_FLAGS_RELWITHASSERT " -DDEBUG") # enable assertions
    string(APPEND CMAKE_CXX_FLAGS_RELWITHASSERT " -O3")     # enable full optimizations
  endif()

  if(NOT EMBREE_ADDRESS_SANITIZER) # for address sanitizer this causes link errors
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--no-undefined") # issues link error for undefined symbols in shared library
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -z noexecstack")     # we do not need an executable stack
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -z relro -z now")    # re-arranges data sections to increase security
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -z noexecstack")        # we do not need an executable stack
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -z relro -z now")       # re-arranges data sections to increase security
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -pie")                  # enables position independent execution for executable
  endif()

endif()
|
||||||
11
Framework/external/embree/common/cmake/embree-config-builddir.cmake
vendored
Normal file
11
Framework/external/embree/common/cmake/embree-config-builddir.cmake
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Start from the install-tree configuration that sits next to this file ...
include("${CMAKE_CURRENT_LIST_DIR}/embree-config-install.cmake")

# ... and override the path variables so that they point into the build
# directory. The @...@ placeholders are substituted when this template is
# configured; the values are quoted so that a source or binary directory
# containing spaces stays a single path instead of being split into a list.
set(EMBREE_INCLUDE_DIRS "@PROJECT_SOURCE_DIR@/include")
set(EMBREE_LIBRARY "@PROJECT_BINARY_DIR@/@EMBREE_LIBRARY_FULLNAME@")
set(EMBREE_LIBRARIES ${EMBREE_LIBRARY})
|
||||||
|
|
||||||
16
Framework/external/embree/common/cmake/embree-config-version.cmake
vendored
Normal file
16
Framework/external/embree/common/cmake/embree-config-version.cmake
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Version check template. Sets:
#   PACKAGE_VERSION            - this package's version (@EMBREE_VERSION@ placeholder)
#   PACKAGE_VERSION_EXACT      - 1 when PACKAGE_FIND_VERSION equals it, else 0
#   PACKAGE_VERSION_COMPATIBLE - 1 on an exact match, or when the requested
#                                major version is the same and the requested
#                                version is lower; else 0
set(PACKAGE_VERSION @EMBREE_VERSION@)

if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
  set(PACKAGE_VERSION_EXACT 1)
  set(PACKAGE_VERSION_COMPATIBLE 1)
else()
  set(PACKAGE_VERSION_EXACT 0)
  if(PACKAGE_FIND_VERSION_MAJOR EQUAL @EMBREE_VERSION_MAJOR@ AND PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
    set(PACKAGE_VERSION_COMPATIBLE 1)
  else()
    set(PACKAGE_VERSION_COMPATIBLE 0)
  endif()
endif()
|
||||||
112
Framework/external/embree/common/cmake/embree-config.cmake
vendored
Normal file
112
Framework/external/embree/common/cmake/embree-config.cmake
vendored
Normal file
|
|
@ -0,0 +1,112 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Resolve the package root relative to the directory containing this file
# and normalize it to an absolute path.
get_filename_component(EMBREE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/@EMBREE_RELATIVE_ROOT_DIR@" ABSOLUTE)

# Header and library locations below the package root.
set(EMBREE_INCLUDE_DIRS "${EMBREE_ROOT_DIR}/@CMAKE_INSTALL_INCLUDEDIR@")
set(EMBREE_LIBRARY "${EMBREE_ROOT_DIR}/@CMAKE_INSTALL_LIBDIR@/@EMBREE_LIBRARY_FULLNAME@")
set(EMBREE_LIBRARIES ${EMBREE_LIBRARY})
|
||||||
|
|
||||||
|
# --- version -----------------------------------------------------------------
set(EMBREE_VERSION @EMBREE_VERSION@)
set(EMBREE_VERSION_MAJOR @EMBREE_VERSION_MAJOR@)
set(EMBREE_VERSION_MINOR @EMBREE_VERSION_MINOR@)
set(EMBREE_VERSION_PATCH @EMBREE_VERSION_PATCH@)
set(EMBREE_VERSION_NOTE "@EMBREE_VERSION_NOTE@")

# --- instruction sets the library was built with -----------------------------
set(EMBREE_MAX_ISA @EMBREE_MAX_ISA@)
set(EMBREE_ISA_SSE2 @EMBREE_ISA_SSE2@)
set(EMBREE_ISA_SSE42 @EMBREE_ISA_SSE42@)
set(EMBREE_ISA_AVX @EMBREE_ISA_AVX@)
set(EMBREE_ISA_AVX2 @EMBREE_ISA_AVX2@)
set(EMBREE_ISA_AVX512 @EMBREE_ISA_AVX512@)
# Compatibility alias: mirrors the AVX512 setting under the older name.
set(EMBREE_ISA_AVX512SKX @EMBREE_ISA_AVX512@)
set(EMBREE_ISA_NEON @EMBREE_ISA_NEON@)
set(EMBREE_ISA_NEON2X @EMBREE_ISA_NEON2X@)

# --- build flavour -----------------------------------------------------------
set(EMBREE_BUILD_TYPE @CMAKE_BUILD_TYPE@)
set(EMBREE_ISPC_SUPPORT @EMBREE_ISPC_SUPPORT@)
set(EMBREE_STATIC_LIB @EMBREE_STATIC_LIB@)
set(EMBREE_SYCL_SUPPORT @EMBREE_SYCL_SUPPORT@)
set(EMBREE_SYCL_GEOMETRY_CALLBACK @EMBREE_SYCL_GEOMETRY_CALLBACK@)
set(EMBREE_TUTORIALS @EMBREE_TUTORIALS@)

# --- feature switches --------------------------------------------------------
set(EMBREE_RAY_MASK @EMBREE_RAY_MASK@)
set(EMBREE_STAT_COUNTERS @EMBREE_STAT_COUNTERS@)
set(EMBREE_BACKFACE_CULLING @EMBREE_BACKFACE_CULLING@)
set(EMBREE_FILTER_FUNCTION @EMBREE_FILTER_FUNCTION@)
set(EMBREE_IGNORE_INVALID_RAYS @EMBREE_IGNORE_INVALID_RAYS@)
set(EMBREE_TASKING_SYSTEM @EMBREE_TASKING_SYSTEM@)
set(EMBREE_TBB_COMPONENT @EMBREE_TBB_COMPONENT@)
set(EMBREE_COMPACT_POLYS @EMBREE_COMPACT_POLYS@)

# --- geometry types ----------------------------------------------------------
set(EMBREE_GEOMETRY_TRIANGLE @EMBREE_GEOMETRY_TRIANGLE@)
set(EMBREE_GEOMETRY_QUAD @EMBREE_GEOMETRY_QUAD@)
set(EMBREE_GEOMETRY_CURVE @EMBREE_GEOMETRY_CURVE@)
set(EMBREE_GEOMETRY_SUBDIVISION @EMBREE_GEOMETRY_SUBDIVISION@)
set(EMBREE_GEOMETRY_USER @EMBREE_GEOMETRY_USER@)
set(EMBREE_GEOMETRY_INSTANCE @EMBREE_GEOMETRY_INSTANCE@)
set(EMBREE_GEOMETRY_INSTANCE_ARRAY @EMBREE_GEOMETRY_INSTANCE_ARRAY@)
set(EMBREE_GEOMETRY_GRID @EMBREE_GEOMETRY_GRID@)
set(EMBREE_GEOMETRY_POINT @EMBREE_GEOMETRY_POINT@)

# --- remaining tunables ------------------------------------------------------
set(EMBREE_RAY_PACKETS @EMBREE_RAY_PACKETS@)
set(EMBREE_MAX_INSTANCE_LEVEL_COUNT @EMBREE_MAX_INSTANCE_LEVEL_COUNT@)
set(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR @EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR@)
set(EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE @EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE@)
set(EMBREE_MIN_WIDTH @EMBREE_MIN_WIDTH@)
|
||||||
|
|
||||||
|
# A static Embree that uses the TBB tasking system needs TBB to be located
# by the consuming project as well.
if(EMBREE_STATIC_LIB)
  if(EMBREE_TASKING_SYSTEM STREQUAL "TBB")
    include(CMakeFindDependencyMacro)
    find_dependency(TBB)
  endif()
endif()
|
||||||
|
|
||||||
|
# For a static build, also load the exported target files of the internal
# component libraries and of every ISA-specific library that was enabled.
if(EMBREE_STATIC_LIB)

  # Component libraries that are always part of a static build.
  include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/sys-targets.cmake")
  include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/math-targets.cmake")
  include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/simd-targets.cmake")
  include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/lexers-targets.cmake")
  include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/tasking-targets.cmake")

  # One extra target file per enabled instruction set.
  if(EMBREE_ISA_SSE42)
    include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_sse42-targets.cmake")
  endif()

  if(EMBREE_ISA_AVX)
    include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_avx-targets.cmake")
  endif()

  if(EMBREE_ISA_AVX2)
    include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_avx2-targets.cmake")
  endif()

  if(EMBREE_ISA_AVX512)
    include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_avx512-targets.cmake")
  endif()

endif()
|
||||||
|
|
||||||
|
# SYCL-enabled builds export additional settings and target files.
if(EMBREE_SYCL_SUPPORT)

  set(EMBREE_SYCL_AOT_DEVICES @EMBREE_SYCL_AOT_DEVICES@)
  set(EMBREE_SYCL_LARGEGRF @EMBREE_SYCL_LARGEGRF@)
  set(EMBREE_SYCL_RT_VALIDATION_API @EMBREE_SYCL_RT_VALIDATION_API@)

  # Only present when the RT validation API was built.
  if(EMBREE_SYCL_RT_VALIDATION_API)
    include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_rthwif_sycl-targets.cmake")
  endif()

  # Static builds additionally load these two target files.
  if(EMBREE_STATIC_LIB)
    include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_rthwif-targets.cmake")
    include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/ze_wrapper-targets.cmake")
  endif()

  include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_sycl-targets.cmake")

endif()
|
||||||
|
|
||||||
|
# Main exported target file of the package.
include("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree-targets.cmake")

# Testing-related settings recorded at configure time.
set(EMBREE_TESTING_ONLY_SYCL_TESTS @EMBREE_TESTING_ONLY_SYCL_TESTS@)
set(EMBREE_TESTING_INTENSITY @EMBREE_TESTING_INTENSITY@)
set(EMBREE_TESTING_MEMCHECK @EMBREE_TESTING_MEMCHECK@)
set(EMBREE_TESTING_BENCHMARK @EMBREE_TESTING_BENCHMARK@)
|
||||||
8
Framework/external/embree/common/cmake/embree.entitlements
vendored
Normal file
8
Framework/external/embree/common/cmake/embree.entitlements
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||||
|
<plist version="1.0">
|
||||||
|
<dict>
|
||||||
|
<key>com.apple.security.cs.disable-library-validation</key>
|
||||||
|
<true/>
|
||||||
|
</dict>
|
||||||
|
</plist>
|
||||||
105
Framework/external/embree/common/cmake/gnu.cmake
vendored
Normal file
105
Framework/external/embree/common/cmake/gnu.cmake
vendored
Normal file
|
|
@ -0,0 +1,105 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# _SET_IF_EMPTY(<VAR> <VALUE>)
#
# Assigns <VALUE> to the variable named <VAR> unless that variable already
# evaluates to true in an if() test, i.e. an existing non-false value wins.
# Implemented as a macro so the assignment lands in the caller's scope.
macro(_SET_IF_EMPTY VAR VALUE)
  if(${VAR})
    # Already holds a usable value: leave it untouched.
  else()
    set(${VAR} "${VALUE}")
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Per-ISA compile flags. Non-ARM builds get the -m<isa> switches as defaults
# (only applied when the corresponding FLAGS_* variable is not already set).
# ARM builds instead unconditionally define the matching x86 feature macros.
if(NOT EMBREE_ARM)
  _SET_IF_EMPTY(FLAGS_SSE2 "-msse2")
  _SET_IF_EMPTY(FLAGS_SSE42 "-msse4.2")
  _SET_IF_EMPTY(FLAGS_AVX "-mavx")
  _SET_IF_EMPTY(FLAGS_AVX2 "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
  _SET_IF_EMPTY(FLAGS_AVX512 "-mavx512f -mavx512dq -mavx512cd -mavx512bw -mavx512vl -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mprefer-vector-width=256")
else()
  set(FLAGS_SSE2 "-D__SSE__ -D__SSE2__")
  set(FLAGS_SSE42 "-D__SSE4_2__ -D__SSE4_1__")
  set(FLAGS_AVX "-D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
  set(FLAGS_AVX2 "-D__AVX2__ -D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
endif()
|
||||||
|
|
||||||
|
# Optionally discard whatever CMAKE_CXX_FLAGS were provided from outside so
# that only the flags assembled in this file are used (enabled by default).
option(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)

if(EMBREE_IGNORE_CMAKE_CXX_FLAGS)
  set(CMAKE_CXX_FLAGS "")
else()
  # Keep the incoming value, re-assigned as a normal variable.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
|
||||||
|
|
||||||
|
# Flags shared by all build configurations. Every flag is appended with a
# leading blank, so the resulting string is the same as with repeated
# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ...") calls.
if(EMBREE_ARM)
  string(APPEND CMAKE_CXX_FLAGS
    " -fsigned-char"              # treat 'char' as 'signed char'
    " -flax-vector-conversions"   # allow lax vector type conversions
  )
endif()

string(APPEND CMAKE_CXX_FLAGS
  " -Wall"                        # enables most warnings
  " -Wformat -Wformat-security"   # enables string format vulnerability warnings
  " -Wno-class-memaccess"         # silences the "clearing an object of type 'XXX' with no trivial copy-assignment" warning
)

# The following keep the compiler from optimizing away security checks.
string(APPEND CMAKE_CXX_FLAGS
  " -fno-strict-overflow"             # assume that signed overflow occurs
  " -fno-delete-null-pointer-checks"  # keep all checks for NULL pointers
  " -fwrapv"                          # assume that signed arithmetic overflow wraps around
  " -fsigned-char"                    # treat char as signed on all processors, including ARM
)

if(NOT WIN32)
  if(NOT APPLE)
    string(APPEND CMAKE_CXX_FLAGS " -fPIE")  # support for position independent executables
  endif()
  # Position independent code suitable for shared libraries (C++ and C).
  string(APPEND CMAKE_CXX_FLAGS " -fPIC")
  string(APPEND CMAKE_C_FLAGS " -fPIC")
endif()

string(APPEND CMAKE_CXX_FLAGS
  " -std=c++11"                   # enables C++11 features
  " -fvisibility=hidden"          # makes all symbols hidden by default
  " -fvisibility-inlines-hidden"  # makes all inline symbols hidden by default
  " -fno-strict-aliasing"         # disables strict aliasing rules
  " -fno-tree-vectorize"          # disable auto vectorizer
  " -D_FORTIFY_SOURCE=2"          # extra security checks for some standard library calls
)

if(EMBREE_STACK_PROTECTOR)
  string(APPEND CMAKE_CXX_FLAGS " -fstack-protector")  # protects against return address overrides
endif()
|
||||||
|
# DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
#
# When stack protection is enabled for the build (EMBREE_STACK_PROTECTOR),
# sets COMPILE_FLAGS of the given source file to "-fno-stack-protector".
# Does nothing when stack protection is off.
macro(DISABLE_STACK_PROTECTOR_FOR_FILE file)
  if(EMBREE_STACK_PROTECTOR)
    set_source_files_properties(${file}
      PROPERTIES
        COMPILE_FLAGS "-fno-stack-protector")
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Per-configuration flags. Any pre-existing values are discarded; on ARM each
# configuration starts with -fsigned-char (treat 'char' as 'signed char').
if(EMBREE_ARM)
  set(CMAKE_CXX_FLAGS_DEBUG " -fsigned-char")
  set(CMAKE_CXX_FLAGS_RELEASE " -fsigned-char")
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fsigned-char")
else()
  set(CMAKE_CXX_FLAGS_DEBUG "")
  set(CMAKE_CXX_FLAGS_RELEASE "")
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "")
endif()

string(APPEND CMAKE_CXX_FLAGS_DEBUG
  " -g"               # generate debug information
  " -DDEBUG"          # enable assertions
  " -DTBB_USE_DEBUG"  # configure TBB in debug mode
  " -O3"              # enable full optimizations
)

string(APPEND CMAKE_CXX_FLAGS_RELEASE
  " -DNDEBUG"         # disable assertions
  " -O3"              # enable full optimizations
)

string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO
  " -g"               # generate debug information
  " -DNDEBUG"         # disable assertions
  " -O3"              # enable full optimizations
)
|
||||||
|
|
||||||
|
# Platform specific additions: a minimum OS version on Apple platforms,
# hardening linker options everywhere else.
if(APPLE)
  string(APPEND CMAKE_CXX_FLAGS " -mmacosx-version-min=10.7")  # makes sure code runs on older MacOSX versions
  # SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # link against libc++ which supports C++11 features
else()
  # These linker options are only added for compilers whose ID matches "GNU".
  if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
    string(APPEND CMAKE_SHARED_LINKER_FLAGS
      " -Wl,--no-undefined"  # issues link error for undefined symbols in shared library
      " -z noexecstack"      # we do not need an executable stack
      " -z relro -z now"     # re-arranges data sections to increase security
    )
    string(APPEND CMAKE_EXE_LINKER_FLAGS
      " -z noexecstack"      # we do not need an executable stack
      " -z relro -z now"     # re-arranges data sections to increase security
    )
  endif()
  string(APPEND CMAKE_EXE_LINKER_FLAGS " -pie")  # enables position independent execution for executable
endif()
|
||||||
|
|
||||||
25
Framework/external/embree/common/cmake/installTBB.cmake
vendored
Normal file
25
Framework/external/embree/common/cmake/installTBB.cmake
vendored
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Installs the TBB runtime next to Embree when EMBREE_INSTALL_DEPENDENCIES is
# enabled. The library location is read from the release configuration of the
# imported target TBB::${EMBREE_TBB_COMPONENT}; a missing target is reported
# with SEND_ERROR.
IF (EMBREE_INSTALL_DEPENDENCIES)
  IF (TARGET TBB::${EMBREE_TBB_COMPONENT})
    GET_TARGET_PROPERTY(LIB_PATH TBB::${EMBREE_TBB_COMPONENT} IMPORTED_LOCATION_RELEASE)
    IF(WIN32)
      # DLL goes to the binary directory, its import library to the library directory.
      INSTALL(FILES "${LIB_PATH}" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
      GET_TARGET_PROPERTY(IMPLIB_PATH TBB::${EMBREE_TBB_COMPONENT} IMPORTED_IMPLIB_RELEASE)
      INSTALL(FILES "${IMPLIB_PATH}" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)

      # Include tbbmalloc.dll even though we don't use or need tbb_malloc.dll because it is searched for by tbb.dll.
      GET_FILENAME_COMPONENT(LIB_FOLDER "${LIB_PATH}" DIRECTORY)
      FILE(GLOB TBB_MALLOC_FILES LIST_DIRECTORIES false "${LIB_FOLDER}/tbbmalloc.dll" "${LIB_FOLDER}/tbbmalloc?.dll")
      # The glob result is a list: expand it unquoted so that every match is
      # passed as its own file argument (a quoted list would be handed over as
      # one "a;b" path), and skip the rule entirely when nothing matched.
      IF (TBB_MALLOC_FILES)
        INSTALL(FILES ${TBB_MALLOC_FILES} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
      ENDIF()

    ELSE()
      # Install every libtbb.* / libtbbmalloc.so* file found next to the library.
      GET_FILENAME_COMPONENT(LIB_DIR "${LIB_PATH}" DIRECTORY)
      FILE(GLOB LIB_FILES "${LIB_DIR}/libtbb.*" "${LIB_DIR}/libtbbmalloc.so*")
      IF (LIB_FILES)
        INSTALL(FILES ${LIB_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
      ENDIF()
    ENDIF()
  ELSE()
    MESSAGE(SEND_ERROR "Target TBB::${EMBREE_TBB_COMPONENT} not found during install.")
  ENDIF()
ENDIF()
|
||||||
176
Framework/external/embree/common/cmake/intel.cmake
vendored
Normal file
176
Framework/external/embree/common/cmake/intel.cmake
vendored
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# _SET_IF_EMPTY(<VAR> <VALUE>)
#
# Assigns <VALUE> to the variable named <VAR> unless that variable already
# evaluates to true in an if() test, i.e. an existing non-false value wins.
# Implemented as a macro so the assignment lands in the caller's scope.
macro(_SET_IF_EMPTY VAR VALUE)
  if(${VAR})
    # Already holds a usable value: leave it untouched.
  else()
    set(${VAR} "${VALUE}")
  endif()
endmacro()
|
||||||
|
|
||||||
|
IF (WIN32)
|
||||||
|
|
||||||
|
# Default per-ISA switches (only applied when not already provided).
_SET_IF_EMPTY(FLAGS_SSE2 "/QxSSE2")
_SET_IF_EMPTY(FLAGS_SSE42 "/QxSSE4.2")
_SET_IF_EMPTY(FLAGS_AVX "/arch:AVX")
_SET_IF_EMPTY(FLAGS_AVX2 "/QxCORE-AVX2")
_SET_IF_EMPTY(FLAGS_AVX512 "/QxCORE-AVX512")

# Flags common to all configurations; collected in COMMON_CXX_FLAGS and later
# added to each CMAKE_CXX_FLAGS_<CONFIG>.
set(COMMON_CXX_FLAGS "")
string(APPEND COMMON_CXX_FLAGS
  " /EHsc"                     # catch C++ exceptions only and extern "C" functions never throw a C++ exception
  " /MP"                       # compile source files in parallel
  " /GR"                       # enable runtime type information (on by default)
  " /Qvec-"                    # disable auto vectorizer
  " /Qfast-transcendentals-"   # disable fast transcendentals, prevents sin(x),cos(x) -> sincos(x) optimization
)

# Return address protection follows EMBREE_STACK_PROTECTOR.
if(NOT EMBREE_STACK_PROTECTOR)
  string(APPEND COMMON_CXX_FLAGS " /GS-")  # do not protect against return address overrides
else()
  string(APPEND COMMON_CXX_FLAGS " /GS")   # protects against return address overrides
endif()
|
||||||
|
# DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
#
# When stack protection is enabled for the build (EMBREE_STACK_PROTECTOR),
# sets COMPILE_FLAGS of the given source file to "/GS-". Does nothing when
# stack protection is off.
macro(DISABLE_STACK_PROTECTOR_FOR_FILE file)
  if(EMBREE_STACK_PROTECTOR)
    set_source_files_properties(${file}
      PROPERTIES
        COMPILE_FLAGS "/GS-")
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Silence two optimization-report remarks. The trailing blank after each
# switch is part of the original flag string and is kept.
string(APPEND COMMON_CXX_FLAGS
  " /Qdiag-disable:11074 "   # remark #11074: Inlining inhibited by limit max-size
  " /Qdiag-disable:11075 "   # remark #11075: To get full report use -Qopt-report:4 -Qopt-report-phase ipo
)

# --- Debug -------------------------------------------------------------------
string(APPEND CMAKE_CXX_FLAGS_DEBUG
  " ${COMMON_CXX_FLAGS}"
  " /DDEBUG"           # enables assertions
  " /DTBB_USE_DEBUG"   # configures TBB in debug mode
  " /Oi"               # inline intrinsic functions
)
string(APPEND CMAKE_EXE_LINKER_FLAGS_DEBUG " /DEBUG")      # generate debug information
string(APPEND CMAKE_SHARED_LINKER_FLAGS_DEBUG " /DEBUG")   # generate debug information

# --- Release -----------------------------------------------------------------
string(APPEND CMAKE_CXX_FLAGS_RELEASE
  " ${COMMON_CXX_FLAGS}"
  " /Ox"                       # enable full optimizations
  " /Oi"                       # inline intrinsic functions
  " /Gy"                       # package individual functions
  " /Qinline-max-total-size-"  # no size limit when performing inlining
  " /Qinline-factor=150"       # raise the inline factor to 150
)

# --- RelWithDebInfo ----------------------------------------------------------
# SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /DTBB_USE_DEBUG") # configures TBB in debug mode
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO
  " ${COMMON_CXX_FLAGS}"
  " /Ox"                       # enable full optimizations
  " /Oi"                       # inline intrinsic functions
  " /Gy"                       # package individual functions
  " /Qinline-max-total-size-"  # no size limit when performing inlining
  " /Qinline-factor=150"       # raise the inline factor to 150
)
string(APPEND CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO " /DEBUG")      # generate debug information
string(APPEND CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO " /DEBUG")   # generate debug information
|
||||||
|
|
||||||
|
# Hardening options applied to both executables and shared libraries.
set(SECURE_LINKER_FLAGS "")
string(APPEND SECURE_LINKER_FLAGS
  " /NXCompat"     # compatible with data execution prevention (on by default)
  " /DynamicBase"  # random rebase of executable at load time
)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
  # invoke known exception handlers (Win32 only, x64 exception handlers are safe by design)
  string(APPEND SECURE_LINKER_FLAGS " /SafeSEH")
endif()

string(APPEND CMAKE_EXE_LINKER_FLAGS " ${SECURE_LINKER_FLAGS}")
string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${SECURE_LINKER_FLAGS}")

# Suppress two linker warnings for shared libraries.
string(APPEND CMAKE_SHARED_LINKER_FLAGS
  " /IGNORE:4217"  # locally defined symbol XXX imported in function YYY (happens as the ISPC API layer uses exported library functions)
  " /IGNORE:4049"  # warning LNK4049: locally defined symbol _rtcOccluded1M imported
)
|
||||||
|
|
||||||
|
include(msvc_post)

# remove libmmd dependency
if(NOT EMBREE_STATIC_RUNTIME)
  # Use the default math library instead of libmmd[d]: the debug
  # configuration excludes libmmdd.lib, the optimized ones libmmd.lib.
  set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /nodefaultlib:libmmdd.lib")
  set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} /nodefaultlib:libmmdd.lib")

  set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /nodefaultlib:libmmd.lib")
  set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /nodefaultlib:libmmd.lib")

  set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /nodefaultlib:libmmd.lib")
  set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /nodefaultlib:libmmd.lib")

  # Link the static version of SVML
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /defaultlib:svml_dispmt.lib")
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /defaultlib:svml_dispmt.lib")
endif()
|
||||||
|
|
||||||
|
ELSE()
|
||||||
|
|
||||||
|
# Default per-ISA switches (only applied when not already provided).
if(NOT APPLE)
  _SET_IF_EMPTY(FLAGS_SSE2 "-xsse2")
else()
  _SET_IF_EMPTY(FLAGS_SSE2 "-xssse3")  # on MacOSX ICC does not support SSE2
endif()
_SET_IF_EMPTY(FLAGS_SSE42 "-xsse4.2")
_SET_IF_EMPTY(FLAGS_AVX "-xAVX")
_SET_IF_EMPTY(FLAGS_AVX2 "-xCORE-AVX2")
_SET_IF_EMPTY(FLAGS_AVX512 "-xCORE-AVX512")
|
||||||
|
|
||||||
|
# Optionally discard whatever CMAKE_CXX_FLAGS were provided from outside so
# that only the flags assembled in this file are used (enabled by default).
option(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)
if(EMBREE_IGNORE_CMAKE_CXX_FLAGS)
  set(CMAKE_CXX_FLAGS "")
else()
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()

string(APPEND CMAKE_CXX_FLAGS
  " -Wall"                        # enables most warnings
  " -Wformat -Wformat-security"   # enables string format vulnerability warnings
)

# The following keep the compiler from optimizing away security checks.
string(APPEND CMAKE_CXX_FLAGS
  " -fno-strict-overflow"             # assume that signed overflow occurs
  " -fno-delete-null-pointer-checks"  # keep all checks for NULL pointers
  " -fwrapv"                          # assume that signed arithmetic overflow wraps around
)

if(NOT APPLE)
  string(APPEND CMAKE_CXX_FLAGS
    " -fPIE"                      # support for position independent executables
    " -ftls-model=local-dynamic"  # otherwise ICC2019 cannot compile code with -fPIE enabled
  )
endif()

# Position independent code suitable for shared libraries (C++ and C).
string(APPEND CMAKE_CXX_FLAGS " -fPIC")
string(APPEND CMAKE_C_FLAGS " -fPIC")

string(APPEND CMAKE_CXX_FLAGS
  " -std=c++11"                   # enables C++11 features
  " -fvisibility=hidden"          # makes all symbols hidden by default
  " -fvisibility-inlines-hidden"  # makes all inline symbols hidden by default
  " -no-ansi-alias"               # disables strict aliasing rules
  " -no-vec"                      # disable auto vectorizer
  " -fasm-blocks"                 # enable assembly blocks
  " -D_FORTIFY_SOURCE=2"          # extra security checks for some standard library calls
)

if(EMBREE_STACK_PROTECTOR)
  string(APPEND CMAKE_CXX_FLAGS " -fstack-protector")  # protects against return address overrides
endif()
|
||||||
|
# DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
#
# When stack protection is enabled for the build (EMBREE_STACK_PROTECTOR),
# sets COMPILE_FLAGS of the given source file to "-fno-stack-protector".
# Does nothing when stack protection is off.
macro(DISABLE_STACK_PROTECTOR_FOR_FILE file)
  if(EMBREE_STACK_PROTECTOR)
    set_source_files_properties(${file}
      PROPERTIES
        COMPILE_FLAGS "-fno-stack-protector")
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Language extension and inlining controls.
string(APPEND CMAKE_CXX_FLAGS
  " -restrict"                   # enable restrict keyword
  " -no-inline-max-total-size"   # no size limit when performing inlining
  " -no-inline-max-per-compile"  # no maximal number of inlinings per compilation unit
  " -inline-factor=150"          # raise the inline factor to 150
)

# Skipped when the compiler is driven through the Cray programming
# environment wrapper.
if(NOT CMAKE_CXX_COMPILER_WRAPPER STREQUAL "CrayPrgEnv")
  string(APPEND CMAKE_CXX_FLAGS
    " -static-intel"         # links intel runtime statically
    " -no-intel-extensions"  # disables some intel extensions which cause symbols to be exported
  )
endif()
|
||||||
|
|
||||||
|
# Per-configuration flags; any pre-existing values are discarded. Each value
# keeps the leading blank produced by the original incremental construction.

# debug information, assertions enabled (-DDEBUG), TBB in debug mode, full optimizations
set(CMAKE_CXX_FLAGS_DEBUG " -g -DDEBUG -DTBB_USE_DEBUG -O3")

# assertions disabled (-DNDEBUG), full optimizations
set(CMAKE_CXX_FLAGS_RELEASE " -DNDEBUG -O3")

# debug information, assertions disabled (-DNDEBUG), full optimizations
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO " -g -DNDEBUG -O3")
|
||||||
|
|
||||||
|
# Platform specific additions: minimum OS version and libc++ on Apple
# platforms, hardening linker options everywhere else.
if(APPLE)
  string(APPEND CMAKE_CXX_FLAGS
    " -mmacosx-version-min=10.7"  # makes sure code runs on older MacOSX versions
    " -stdlib=libc++"             # link against libc++ which supports C++11 features
  )
else()
  string(APPEND CMAKE_SHARED_LINKER_FLAGS
    " -Wl,--no-undefined"  # issues link error for undefined symbols in shared library
    " -z noexecstack"      # we do not need an executable stack
    " -z relro -z now"     # re-arranges data sections to increase security
  )
  string(APPEND CMAKE_EXE_LINKER_FLAGS
    " -z noexecstack"      # we do not need an executable stack
    " -z relro -z now"     # re-arranges data sections to increase security
    " -pie"                # enables position independent execution for executable
  )
endif()
|
||||||
|
|
||||||
|
ENDIF()
|
||||||
211
Framework/external/embree/common/cmake/ispc.cmake
vendored
Normal file
211
Framework/external/embree/common/cmake/ispc.cmake
vendored
Normal file
|
|
@ -0,0 +1,211 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# ##################################################################
|
||||||
|
# add macro INCLUDE_DIRECTORIES_ISPC() that allows to specify search
|
||||||
|
# paths for ISPC sources
|
||||||
|
# ##################################################################
|
||||||
|
# List of include search paths handed to ISPC; starts out empty.
set(ISPC_INCLUDE_DIR "")

# INCLUDE_DIRECTORIES_ISPC(<dir>...)
#   Adds every directory passed as an argument to ISPC_INCLUDE_DIR.
#   This is a macro, so the assignment lands in the caller's variable scope.
macro(INCLUDE_DIRECTORIES_ISPC)
  set(ISPC_INCLUDE_DIR ${ISPC_INCLUDE_DIR} ${ARGN})
endmacro()
|
||||||
|
|
||||||
|
IF (EMBREE_ISPC_SUPPORT)
|
||||||
|
|
||||||
|
# ISPC releases to search for, newest first. The last (oldest) entry is also
# used as the minimum required version.
set(ISPC_VERSION_WORKING "1.9.1" "1.9.0" "1.8.3" "1.8.2")
list(GET ISPC_VERSION_WORKING -1 ISPC_VERSION_REQUIRED)

if (NOT EMBREE_ISPC_EXECUTABLE)
  # Directory-name suffix(es) of an ISPC release folder for this platform;
  # such folders are looked for next to the project source directory.
  if (APPLE)
    set(ISPC_DIR_SUFFIX "osx")
  elseif (WIN32)
    if (MSVC14)
      set(ISPC_DIR_SUFFIX "windows" "windows-vs2015")
    else()
      set(ISPC_DIR_SUFFIX "windows" "windows-vs2013")
    endif()
  else()
    set(ISPC_DIR_SUFFIX "linux")
  endif()

  # One search hint per (version, suffix) combination.
  foreach(ispc_ver IN LISTS ISPC_VERSION_WORKING)
    foreach(ispc_suffix IN LISTS ISPC_DIR_SUFFIX)
      list(APPEND ISPC_DIR_HINT "${PROJECT_SOURCE_DIR}/../ispc-v${ispc_ver}-${ispc_suffix}")
    endforeach()
  endforeach()

  find_program(EMBREE_ISPC_EXECUTABLE ispc PATHS ${ISPC_DIR_HINT} DOC "Path to the ISPC executable.")
  if (NOT EMBREE_ISPC_EXECUTABLE)
    message(FATAL_ERROR "Intel SPMD Compiler (ISPC) not found. Disable EMBREE_ISPC_SUPPORT or install ISPC.")
  endif()
endif()
|
||||||
|
|
||||||
|
# Check the ISPC version: run `<ispc> --version`, abort if the process did not
# exit with 0, then pull the first x.y.z triple out of its output.
execute_process(COMMAND ${EMBREE_ISPC_EXECUTABLE} --version
  OUTPUT_VARIABLE ISPC_OUTPUT
  RESULT_VARIABLE ISPC_RESULT)

# The result is quoted so that an empty value (or one that happens to name
# another variable) cannot break or change the meaning of the comparison.
if (NOT "${ISPC_RESULT}" STREQUAL "0")
  message(FATAL_ERROR "Error executing ISPC executable '${EMBREE_ISPC_EXECUTABLE}': ${ISPC_RESULT}")
else()
  message(STATUS "Found working Intel SPMD Compiler (ISPC): ${EMBREE_ISPC_EXECUTABLE}")
endif()

string(REGEX MATCH "([0-9]+[.][0-9]+[.][0-9]+)" DUMMY "${ISPC_OUTPUT}")
set(ISPC_VERSION ${CMAKE_MATCH_1})

# Report an unparsable version explicitly instead of letting the empty string
# fall through to the "too old" message below.
if ("${ISPC_VERSION}" STREQUAL "")
  message(FATAL_ERROR "Could not determine the ISPC version from the output of '${EMBREE_ISPC_EXECUTABLE} --version'.")
endif()

if (ISPC_VERSION VERSION_LESS ISPC_VERSION_REQUIRED)
  message(FATAL_ERROR "ISPC ${ISPC_VERSION} is too old. You need at least ISPC ${ISPC_VERSION_REQUIRED}.")
endif()

# Directory that contains the ISPC executable.
get_filename_component(ISPC_DIR ${EMBREE_ISPC_EXECUTABLE} PATH)

# User-selectable addressing mode forwarded to ISPC via --addressing.
set(EMBREE_ISPC_ADDRESSING 32 CACHE STRING "32vs64 bit addressing in ispc")
set_property(CACHE EMBREE_ISPC_ADDRESSING PROPERTY STRINGS 32 64)
mark_as_advanced(EMBREE_ISPC_ADDRESSING)
|
||||||
|
|
||||||
|
# ISPC_COMPILE(<src>...)
#   Registers one custom command per ISPC source that invokes
#   EMBREE_ISPC_EXECUTABLE and produces object file(s) plus a `<name>_ispc.h`
#   header below CMAKE_CURRENT_BINARY_DIR.
#
#   Reads:  ISPC_TARGETS, ISPC_INCLUDE_DIR, ISPC_DEFINITIONS,
#           EMBREE_ISPC_ADDRESSING, EMBREE_ISPC_EXECUTABLE
#   Writes: ISPC_OBJECTS (all generated object files), along with several
#           helper variables, all in the caller's scope because this is a macro.
macro(ISPC_COMPILE)
  set(ISPC_ADDITIONAL_ARGS "")

  set(ISPC_TARGET_EXT ${CMAKE_CXX_OUTPUT_EXTENSION})
  # ISPC takes its targets as one comma-separated list
  string(REPLACE ";" "," ISPC_TARGET_ARGS "${ISPC_TARGETS}")

  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
    # quoted: an empty CMAKE_SYSTEM_PROCESSOR must not break the if() syntax
    if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "arm64|aarch64")
      set(ISPC_ARCHITECTURE "aarch64")
    else()
      set(ISPC_ARCHITECTURE "x86-64")
    endif()
  else()
    set(ISPC_ARCHITECTURE "x86")
  endif()

  set(ISPC_TARGET_DIR "${CMAKE_CURRENT_BINARY_DIR}")

  # turn "a;b;c" into "-I;a;-I;b;-I;c"
  if (ISPC_INCLUDE_DIR)
    string(REPLACE ";" ";-I;" ISPC_INCLUDE_DIR_PARMS "${ISPC_INCLUDE_DIR}")
    set(ISPC_INCLUDE_DIR_PARMS "-I" ${ISPC_INCLUDE_DIR_PARMS})
  endif()

  if (WIN32 OR "${CMAKE_BUILD_TYPE}" STREQUAL "Release")
    set(ISPC_OPT_FLAGS -O3)
  else()
    set(ISPC_OPT_FLAGS -O2)
  endif()

  if (WIN32)
    set(ISPC_ADDITIONAL_ARGS ${ISPC_ADDITIONAL_ARGS} --dllexport)
  else()
    set(ISPC_ADDITIONAL_ARGS ${ISPC_ADDITIONAL_ARGS} --pic)
  endif()

  set(ISPC_OBJECTS "")

  foreach(src ${ARGN})
    get_filename_component(fname ${src} NAME_WE)
    get_filename_component(dir ${src} PATH)

    set(outdir "${ISPC_TARGET_DIR}/${dir}")
    set(input "${CMAKE_CURRENT_SOURCE_DIR}/${src}")

    # Collect extra dependencies from the .idep file at the path that is also
    # passed to ISPC via -MMM below (if it already exists); only entries that
    # exist on disk are kept.
    set(deps "")
    if (EXISTS ${outdir}/${fname}.dev.idep)
      file(READ ${outdir}/${fname}.dev.idep contents)
      string(REPLACE " " ";" contents "${contents}")
      string(REPLACE ";" "\\\\;" contents "${contents}")
      string(REPLACE "\n" ";" contents "${contents}")
      foreach(dep ${contents})
        if (EXISTS ${dep})
          set(deps ${deps} ${dep})
        endif()
      endforeach()
    endif()

    set(results "${outdir}/${fname}.dev${ISPC_TARGET_EXT}")

    # if we have multiple targets add additional object files
    list(LENGTH ISPC_TARGETS NUM_TARGETS)
    if (NUM_TARGETS GREATER 1)
      foreach(target ${ISPC_TARGETS})
        # quoted so the loop value is compared as a plain string
        if ("${target}" STREQUAL "avx512skx-i32x16")
          set(target "avx512skx")
        endif()
        set(results ${results} "${outdir}/${fname}.dev_${target}${ISPC_TARGET_EXT}")
      endforeach()
    endif()

    add_custom_command(
      OUTPUT ${results} "${ISPC_TARGET_DIR}/${fname}_ispc.h"
      COMMAND ${CMAKE_COMMAND} -E make_directory ${outdir}
      COMMAND ${EMBREE_ISPC_EXECUTABLE}
      -I "${CMAKE_CURRENT_SOURCE_DIR}"
      ${ISPC_INCLUDE_DIR_PARMS}
      ${ISPC_DEFINITIONS}
      --arch=${ISPC_ARCHITECTURE}
      --addressing=${EMBREE_ISPC_ADDRESSING}
      ${ISPC_OPT_FLAGS}
      --target=${ISPC_TARGET_ARGS}
      --woff
      --opt=fast-math
      ${ISPC_ADDITIONAL_ARGS}
      -h "${ISPC_TARGET_DIR}/${fname}_ispc.h"
      -MMM ${outdir}/${fname}.dev.idep
      -o ${outdir}/${fname}.dev${ISPC_TARGET_EXT}
      ${input}
      DEPENDS ${input} ${deps}
      COMMENT "Building ISPC object ${outdir}/${fname}.dev${ISPC_TARGET_EXT}"
    )

    set(ISPC_OBJECTS ${ISPC_OBJECTS} ${results})
  endforeach()
endmacro()
|
||||||
|
|
||||||
|
# ADD_EMBREE_ISPC_EXECUTABLE(<name> <source>...)
#   Splits the sources by extension: files whose extension is ".ispc" are
#   compiled through ISPC_COMPILE, and the resulting ISPC_OBJECTS are added to
#   the executable <name> together with all remaining sources.
macro(ADD_EMBREE_ISPC_EXECUTABLE name)
  set(ISPC_SOURCES "")
  set(OTHER_SOURCES "")
  foreach(src ${ARGN})
    get_filename_component(ext ${src} EXT)
    if (ext STREQUAL ".ispc")
      list(APPEND ISPC_SOURCES ${src})
    else()
      list(APPEND OTHER_SOURCES ${src})
    endif()
  endforeach()

  ISPC_COMPILE(${ISPC_SOURCES})
  add_executable(${name} ${ISPC_OBJECTS} ${OTHER_SOURCES})
endmacro()
|
||||||
|
|
||||||
|
# ADD_ISPC_LIBRARY(<name> <type> <source>...)
#   Variant used when EMBREE_ISPC_SUPPORT is on: ".ispc" sources go through
#   ISPC_COMPILE and their objects (ISPC_OBJECTS) are linked into the library
#   <name> of kind <type> alongside the non-ISPC sources.
macro(ADD_ISPC_LIBRARY name type)
  set(ISPC_SOURCES "")
  set(OTHER_SOURCES "")
  foreach(src ${ARGN})
    get_filename_component(ext ${src} EXT)
    if (ext STREQUAL ".ispc")
      list(APPEND ISPC_SOURCES ${src})
    else()
      list(APPEND OTHER_SOURCES ${src})
    endif()
  endforeach()

  ISPC_COMPILE(${ISPC_SOURCES})
  add_library(${name} ${type} ${ISPC_OBJECTS} ${OTHER_SOURCES})
endmacro()
|
||||||
|
|
||||||
|
ELSE (EMBREE_ISPC_SUPPORT)
|
||||||
|
|
||||||
|
# ADD_ISPC_LIBRARY(<name> <type> <source>...)
#   Variant used when EMBREE_ISPC_SUPPORT is off: ".ispc" sources are still
#   separated out into ISPC_SOURCES but are not compiled; the library <name>
#   of kind <type> is built from the remaining sources only.
macro(ADD_ISPC_LIBRARY name type)
  set(ISPC_SOURCES "")
  set(OTHER_SOURCES "")
  foreach(src ${ARGN})
    get_filename_component(ext ${src} EXT)
    if (ext STREQUAL ".ispc")
      list(APPEND ISPC_SOURCES ${src})
    else()
      list(APPEND OTHER_SOURCES ${src})
    endif()
  endforeach()

  add_library(${name} ${type} ${OTHER_SOURCES})
endmacro()
|
||||||
|
|
||||||
|
ENDIF (EMBREE_ISPC_SUPPORT)
|
||||||
60
Framework/external/embree/common/cmake/msvc.cmake
vendored
Normal file
60
Framework/external/embree/common/cmake/msvc.cmake
vendored
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Per-ISA flag sets; each level builds on the previous one.
set(FLAGS_SSE2  "/D__SSE__ /D__SSE2__")
set(FLAGS_SSE42 "${FLAGS_SSE2} /D__SSE3__ /D__SSSE3__ /D__SSE4_1__ /D__SSE4_2__")
set(FLAGS_AVX   "${FLAGS_SSE42} /arch:AVX")
set(FLAGS_AVX2  "${FLAGS_SSE42} /arch:AVX2")

# Compiler flags shared by all build configurations.
set(COMMON_CXX_FLAGS "")
string(APPEND COMMON_CXX_FLAGS " /EHsc")  # catch C++ exceptions only and extern "C" functions never throw a C++ exception
string(APPEND COMMON_CXX_FLAGS " /MP")    # compile source files in parallel
string(APPEND COMMON_CXX_FLAGS " /GR")    # enable runtime type information (on by default)
string(APPEND COMMON_CXX_FLAGS " /Gy")    # package individual functions
if (EMBREE_STACK_PROTECTOR)
  string(APPEND COMMON_CXX_FLAGS " /GS")  # protects against return address overrides
else()
  string(APPEND COMMON_CXX_FLAGS " /GS-") # do not protect against return address overrides
endif()
|
||||||
|
# DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
#   When EMBREE_STACK_PROTECTOR is enabled, sets the COMPILE_FLAGS source
#   property of <file> to "/GS-"; otherwise does nothing.
macro(DISABLE_STACK_PROTECTOR_FOR_FILE file)
  if (EMBREE_STACK_PROTECTOR)
    set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "/GS-")
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Linker flag applied to executables and shared libraries in every configuration.
string(APPEND CMAKE_EXE_LINKER_FLAGS    " /DEPENDENTLOADFLAG:0x2000")
string(APPEND CMAKE_SHARED_LINKER_FLAGS " /DEPENDENTLOADFLAG:0x2000")

# --- Debug ---
string(APPEND CMAKE_CXX_FLAGS_DEBUG " ${COMMON_CXX_FLAGS}")
# The input is quoted so it is always passed as exactly one argument.
string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") # disable native runtime checks
string(APPEND CMAKE_CXX_FLAGS_DEBUG " /DDEBUG")          # enables assertions
string(APPEND CMAKE_CXX_FLAGS_DEBUG " /DTBB_USE_DEBUG")  # configures TBB in debug mode
string(APPEND CMAKE_CXX_FLAGS_DEBUG " /Ox")              # enable full optimizations
string(APPEND CMAKE_CXX_FLAGS_DEBUG " /Oi")              # inline intrinsic functions
string(APPEND CMAKE_EXE_LINKER_FLAGS_DEBUG    " /DEBUG") # generate debug information
string(APPEND CMAKE_SHARED_LINKER_FLAGS_DEBUG " /DEBUG") # generate debug information

# --- Release ---
string(APPEND CMAKE_CXX_FLAGS_RELEASE " ${COMMON_CXX_FLAGS}")
string(APPEND CMAKE_CXX_FLAGS_RELEASE " /Ox")            # enable full optimizations
string(APPEND CMAKE_CXX_FLAGS_RELEASE " /Oi")            # inline intrinsic functions

# --- RelWithDebInfo ---
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " ${COMMON_CXX_FLAGS}")
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " /Ox")     # enable full optimizations
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " /Oi")     # inline intrinsic functions
string(APPEND CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO    " /DEBUG") # generate debug information
string(APPEND CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO " /DEBUG") # generate debug information
|
||||||
|
|
||||||
|
# Security-related linker flags, appended to both executable and shared
# library link lines.
set(SECURE_LINKER_FLAGS "")
string(APPEND SECURE_LINKER_FLAGS " /NXCompat")    # compatible with data execution prevention (on by default)
string(APPEND SECURE_LINKER_FLAGS " /DynamicBase") # random rebase of executable at load time
if (CMAKE_SIZEOF_VOID_P EQUAL 4)
  string(APPEND SECURE_LINKER_FLAGS " /SafeSEH")   # invoke known exception handlers (Win32 only, x64 exception handlers are safe by design)
endif()
string(APPEND CMAKE_EXE_LINKER_FLAGS    " ${SECURE_LINKER_FLAGS}")
string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${SECURE_LINKER_FLAGS}")

# Suppressed linker warnings for shared libraries.
string(APPEND CMAKE_SHARED_LINKER_FLAGS " /IGNORE:4217") # locally defined symbol XXX imported in function YYY (happens as the ISPC API layer uses exported library functions)
string(APPEND CMAKE_SHARED_LINKER_FLAGS " /IGNORE:4049") # warning LNK4049: locally defined symbol _rtcOccluded1M imported

include(msvc_post)
|
||||||
|
|
||||||
13
Framework/external/embree/common/cmake/msvc_post.cmake
vendored
Normal file
13
Framework/external/embree/common/cmake/msvc_post.cmake
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# optionally use static runtime library
option(EMBREE_STATIC_RUNTIME "Use the static version of the C/C++ runtime library." OFF)
if (EMBREE_STATIC_RUNTIME)
  # The input strings are quoted: unquoted, an empty flags variable expands to
  # no argument at all and string(REPLACE) fails for lack of an input.
  string(REPLACE "/MDd" "/MTd" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
  string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
  string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
endif()

# remove define NDEBUG and instead set define DEBUG for config RelWithDebInfo
string(REPLACE "NDEBUG" "DEBUG" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
|
||||||
359
Framework/external/embree/common/cmake/package.cmake
vendored
Normal file
359
Framework/external/embree/common/cmake/package.cmake
vendored
Normal file
|
|
@ -0,0 +1,359 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
include(GNUInstallDirs)

# Outside of zip mode, on platforms that are neither Windows nor Apple,
# binaries go into a versioned "embree<major>" subdirectory of the bin dir.
if (NOT EMBREE_ZIP_MODE AND NOT WIN32 AND NOT APPLE)
  string(APPEND CMAKE_INSTALL_BINDIR "/embree${EMBREE_VERSION_MAJOR}")
  string(APPEND CMAKE_INSTALL_FULL_BINDIR "/embree${EMBREE_VERSION_MAJOR}")
endif()

# Install rpath: the absolute library directory for regular installs, or a
# path relative to the loading binary in zip mode.
if (NOT EMBREE_ZIP_MODE)
  set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_FULL_LIBDIR}")
elseif (APPLE)
  set(CMAKE_INSTALL_RPATH "@loader_path/../${CMAKE_INSTALL_LIBDIR}")
else()
  set(CMAKE_INSTALL_RPATH "$ORIGIN/../${CMAKE_INSTALL_LIBDIR}")
endif()
|
||||||
|
|
||||||
|
##############################################################
# Install SYCL specific files
##############################################################

# When SYCL support and dependency installation are both enabled, copy the
# SYCL runtime that sits next to the C++ compiler into the install tree.
# Files guarded by EXISTS are installed only if present at configure time.
if (EMBREE_SYCL_SUPPORT AND EMBREE_INSTALL_DEPENDENCIES)
  get_filename_component(DPCPP_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH)

  if (WIN32)

    # import libraries
    file(GLOB LIB_SYCL_LIB_FILES LIST_DIRECTORIES FALSE
      "${DPCPP_COMPILER_DIR}/../lib/sycl.lib"
      "${DPCPP_COMPILER_DIR}/../lib/sycl?.lib")
    install(FILES ${LIB_SYCL_LIB_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)

    # runtime DLLs
    file(GLOB LIB_SYCL_DLL_FILES LIST_DIRECTORIES FALSE
      "${DPCPP_COMPILER_DIR}/../bin/sycl.dll"
      "${DPCPP_COMPILER_DIR}/../bin/sycl?.dll")
    install(FILES ${LIB_SYCL_DLL_FILES} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)

    install(FILES "${DPCPP_COMPILER_DIR}/pi_level_zero.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
    install(FILES "${DPCPP_COMPILER_DIR}/win_proxy_loader.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib OPTIONAL)

    # optional DLLs, in the order their install rules are registered
    foreach(sycl_optional_dll
        "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/svml_dispmd.dll"
        "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/libmmd.dll"
        "${DPCPP_COMPILER_DIR}/libmmd.dll"
        "${DPCPP_COMPILER_DIR}/pi_win_proxy_loader.dll")
      if (EXISTS "${sycl_optional_dll}")
        install(FILES "${sycl_optional_dll}" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
      endif()
    endforeach()

  else()

    file(GLOB LIB_SYCL_FILES LIST_DIRECTORIES FALSE
      "${DPCPP_COMPILER_DIR}/../lib/libsycl.so"
      "${DPCPP_COMPILER_DIR}/../lib/libsycl.so.?"
      "${DPCPP_COMPILER_DIR}/../lib/libsycl.so.?.?"
      "${DPCPP_COMPILER_DIR}/../lib/libsycl.so.?.?.?"
      "${DPCPP_COMPILER_DIR}/../lib/libsycl.so.?.?.?-?")
    install(FILES ${LIB_SYCL_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)

    install(FILES "${DPCPP_COMPILER_DIR}/../lib/libpi_level_zero.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)

    # optional shared objects from the compiler's intel64_lin directory
    foreach(sycl_optional_so
        "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libsvml.so"
        "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libirng.so"
        "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libimf.so")
      if (EXISTS "${sycl_optional_so}")
        install(FILES "${sycl_optional_so}" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
      endif()
    endforeach()

    # libintlc is installed together with its single-suffix variant (libintlc.so.?)
    if (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so")
      file(GLOB LIB_SYCL_FILES LIST_DIRECTORIES FALSE
        "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so"
        "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so.?")
      install(FILES ${LIB_SYCL_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
    endif()

  endif()
endif()
|
||||||
|
|
||||||
|
##############################################################
# Install MSVC runtime
##############################################################

# On Windows: optionally install Intel compiler runtime DLLs, then install the
# msvcp/vcruntime DLLs reported by InstallRequiredSystemLibraries.
# Elsewhere: optionally install the Intel compiler runtime shared objects.
if (WIN32)
  if (SYCL_ONEAPI_ICX AND EMBREE_INSTALL_DEPENDENCIES)
    get_filename_component(DPCPP_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH)
    if (EXISTS "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/libmmd.dll")
      install(FILES "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/libmmd.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
    endif()
    # same path for the existence check and the install rule (no stray "//")
    if (EXISTS "${DPCPP_COMPILER_DIR}/libmmd.dll")
      install(FILES "${DPCPP_COMPILER_DIR}/libmmd.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
    endif()
    if (EXISTS "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/svml_dispmd.dll")
      install(FILES "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/svml_dispmd.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
    endif()
  endif()

  # Only collect the runtime library list; the install rule is written below.
  set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP TRUE)
  set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS TRUE)
  include(InstallRequiredSystemLibraries)
  # "[.]" matches a literal dot. A "\." inside a quoted CMake argument is
  # reduced to "." before the regex engine sees it and would match any character.
  list(FILTER CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS INCLUDE REGEX ".*msvcp[0-9]+[.]dll|.*vcruntime[0-9]+[.]dll|.*vcruntime[0-9]+_[0-9]+[.]dll")
  install(FILES ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)

else()

  if (SYCL_ONEAPI_ICX AND EMBREE_INSTALL_DEPENDENCIES)

    get_filename_component(DPCPP_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH)
    if (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libsvml.so")
      install(FILES "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libsvml.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
    endif()

    if (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libirng.so")
      install(FILES "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libirng.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
    endif()

    if (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libimf.so")
      install(FILES "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libimf.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
    endif()

    if (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so")
      file(GLOB LIB_SYCL_FILES LIST_DIRECTORIES FALSE
        "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so"
        "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so.?")
      install(FILES ${LIB_SYCL_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
    endif()

  endif()

endif()
|
||||||
|
|
||||||
|
##############################################################
# Install Headers
##############################################################

install(DIRECTORY include/embree4 DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" COMPONENT devel)
# man pages are not installed on Windows
if (NOT WIN32)
  install(DIRECTORY man/man3 DESTINATION "${CMAKE_INSTALL_MANDIR}" COMPONENT devel)
endif()

##############################################################
# Install Models
##############################################################

if (EMBREE_TUTORIALS)
  install(DIRECTORY tutorials/models DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
endif()

##############################################################
# Install Documentation
##############################################################

# Each of these files from the project root is installed into the doc
# directory as part of the "lib" component.
foreach(embree_doc_file
    LICENSE.txt
    CHANGELOG.md
    README.md
    readme.pdf
    third-party-programs.txt
    third-party-programs-TBB.txt
    third-party-programs-OIDN.txt
    third-party-programs-DPCPP.txt
    third-party-programs-oneAPI-DPCPP.txt)
  install(FILES "${PROJECT_SOURCE_DIR}/${embree_doc_file}" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
endforeach()
|
||||||
|
|
||||||
|
##############################################################
# Install scripts to set embree paths
##############################################################

# In zip mode the embree-vars.sh/.csh templates are configured and installed
# into the package root. Windows has no such scripts; macOS and all other
# platforms each use their own template directory.
if (EMBREE_ZIP_MODE AND NOT WIN32)
  if (APPLE)
    set(embree_vars_script_dir "${PROJECT_SOURCE_DIR}/scripts/install_macosx")
  else()
    set(embree_vars_script_dir "${PROJECT_SOURCE_DIR}/scripts/install_linux")
  endif()

  configure_file("${embree_vars_script_dir}/embree-vars.sh" embree-vars.sh @ONLY)
  configure_file("${embree_vars_script_dir}/embree-vars.csh" embree-vars.csh @ONLY)
  install(FILES "${PROJECT_BINARY_DIR}/embree-vars.sh" DESTINATION "." COMPONENT lib)
  install(FILES "${PROJECT_BINARY_DIR}/embree-vars.csh" DESTINATION "." COMPONENT lib)
endif()
|
||||||
|
|
||||||
|
##############################################################
# Install Embree CMake Configuration
##############################################################

# Version suffix used in the library file name: only the major version in
# zip mode, the full version otherwise.
if (EMBREE_ZIP_MODE)
  set(EMBREE_CONFIG_VERSION ${EMBREE_VERSION_MAJOR})
else()
  set(EMBREE_CONFIG_VERSION ${EMBREE_VERSION})
endif()

# File name of the library that the generated config file refers to; it
# depends on static vs. shared builds and on the platform's naming scheme.
if (EMBREE_STATIC_LIB)
  set(EMBREE_LIBRARY_FULLNAME ${CMAKE_STATIC_LIBRARY_PREFIX}${EMBREE_LIBRARY_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
elseif (WIN32)
  set(EMBREE_LIBRARY_FULLNAME ${CMAKE_IMPORT_LIBRARY_PREFIX}${EMBREE_LIBRARY_NAME}${CMAKE_IMPORT_LIBRARY_SUFFIX})
elseif (APPLE)
  set(EMBREE_LIBRARY_FULLNAME ${CMAKE_SHARED_LIBRARY_PREFIX}${EMBREE_LIBRARY_NAME}.${EMBREE_CONFIG_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX})
else()
  set(EMBREE_LIBRARY_FULLNAME ${CMAKE_SHARED_LIBRARY_PREFIX}${EMBREE_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${EMBREE_CONFIG_VERSION})
endif()

# Disabled alternative, kept for reference: for local "installs" and on
# Windows the config files could be placed in the install root, so that
# embree_DIR only needs to point at the install folder:
#   IF (WIN32 OR EMBREE_ZIP_MODE)
#     SET(EMBREE_CMAKECONFIG_DIR ".")
#     SET(EMBREE_CMAKEEXPORT_DIR "cmake")
#     SET(EMBREE_RELATIVE_ROOT_DIR ".")
#   ELSE() ... ENDIF()
set(EMBREE_CMAKECONFIG_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/embree-${EMBREE_VERSION}")
set(EMBREE_CMAKEEXPORT_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/embree-${EMBREE_VERSION}")
# Relative path from the config directory back up to the install root.
if (WIN32)
  set(EMBREE_RELATIVE_ROOT_DIR "../../../")
else()
  file(RELATIVE_PATH EMBREE_RELATIVE_ROOT_DIR "/${EMBREE_CMAKECONFIG_DIR}" "/")
endif()

configure_file(common/cmake/embree-config.cmake embree-config-install.cmake @ONLY)
configure_file(common/cmake/embree-config-version.cmake embree-config-version.cmake @ONLY)
# create a config file for the build directory
configure_file(common/cmake/embree-config-builddir.cmake embree-config.cmake @ONLY)

# The install-tree config is generated under a different name and renamed to
# embree-config.cmake on installation.
install(FILES "${PROJECT_BINARY_DIR}/embree-config-install.cmake" DESTINATION "${EMBREE_CMAKECONFIG_DIR}" RENAME "embree-config.cmake" COMPONENT devel)
install(FILES "${PROJECT_BINARY_DIR}/embree-config-version.cmake" DESTINATION "${EMBREE_CMAKECONFIG_DIR}" COMPONENT devel)
|
||||||
|
|
||||||
|
##############################################################
# CPack specific stuff
##############################################################

# --- package identity ---
set(CPACK_PACKAGE_NAME "Intel(R) Embree Ray Tracing Kernels")
set(CPACK_PACKAGE_FILE_NAME "embree-${EMBREE_VERSION}${EMBREE_VERSION_NOTE}")
if (EMBREE_SYCL_SUPPORT)
  string(APPEND CPACK_PACKAGE_FILE_NAME ".sycl")
  unset(EMBREE_VERSION_SUFFIX)  # same effect as set() without a value
endif()
#SET(CPACK_PACKAGE_ICON "${PROJECT_SOURCE_DIR}/embree-doc/images/icon.png")
#SET(CPACK_PACKAGE_RELOCATABLE TRUE)
set(CPACK_STRIP_FILES TRUE)

set(CPACK_PACKAGE_VERSION_MAJOR ${EMBREE_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${EMBREE_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${EMBREE_VERSION_PATCH})
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Intel(R) Embree implements high performance ray tracing kernels including acceleration structure construction and traversal.")
set(CPACK_PACKAGE_VENDOR "Intel Corporation")
set(CPACK_PACKAGE_CONTACT embree_support@intel.com)

# --- component layout: one archive per component group ---
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
set(CPACK_COMPONENTS_GROUPING ONE_PER_GROUP)

set(CPACK_COMPONENT_LIB_DISPLAY_NAME "Library")
set(CPACK_COMPONENT_LIB_DESCRIPTION "The Embree library including documentation.")
set(CPACK_COMPONENT_LIB_GROUP "embree")

set(CPACK_COMPONENT_DEVEL_DISPLAY_NAME "Development")
set(CPACK_COMPONENT_DEVEL_DESCRIPTION "Header Files for C and ISPC required to develop applications with Embree.")
set(CPACK_COMPONENT_DEVEL_GROUP "embree")

set(CPACK_COMPONENT_EXAMPLES_DISPLAY_NAME "Examples")
set(CPACK_COMPONENT_EXAMPLES_DESCRIPTION "Tutorials demonstrating how to use Embree.")
set(CPACK_COMPONENT_EXAMPLES_GROUP "embree")

set(CPACK_COMPONENT_TESTING_DISPLAY_NAME "Testing")
set(CPACK_COMPONENT_TESTING_DESCRIPTION "Models and reference images for tests")
set(CPACK_COMPONENT_TESTING_GROUP "embree-testing")

# dependencies between components (currently disabled)
#SET(CPACK_COMPONENT_DEVEL_DEPENDS lib)
#SET(CPACK_COMPONENT_EXAMPLES_DEPENDS lib)
#SET(CPACK_COMPONENT_LIB_REQUIRED ON) # always install the libs

# point to readme and license files
set(CPACK_RESOURCE_FILE_README "${PROJECT_SOURCE_DIR}/README.md")
set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE.txt")
|
|
||||||
|
# Windows specific settings
|
||||||
|
if(WIN32)
  # Windows: ZIP archive, architecture chosen from the pointer size.
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(ARCH x64)
    string(APPEND CPACK_PACKAGE_NAME " x64")
  else()
    set(ARCH win32)
    string(APPEND CPACK_PACKAGE_NAME " Win32")
  endif()

  set(CPACK_GENERATOR ZIP)
  string(APPEND CPACK_PACKAGE_FILE_NAME ".${ARCH}.windows")
  set(PACKAGE_BASE_NAME "${CPACK_PACKAGE_FILE_NAME}")
  set(PACKAGE_EXT "zip")

elseif(APPLE)
  # macOS: ZIP archive; the readme is copied to README.txt in the binary dir.
  configure_file(README.md README.txt)
  set(CPACK_RESOURCE_FILE_README "${PROJECT_BINARY_DIR}/README.txt")

  set(CPACK_GENERATOR ZIP)
  if(EMBREE_ARM)
    string(APPEND CPACK_PACKAGE_FILE_NAME ".arm64.macosx")
  else()
    string(APPEND CPACK_PACKAGE_FILE_NAME ".x86_64.macosx")
  endif()
  set(PACKAGE_BASE_NAME "${CPACK_PACKAGE_FILE_NAME}")
  set(PACKAGE_EXT "zip")

  # Runs the notarization script on the package base name.
  add_custom_target(
    post_package_notarize
    "${PROJECT_SOURCE_DIR}/scripts/package_post_build_notarize_macosx.sh"
    ${PACKAGE_BASE_NAME}
    ${EMBREE_SIGN_FILE}
  )

else()
  # Everything else (Linux): gzip-compressed tarball.
  set(CPACK_GENERATOR TGZ)
  string(APPEND CPACK_PACKAGE_FILE_NAME ".x86_64.linux")
  set(PACKAGE_BASE_NAME "${CPACK_PACKAGE_FILE_NAME}")
  set(PACKAGE_EXT "tar.gz")
  if(EMBREE_SYCL_SUPPORT)
    set(EMBREE_VERSION_SYCL_SUFFIX ".sycl")
  endif()
endif()
|
||||||
|
|
||||||
|
|
||||||
|
# "build": builds the CPack "package" target, then runs scripts/package_build.cmake
# with the package base name and extension.
add_custom_target(
  build
  ${CMAKE_COMMAND} --build . --config ${CMAKE_BUILD_TYPE} --target package -j8
  COMMAND
    ${CMAKE_COMMAND}
    -DPACKAGE_BASENAME=${PACKAGE_BASE_NAME}
    -DPACKAGE_EXT=${PACKAGE_EXT}
    -P ${PROJECT_SOURCE_DIR}/scripts/package_build.cmake
)

# "test_package": unpack step, configure the shipped testing project, run ctest
# on it, then the CHECK step of scripts/package_test.cmake.
add_custom_target(
  test_package
  ${CMAKE_COMMAND}
  -DWHAT="UNPACK"
  -DPACKAGE_BASENAME=${PACKAGE_BASE_NAME}
  -DPACKAGE_EXT=${PACKAGE_EXT}
  -P ${PROJECT_SOURCE_DIR}/scripts/package_test.cmake
  COMMAND
    cd embree_install/testing && ${CMAKE_COMMAND} -B build -DEMBREE_TESTING_INTENSITY=${EMBREE_TESTING_INTENSITY}
  COMMAND
    ctest --test-dir ${CMAKE_CURRENT_BINARY_DIR}/embree_install/testing/build -VV -C ${CMAKE_BUILD_TYPE} --output-log ctest.output
  COMMAND
    ${CMAKE_COMMAND} -DWHAT="CHECK" -P ${PROJECT_SOURCE_DIR}/scripts/package_test.cmake
)

# Location of the integration test binary relative to the integration project dir.
set(INTEGRATE_BINARY "./build/test")
if(WIN32)
  set(INTEGRATE_BINARY "./build/Release/test.exe")
endif()

# "test_integration": unpack step, then configure, build and run the
# tests/integration/test_embree_release project against the unpacked install.
add_custom_target(
  test_integration
  ${CMAKE_COMMAND}
  -DWHAT="UNPACK"
  -DPACKAGE_BASENAME=${PACKAGE_BASE_NAME}
  -DPACKAGE_EXT=${PACKAGE_EXT}
  -P ${PROJECT_SOURCE_DIR}/scripts/package_test.cmake
  COMMAND
    cd ${PROJECT_SOURCE_DIR}/tests/integration/test_embree_release && ${CMAKE_COMMAND} -B build --preset ${EMBREE_TESTING_INTEGRATION_PRESET} -Dembree_DIR="${CMAKE_CURRENT_BINARY_DIR}/embree_install/lib/cmake/embree-${EMBREE_VERSION}"
  COMMAND
    cd ${PROJECT_SOURCE_DIR}/tests/integration/test_embree_release && ${CMAKE_COMMAND} --build build --config Release
  COMMAND
    cd ${PROJECT_SOURCE_DIR}/tests/integration/test_embree_release && ${INTEGRATE_BINARY}
)
|
||||||
4
Framework/external/embree/common/cmake/rpm_ldconfig.sh
vendored
Normal file
4
Framework/external/embree/common/cmake/rpm_ldconfig.sh
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Run ldconfig to refresh the dynamic linker's shared-library cache.
# NOTE(review): presumably used as an RPM post-(un)install scriptlet, judging by the file name -- confirm against the packaging setup.
/sbin/ldconfig
|
||||||
561
Framework/external/embree/common/cmake/test.cmake
vendored
Normal file
561
Framework/external/embree/common/cmake/test.cmake
vendored
Normal file
|
|
@ -0,0 +1,561 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
if (BUILD_TESTING OR EMBREE_TESTING_INSTALL_TESTS)
|
||||||
|
|
||||||
|
include(CTest)

# When tests are installed, the test registrations are mirrored into a
# generated CMake file; start it out empty.
if(EMBREE_TESTING_INSTALL_TESTS)
  set(EMBREE_INSTALL_CTESTTESTFILE "${CMAKE_CURRENT_BINARY_DIR}/embree-addtests.cmake")
  file(WRITE "${EMBREE_INSTALL_CTESTTESTFILE}" "")
endif()

# MY_PROJECT_BINARY_DIR is the working directory used for all registered tests.
if(NOT EMBREE_TESTING_PACKAGE_TEST_PROJECT)
  if(WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC")
    # MSVC on Windows: use the ${CMAKE_BUILD_TYPE} sub-directory.
    set(MY_PROJECT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
  else()
    set(MY_PROJECT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
  endif()
else()
  # Packaged test project: binaries live in the install's bin dir.
  set(MY_PROJECT_BINARY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${EMBREE_INSTALL_BINDIR}")
endif()

# User-facing testing options (only declared when not running as the
# packaged test project).
if(NOT EMBREE_TESTING_PACKAGE_TEST_PROJECT)
  set(EMBREE_TESTING_INTENSITY 1 CACHE STRING "Intensity of testing (0 = no testing, 1 = verify and tutorials, 2 = light testing, 3 = intensive testing, 4 = very intensive testing).")
  set_property(CACHE EMBREE_TESTING_INTENSITY PROPERTY STRINGS 0 1 2 3 4)
  set(EMBREE_TESTING_ONLY_SYCL_TESTS OFF CACHE BOOL "Run only tests with the sycl support.")
  set(EMBREE_TESTING_MEMCHECK OFF CACHE BOOL "Turns on memory checking for some tests.")
  set(EMBREE_TESTING_BENCHMARK OFF CACHE BOOL "Turns benchmarking on.")
  set(EMBREE_TESTING_BENCHMARK_DATABASE "${PROJECT_BINARY_DIR}" CACHE PATH "Path to database for benchmarking.")
  set(EMBREE_TESTING_PACKAGE OFF CACHE BOOL "Packages release as test.")
  set(EMBREE_TESTING_KLOCWORK OFF CACHE BOOL "Runs Klocwork as test.")
  set(EMBREE_TESTING_SDE OFF CACHE STRING "Uses SDE to run tests for specified CPU.")
  set_property(CACHE EMBREE_TESTING_SDE PROPERTY STRINGS OFF pnr nhm wsm snb ivb hsw bdw knl skl skx cnl)
endif()
|
||||||
|
|
||||||
|
# Forwards the given set_tests_properties() arguments to every variant of
# <testname> that has been registered (bare name, _ispc, _sycl).
#   testname - base test name
#   ARGN     - arguments passed through to set_tests_properties()
# When EMBREE_TESTING_INSTALL_TESTS is on, the same call is also appended to
# the generated install test file.
function(SET_EMBREE_TEST_PROPERTIES testname)
  foreach(variant IN ITEMS _cpp _ispc _sycl)
    # the C++ variant is registered under the bare test name
    if(variant STREQUAL "_cpp")
      set(suffix "")
    else()
      set(suffix "${variant}")
    endif()

    # EMBREE_TEST_<name>_DEFINED is the marker set when the test was added
    set(testnamedef "EMBREE_TEST_${testname}${suffix}_DEFINED")
    if(${testnamedef})
      set_tests_properties(${testname}${suffix} ${ARGN})
    endif()
  endforeach()

  if(EMBREE_TESTING_INSTALL_TESTS)
    file(APPEND "${EMBREE_INSTALL_CTESTTESTFILE}" "SET_EMBREE_TEST_PROPERTIES(${testname} ${ARGN}) \n")
  endif()
endfunction()
|
||||||
|
|
||||||
|
# Registers <testname> unconditionally, running <executable> with the extra
# arguments in ARGN from MY_PROJECT_BINARY_DIR, and records the
# EMBREE_TEST_<testname>_DEFINED marker in the cache.
macro(ADD_EMBREE_GENERIC_TEST testname executable)
  add_test(
    NAME ${testname}
    COMMAND ${executable} ${ARGN}
    WORKING_DIRECTORY "${MY_PROJECT_BINARY_DIR}"
  )
  # marker consulted by SET_EMBREE_TEST_PROPERTIES
  set(testnamedef EMBREE_TEST_${testname}_DEFINED)
  set(${testnamedef} "1" CACHE INTERNAL "")
endmacro()
|
||||||
|
|
||||||
|
# Registers the C++ variant of a test under the bare <testname>.
#   testname   - test name
#   executable - command to run
#   ARGN       - extra command line arguments
# Skipped when SYCL support is on and only SYCL tests are requested.
macro(ADD_EMBREE_GENERIC_CPP_TEST testname executable)
  # Variables are referenced by name rather than expanded with ${...}, so an
  # empty/undefined option evaluates to false instead of breaking the if().
  if(NOT (EMBREE_SYCL_SUPPORT AND EMBREE_TESTING_ONLY_SYCL_TESTS))
    add_test(NAME ${testname}
             WORKING_DIRECTORY "${MY_PROJECT_BINARY_DIR}"
             COMMAND ${executable} ${ARGN})
    # marker consulted by SET_EMBREE_TEST_PROPERTIES
    set(testnamedef EMBREE_TEST_${testname}_DEFINED)
    set(${testnamedef} "1" CACHE INTERNAL "")
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Registers the ISPC variant of a test as <testname>_ispc, running
# <executable>_ispc with the extra arguments in ARGN.
# Requires EMBREE_ISPC_SUPPORT and EMBREE_RAY_PACKETS; skipped when SYCL
# support is on and only SYCL tests are requested.
macro(ADD_EMBREE_GENERIC_ISPC_TEST testname executable)
  # Variables are referenced by name rather than expanded with ${...}, so an
  # empty/undefined option evaluates to false instead of breaking the if().
  if(NOT (EMBREE_SYCL_SUPPORT AND EMBREE_TESTING_ONLY_SYCL_TESTS))
    if(EMBREE_ISPC_SUPPORT AND EMBREE_RAY_PACKETS)
      add_test(NAME ${testname}_ispc
               WORKING_DIRECTORY "${MY_PROJECT_BINARY_DIR}"
               COMMAND ${executable}_ispc ${ARGN})
      # marker consulted by SET_EMBREE_TEST_PROPERTIES
      set(testnamedef EMBREE_TEST_${testname}_ispc_DEFINED)
      set(${testnamedef} "1" CACHE INTERNAL "")
    endif()
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Registers the SYCL variant of a test as <testname>_sycl, running
# <executable>_sycl with the extra arguments in ARGN. Only active when
# EMBREE_SYCL_SUPPORT is on. The test gets a TIMEOUT property of 50.
macro(ADD_EMBREE_GENERIC_SYCL_TEST testname executable)
  if(EMBREE_SYCL_SUPPORT)
    # working directory quoted like in the sibling CPP/ISPC macros
    add_test(NAME ${testname}_sycl
             WORKING_DIRECTORY "${MY_PROJECT_BINARY_DIR}"
             COMMAND ${executable}_sycl ${ARGN})
    # marker consulted by SET_EMBREE_TEST_PROPERTIES
    set(testnamedef EMBREE_TEST_${testname}_sycl_DEFINED)
    set(${testnamedef} "1" CACHE INTERNAL "")
    set_tests_properties(${testname}_sycl PROPERTIES TIMEOUT 50)
  endif()
endmacro()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Checks if the current cmake configuration is compatible with <condition>
|
||||||
|
# condition may be a triple of CMAKE_VARIABLE_NAME op VALUE
|
||||||
|
# supported operators for op are: ==, !=, <, <=, >, >=
|
||||||
|
# Evaluates a single condition of the form "VARIABLE op VALUE" against the
# current CMake configuration.
#   out       - name of the result variable in the caller's scope (1 = holds, 0 = does not)
#   condition - string with exactly three space-separated tokens
# Supported operators: ==, != (string comparison) and >, >=, <, <= (numeric).
# Raises FATAL_ERROR for a malformed condition or an unknown operator.
function(EMBREE_TESTING_CHECK_OPTION out condition)
  # split on spaces; every token except the last keeps its trailing space
  string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" tokens "${condition}")
  list(LENGTH tokens token_count)
  if(NOT token_count EQUAL 3)
    message(FATAL_ERROR "illegal embree_options condition: ${condition}")
  endif()

  # we require every condition to follow the scheme "variable_name op value"
  list(GET tokens 0 option)
  list(GET tokens 1 comp)
  list(GET tokens 2 value)

  # drop the trailing spaces left over from tokenizing
  string(STRIP "${option}" option)
  string(STRIP "${comp}" comp)
  string(STRIP "${value}" value)

  # default: condition not met
  set(${out} 0 PARENT_SCOPE)
  if("${comp}" STREQUAL "==")
    if("${${option}}" STREQUAL "${value}")
      set(${out} 1 PARENT_SCOPE)
    endif()
  elseif("${comp}" STREQUAL "!=")
    if(NOT ("${${option}}" STREQUAL "${value}"))
      set(${out} 1 PARENT_SCOPE)
    endif()
  elseif("${comp}" STREQUAL ">")
    if("${${option}}" GREATER "${value}")
      set(${out} 1 PARENT_SCOPE)
    endif()
  elseif("${comp}" STREQUAL ">=")
    if("${${option}}" GREATER_EQUAL "${value}")
      set(${out} 1 PARENT_SCOPE)
    endif()
  elseif("${comp}" STREQUAL "<")
    if("${${option}}" LESS "${value}")
      set(${out} 1 PARENT_SCOPE)
    endif()
  elseif("${comp}" STREQUAL "<=")
    if("${${option}}" LESS_EQUAL "${value}")
      set(${out} 1 PARENT_SCOPE)
    endif()
  else()
    message(FATAL_ERROR "Could not parse embree_option condition: ${condition}")
  endif()
endfunction()
|
||||||
|
|
||||||
|
# Checks multiple options from a list with EMBREE_TESTING_CHECK_OPTION
|
||||||
|
# Evaluates every condition in <conditions> with EMBREE_TESTING_CHECK_OPTION.
#   out        - name of the result variable in the caller's scope
#   conditions - CMake list of "VARIABLE op VALUE" strings
# Result is 1 when all conditions hold (or the list is empty), 0 as soon as
# one of them fails.
function(EMBREE_TESTING_CHECK_OPTIONS_LIST out conditions)
  set(all_hold 1)
  foreach(cond ${conditions})
    EMBREE_TESTING_CHECK_OPTION(single_result ${cond})
    if(single_result EQUAL 0)
      # first failing condition decides the outcome
      set(all_hold 0)
      break()
    endif()
  endforeach()
  set(${out} ${all_hold} PARENT_SCOPE)
endfunction()
|
||||||
|
|
||||||
|
|
||||||
|
# looks for ifile in multiple possible locations and outputs a file with absolute path in ofile
|
||||||
|
# Resolves <ifile> to an existing file and stores the result in <ofile>
# (caller's scope). Locations are tried in this order:
#   1. <ifile> as given (absolute, or relative to the current working dir)
#   2. ${PROJECT_SOURCE_DIR}/tests/<ifile>
#   3. ${CMAKE_CURRENT_SOURCE_DIR}/<ifile>
# If nothing is found, <ofile> is set to "" and, when <errmsgflavor> is
# non-empty, a FATAL_ERROR naming the file kind (<errmsgflavor>) is raised.
function(EMBREE_FIND_TEST_FILE ifile ofile errmsgflavor)
  if(EXISTS "${ifile}") # abs path, use get_filename_component because it could also be relative to cwd
    get_filename_component(absifile "${ifile}" ABSOLUTE)
    set(${ofile} "${absifile}" PARENT_SCOPE)
  elseif(EXISTS "${PROJECT_SOURCE_DIR}/tests/${ifile}") # testing dir
    set(${ofile} "${PROJECT_SOURCE_DIR}/tests/${ifile}" PARENT_SCOPE)
  elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${ifile}") # relative to source folder
    set(${ofile} "${CMAKE_CURRENT_SOURCE_DIR}/${ifile}" PARENT_SCOPE)
  else()
    set(${ofile} "" PARENT_SCOPE)
    if(errmsgflavor)
      # list the candidates that were actually probed above
      message(FATAL_ERROR
        "Could not find ${errmsgflavor} \"${ifile}\"\n"
        "looked for:\n"
        " ${ifile}\n"
        " ${PROJECT_SOURCE_DIR}/tests/${ifile}\n"
        " ${CMAKE_CURRENT_SOURCE_DIR}/${ifile}\n"
      )
    endif()
  endif()
endfunction()
|
||||||
|
|
||||||
|
|
||||||
|
# Extracts the values that follow <keyword> in an ADD_EMBREE_TEST_ECS
# argument list, up to the next recognized keyword or the end of the list.
#   args    - the argument list (passed as one quoted list)
#   keyword - keyword whose values are wanted (e.g. ARGS, CONDITION)
#   sublist - name of the result variable in the caller's scope; left unset
#             when the keyword is absent or has no values
function(EMBREE_ADD_TEST_PARSE_SUBLIST args keyword sublist)
  set(myargs ${args})
  set(mysublist "")
  # every keyword/flag understood by ADD_EMBREE_TEST_ECS terminates a sublist
  set(keywords
    ECS XML NO_REFERENCE REFERENCE REFERENCE_SUFFIX INTENSITY
    CONDITION_FILE CONDITION ARGS MEMCHECK NO_CPP NO_ISPC NO_SYCL
    NO_POSTFIX GEN_REFERENCE)

  # search the list that was passed in, not a variable of the caller
  list(FIND myargs "${keyword}" istart)
  if(NOT istart EQUAL -1)
    # iend = index of the closest keyword after istart (or the list length)
    list(LENGTH myargs iend)
    foreach(k IN LISTS keywords)
      list(FIND myargs "${k}" i)
      if((i GREATER istart) AND (i LESS iend))
        set(iend ${i})
      endif()
    endforeach()

    math(EXPR first "${istart}+1")
    math(EXPR count "${iend}-${first}")
    if(count GREATER 0)
      list(SUBLIST myargs ${first} ${count} mysublist)
    endif()
  endif()

  set(${sublist} ${mysublist} PARENT_SCOPE)
endfunction()
|
||||||
|
|
||||||
|
# Reports whether the flag <keyword> occurs in an argument list.
#   args    - the argument list (passed as one quoted list)
#   keyword - flag to look for (e.g. NO_CPP)
#   value   - name of the result variable in the caller's scope (ON / OFF)
function(EMBREE_ADD_TEST_PARSE_FLAG args keyword value)
  set(myargs ${args})
  # search the list that was passed in, not a variable of the caller
  list(FIND myargs "${keyword}" i)
  if(i EQUAL -1)
    set(${value} OFF PARENT_SCOPE)
  else()
    set(${value} ON PARENT_SCOPE)
  endif()
endfunction()
|
||||||
|
|
||||||
|
# ADD_EMBREE_TEST_ECS(testname exename [ECS <file> | XML <file>] [NO_REFERENCE | REFERENCE <path> | REFERENCE_SUFFIX <suffix>] [INTENSITY <i>] [CONDITION <conds>] [ARGS <args>] [GEN_REFERENCE])
|
||||||
|
# [ECS <file> | XML <inputfile> | OPTIONS <inputfile>]
|
||||||
|
# - looks for file and calls the test command with either "-c <inputfile>.ecs" or "-i <inputfile>.xml"
|
||||||
|
#
|
||||||
|
# [NO_REFERENCE | REFERENCE <path> | REFERENCE_SUFFIX <suffix>]
|
||||||
|
# - if not specified, a reference will be expected in the same folder as *.ecs with name *.ecs.exename.exr
|
||||||
|
# - NO_REFERENCE: don't look for a reference (no --compare in test command)
|
||||||
|
# - REFERENCE <path>: use the reference located in <path>. Same rules apply as for finding ecs files: absolute or relative to CMAKE_CURRENT_SOURCE_DIR path, must not be located outside the embree root dir.
|
||||||
|
# - REFERENCE_SUFFIX <suffix>: use the default reference location and name with a suffix before the last file extension, e.g. *.ecs.exename<suffix>.exr
|
||||||
|
# - if this argument is not specified, looks for
|
||||||
|
# 1. <inputfile>.exename.exr, or
|
||||||
|
# 2. <testname>.exr, if no <inputfile> was given
|
||||||
|
#
|
||||||
|
# [INTENSITY <i>]
|
||||||
|
# - default i = 1
|
||||||
|
# - sets the intensity level for the test, test is only run if ${EMBREE_TESTING_INTENSITY} GREATER_EQUAL i
|
||||||
|
# - could be done with an *.embree_options file, but this is more flexible, e.g. you can easier share reference images without specifying an absolute path
|
||||||
|
# - DOES NOT override EMBREE_TESTING_INTENSITY, if specified in *.embree_options
|
||||||
|
#
|
||||||
|
# [CONDITION_FILE <file>]
|
||||||
|
# - file containing additional conditions
|
||||||
|
# - conditions are specified linewise in form of: EMBREE_OPTION op VALUE, where EMBREE_OPTION is a cmake variable used during embree configuration and op is one of ==, !=, <, <=, >, >=
|
||||||
|
# - if this argument is not specified, looks for
|
||||||
|
# 1. <inputfile>.embree_options, or
|
||||||
|
# 2. <testname>.embree_options, if no <inputfile> was given
|
||||||
|
#
|
||||||
|
# [CONDITION <conds>]
|
||||||
|
# - cmake list of additional conditions, specified the same way as in an embree_options file
|
||||||
|
#
|
||||||
|
# [ARGS <args>]
|
||||||
|
# - additional arguments for the test command
|
||||||
|
#
|
||||||
|
# [GEN_REFERENCE]
|
||||||
|
# - writes the reference to the expected location
|
||||||
|
# - could also be done with the ARGS parameter, but this way we don't have to deal with paths
|
||||||
|
#
|
||||||
|
# EXAMPLES
|
||||||
|
#
|
||||||
|
# all optional arguments default,
|
||||||
|
# - looks for points.ecs.embree_options to filter out test by configured options, if not found no restrictions
|
||||||
|
# - runs for EMBREE_TESTING_INTENSITY >= 1
|
||||||
|
# - reference will be expected in the same folder as points.ecs with name points.ecs.embree_viewer.exr
|
||||||
|
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "models/xxx/points.ecs") # 1)
|
||||||
|
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "${PROJECT_SOURCE_DIR}/models/xxx/points.ecs") # 2) same as 1) but with absolute path to ecs
|
||||||
|
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "points.ecs") # 3) same as 1) but looks for points.ecs in the ${CMAKE_CURRENT_SOURCE_DIR} i.e. <embree_root>/tutorials/viewer/points.ecs
|
||||||
|
|
||||||
|
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "models/xxx/points.ecs" ARGS --coherent INTENSITY 2) # 4) same as 1) but runs only at EMBREE_TESTING_INTENSITY >= 2
|
||||||
|
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "models/xxx/points.ecs" REFERENCE_SUFFIX "_quads" ARGS
|
||||||
|
# --coherent
|
||||||
|
# --convert-triangles-to-quads INTENSITY 2) # 5) same as 4) but expects reference name points.ecs.embree_viewer_quads.exr
|
||||||
|
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "models/xxx/points.ecs" REFERENCE "special.exr" ARGS
|
||||||
|
# --coherent
|
||||||
|
# --convert-triangles-to-quads INTENSITY 2) # 5) same as 4) but expects reference <embree-root>/tutorials/viewer/special.exr
|
||||||
|
## 1) - 5) all share the same base input arguments defined in points.ecs as well as the points.ecs.embree_options
|
||||||
|
|
||||||
|
#ADD_EMBREE_TEST_ECS(point_geometry embree_point_geometry) # 6) no ecs file, uses point_geometry.embree_options if exists, expects reference ${CMAKE_CURRENT_SOURCE_DIR}/point_geometry.exr
|
||||||
|
#ADD_EMBREE_TEST_ECS(verify verify NO_REFERENCE) # 7) no ecs file, uses verify.embree_options if exists, no reference
|
||||||
|
#ADD_EMBREE_TEST_ECS(verify_memcheck verify NO_REFERENCE CONDITION "EMBREE_TESTING_MEMCHECK == ON") # 8) same as 7) but with extra condition
|
||||||
|
|
||||||
|
# Registers the cpp/ispc/sycl variants of a test from keyword arguments.
#   testname   - base name of the test
#   executable - base name of the executable to run
#   ARGN       - keyword arguments: ECS/XML <file>, NO_REFERENCE,
#                REFERENCE <path>, REFERENCE_SUFFIX <suffix>, INTENSITY <i>,
#                CONDITION_FILE <file>, CONDITION <conds>, ARGS <args>,
#                MEMCHECK, NO_CPP, NO_ISPC, NO_SYCL, NO_POSTFIX, GEN_REFERENCE
# The test is only added when all conditions hold and its intensity does not
# exceed EMBREE_TESTING_INTENSITY. With EMBREE_TESTING_INSTALL_TESTS the used
# files are installed and an equivalent call is appended to the generated
# install test file.
function(ADD_EMBREE_TEST_ECS testname executable)
  # optional allow-list of test names
  if(EMBREE_TESTING_FILTER_TESTNAMES)
    list(FIND EMBREE_TESTING_FILTER_TESTNAMES "${testname}" i)
    if(${i} EQUAL -1)
      return()
    endif()
  endif()

  set(nargs ${ARGN})

  # disable everything; the ADD_EMBREE_GENERIC_* macros flip these to "1"
  set(testnamedef EMBREE_TEST_${testname}_DEFINED)
  set(${testnamedef} "0" CACHE INTERNAL "")
  set(testnamedef EMBREE_TEST_${testname}_ispc_DEFINED)
  set(${testnamedef} "0" CACHE INTERNAL "")
  set(testnamedef EMBREE_TEST_${testname}_sycl_DEFINED)
  set(${testnamedef} "0" CACHE INTERNAL "")

  # ---- input file ----
  # ECS mode -> single parameter with filename after ECS keyword
  list(FIND nargs "ECS" i)
  if(NOT (i EQUAL -1))
    set(inputtype "-c")
    math(EXPR i "${i}+1")
    list(GET nargs ${i} out)
    EMBREE_FIND_TEST_FILE("${out}" inputfile "test file")
  endif()
  # XML mode -> single parameter with filename after XML keyword
  list(FIND nargs "XML" i)
  if(NOT (i EQUAL -1))
    set(inputtype "-i")
    math(EXPR i "${i}+1")
    list(GET nargs ${i} out)
    EMBREE_FIND_TEST_FILE("${out}" inputfile "test file")
  endif()
  # no filetype keyword -> look for <testname>.ecs, then <testname>.xml
  # (optional; neither has to exist)
  if(NOT inputtype)
    EMBREE_FIND_TEST_FILE("${testname}.ecs" inputfile "")
    if(inputfile)
      set(inputtype "-c")
    else()
      EMBREE_FIND_TEST_FILE("${testname}.xml" inputfile "")
      if(inputfile)
        set(inputtype "-i")
      endif()
    endif()
  endif()

  # ---- reference image ----
  # no reference image mode -> no additional parameter to parse
  list(FIND nargs "NO_REFERENCE" i)
  if(NOT (i EQUAL -1))
    set(no_reference ON)
  endif()
  # reference suffix mode -> single parameter with suffix to default reference file name
  list(FIND nargs "REFERENCE_SUFFIX" i)
  if(NOT (i EQUAL -1))
    math(EXPR i "${i}+1")
    list(GET nargs ${i} out)
    EMBREE_FIND_TEST_FILE("${inputfile}.${executable}${out}.exr" referencefile "reference image")
  endif()
  # reference mode -> single parameter with path to reference image
  list(FIND nargs "REFERENCE" i)
  if(NOT (i EQUAL -1))
    math(EXPR i "${i}+1")
    list(GET nargs ${i} out)
    EMBREE_FIND_TEST_FILE("${out}" referencefile "reference image")
  endif()
  # no reference keyword -> look for <testname>.exr and <inputfile>.<executable>.exr respectively
  if((NOT no_reference) AND (NOT referencefile))
    if(NOT inputfile)
      EMBREE_FIND_TEST_FILE("${testname}.exr" referencefile "reference image")
    else()
      EMBREE_FIND_TEST_FILE("${inputfile}.${executable}.exr" referencefile "reference image")
    endif()
  endif()

  # ---- intensity: single integer parameter, default 1 ----
  set(intensity 1)
  list(FIND nargs "INTENSITY" i)
  if(NOT (i EQUAL -1))
    math(EXPR i "${i}+1")
    list(GET nargs ${i} intensity)
  endif()

  # ---- conditions ----
  set(conditions "")
  set(conditionsfile)
  list(FIND nargs "CONDITION_FILE" i)
  if(NOT (i EQUAL -1))
    # condition file -> single parameter following the keyword
    math(EXPR i "${i}+1")
    list(LENGTH nargs nargs_count)
    if(i LESS nargs_count)
      list(GET nargs ${i} conditionsfile)
      EMBREE_FIND_TEST_FILE("${conditionsfile}" conditionsfile "")
    endif()
  elseif(NOT inputtype)
    # no <inputfile> specified -> look for <testname>.embree_options
    EMBREE_FIND_TEST_FILE("${testname}.embree_options" conditionsfile "")
  else()
    # <inputfile> specified -> look for <inputfile>.embree_options
    EMBREE_FIND_TEST_FILE("${inputfile}.embree_options" conditionsfile "")
  endif()
  if(conditionsfile)
    # one condition per line
    file(READ "${conditionsfile}" lines)
    string(REGEX REPLACE "\n" ";" conditions "${lines}")
  endif()

  # additional conditions given inline
  EMBREE_ADD_TEST_PARSE_SUBLIST("${nargs}" "CONDITION" extra_conditions)
  list(APPEND conditions ${extra_conditions})

  # extra command line arguments
  EMBREE_ADD_TEST_PARSE_SUBLIST("${nargs}" "ARGS" extraargs)

  # boolean flags
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "MEMCHECK" memcheck)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "NO_CPP" no_cpp)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "NO_ISPC" no_ispc)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "NO_SYCL" no_sycl)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "NO_POSTFIX" no_postfix)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "GEN_REFERENCE" gen_reference)

  # ---- add the test, if the conditions are met ----
  set(matchconditions ON)
  if(conditions)
    EMBREE_TESTING_CHECK_OPTIONS_LIST(out "${conditions}")
    if(out EQUAL 0)
      set(matchconditions OFF)
    endif()
  endif()

  # LESS_EQUAL not supported on all CI runners
  if(matchconditions AND ((intensity LESS EMBREE_TESTING_INTENSITY) OR (intensity EQUAL EMBREE_TESTING_INTENSITY)))
    # assemble the command line
    set(args "")
    if(inputfile)
      list(APPEND args "${inputtype} ${inputfile}")
    endif()
    if(NOT no_reference)
      list(APPEND args "--compare ${referencefile}")
    endif()
    if(gen_reference)
      list(APPEND args "-o ${referencefile}")
    endif()
    list(APPEND args ${extraargs})

    if(memcheck)
      # run "<memcheck command> <memcheck options> <binary> <args>"
      # NOTE(review): the ISPC/SYCL macros append _ispc/_sycl to their
      # executable argument, which here is the memcheck command -- confirm
      # this is intended.
      list(PREPEND args "${MY_PROJECT_BINARY_DIR}/${executable}")
      list(PREPEND args "${EMBREE_MEMORYCHECK_COMMAND_OPTIONS}")
      if(no_postfix)
        ADD_EMBREE_GENERIC_TEST(${testname} ${EMBREE_MEMORYCHECK_COMMAND} ${args})
      else()
        # names instead of ${...} so empty/undefined options read as false
        if(NOT (EMBREE_SYCL_SUPPORT AND EMBREE_TESTING_ONLY_SYCL_TESTS))
          if(NOT (no_cpp))
            ADD_EMBREE_GENERIC_CPP_TEST(${testname} ${EMBREE_MEMORYCHECK_COMMAND} ${args})
          endif()
          if(NOT (no_ispc))
            ADD_EMBREE_GENERIC_ISPC_TEST(${testname} ${EMBREE_MEMORYCHECK_COMMAND} ${args})
          endif()
        endif()
        if(NOT (no_sycl))
          ADD_EMBREE_GENERIC_SYCL_TEST(${testname} ${EMBREE_MEMORYCHECK_COMMAND} ${args})
        endif()
      endif()
    else()
      if(no_postfix)
        ADD_EMBREE_GENERIC_TEST(${testname} ${executable} ${args})
      else()
        if(NOT (no_cpp))
          ADD_EMBREE_GENERIC_CPP_TEST(${testname} ${executable} ${args})
        endif()
        if(NOT (no_ispc))
          ADD_EMBREE_GENERIC_ISPC_TEST(${testname} ${executable} ${args})
        endif()
        if(NOT (no_sycl))
          ADD_EMBREE_GENERIC_SYCL_TEST(${testname} ${executable} ${args})
        endif()
      endif()
    endif()
  endif()

  # ---- install the test files and mirror the call into the install test file ----
  if(EMBREE_TESTING_INSTALL_TESTS)
    # each file is installed under its path relative to the project root
    if(inputfile)
      get_filename_component(inputpath ${inputfile} DIRECTORY)
      string(REPLACE "${PROJECT_SOURCE_DIR}/" "" inputpath "${inputpath}")
      install(FILES "${inputfile}"
              DESTINATION "${CMAKE_INSTALL_TESTDIR}/${inputpath}"
              COMPONENT testing)
    endif()

    if(conditionsfile)
      get_filename_component(conditionspath ${conditionsfile} DIRECTORY)
      string(REPLACE "${PROJECT_SOURCE_DIR}/" "" conditionspath "${conditionspath}")
      install(FILES "${conditionsfile}"
              DESTINATION "${CMAKE_INSTALL_TESTDIR}/${conditionspath}"
              COMPONENT testing)
    endif()

    if(referencefile)
      get_filename_component(referencepath ${referencefile} DIRECTORY)
      string(REPLACE "${PROJECT_SOURCE_DIR}/" "" referencepath "${referencepath}")
      install(FILES "${referencefile}"
              DESTINATION "${CMAKE_INSTALL_TESTDIR}/${referencepath}"
              COMPONENT testing)
    endif()

    # rebuild the call with project-relative paths
    set(testcall "ADD_EMBREE_TEST_ECS(${testname} ${executable}")
    if(inputfile)
      string(REPLACE "${PROJECT_SOURCE_DIR}/" "" inputfile "${inputfile}")
      if(inputtype STREQUAL "-c")
        string(APPEND testcall " \n ECS ${inputfile}")
      elseif(inputtype STREQUAL "-i")
        string(APPEND testcall " \n XML ${inputfile}")
      endif()
    endif()

    if(no_reference)
      string(APPEND testcall " \n NO_REFERENCE")
    else()
      string(REPLACE "${PROJECT_SOURCE_DIR}/" "" referencefile "${referencefile}")
      string(APPEND testcall " \n REFERENCE ${referencefile}")
    endif()

    string(APPEND testcall " \n INTENSITY ${intensity}")
    if(memcheck)
      string(APPEND testcall " \n MEMCHECK")
    endif()
    if(no_cpp)
      string(APPEND testcall " \n NO_CPP")
    endif()
    if(no_ispc)
      string(APPEND testcall " \n NO_ISPC")
    endif()
    if(no_sycl)
      string(APPEND testcall " \n NO_SYCL")
    endif()
    if(no_postfix)
      string(APPEND testcall " \n NO_POSTFIX")
    endif()
    string(REPLACE "${PROJECT_SOURCE_DIR}/" "" conditionsfile "${conditionsfile}")
    string(APPEND testcall " \n CONDITION_FILE ${conditionsfile}")
    string(APPEND testcall " \n CONDITION ")
    foreach(c ${conditions})
      string(APPEND testcall " \"${c}\"")
    endforeach()
    string(APPEND testcall " \n ARGS ")
    foreach(a ${extraargs})
      string(APPEND testcall " ${a}")
    endforeach()

    string(APPEND testcall ")\n\n")
    file(APPEND "${EMBREE_INSTALL_CTESTTESTFILE}" "${testcall}")
  endif()
endfunction()
|
||||||
|
|
||||||
|
else()
|
||||||
|
# Testing is disabled: accept the call and do nothing.
function(ADD_EMBREE_TEST_ECS testname executable)
endfunction()
|
||||||
|
# Testing is disabled: accept the call and do nothing.
function(SET_EMBREE_TEST_PROPERTIES testname)
endfunction()
|
||||||
|
endif()
|
||||||
38
Framework/external/embree/common/cmake/tutorial.cmake
vendored
Normal file
38
Framework/external/embree/common/cmake/tutorial.cmake
vendored
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# additional parameters (beyond the name) are treated as additional dependencies
|
||||||
|
# if ADDITIONAL_LIBRARIES is set these will be included during linking
|
||||||
|
|
||||||
|
# Defines the C++ tutorial executable embree_<TUTORIAL_NAME> from
# <TUTORIAL_NAME>.cpp and <TUTORIAL_NAME>_device.cpp; extra arguments are
# added to the executable's source list. Links ${ADDITIONAL_LIBRARIES} if set,
# installs into the "examples" component and signs the target.
macro(ADD_TUTORIAL TUTORIAL_NAME)
  add_executable(embree_${TUTORIAL_NAME}
    ../../kernels/embree.rc
    ${TUTORIAL_NAME}.cpp
    ${TUTORIAL_NAME}_device.cpp
    ${ARGN})
  target_link_libraries(embree_${TUTORIAL_NAME}
    embree image tutorial noise ${ADDITIONAL_LIBRARIES})
  set_target_properties(embree_${TUTORIAL_NAME} PROPERTIES FOLDER tutorials/single)
  set_property(TARGET embree_${TUTORIAL_NAME} APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}")
  install(TARGETS embree_${TUTORIAL_NAME}
    DESTINATION "${CMAKE_INSTALL_BINDIR}"
    COMPONENT examples)
  SIGN_TARGET(embree_${TUTORIAL_NAME})
endmacro()
|
||||||
|
|
||||||
|
# Defines the SYCL tutorial executable embree_<TUTORIAL_NAME>_sycl (only when
# EMBREE_SYCL_SUPPORT is on). Built from the same sources as the C++ tutorial,
# with EMBREE_SYCL_TUTORIAL defined and the SYCL compile/link flags appended.
macro(ADD_TUTORIAL_SYCL TUTORIAL_NAME)
  if(EMBREE_SYCL_SUPPORT)
    add_executable(embree_${TUTORIAL_NAME}_sycl
      ${TUTORIAL_NAME}.cpp
      ${TUTORIAL_NAME}_device.cpp
      ${ARGN})
    target_link_libraries(embree_${TUTORIAL_NAME}_sycl
      embree image tutorial_sycl noise ${ADDITIONAL_LIBRARIES})
    target_compile_definitions(embree_${TUTORIAL_NAME}_sycl PUBLIC EMBREE_SYCL_TUTORIAL)
    set_target_properties(embree_${TUTORIAL_NAME}_sycl PROPERTIES FOLDER tutorials/sycl)
    set_property(TARGET embree_${TUTORIAL_NAME}_sycl APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST} ${CMAKE_CXX_FLAGS_SYCL}")
    set_property(TARGET embree_${TUTORIAL_NAME}_sycl APPEND PROPERTY LINK_FLAGS "${CMAKE_LINK_FLAGS_SYCL}")
    install(TARGETS embree_${TUTORIAL_NAME}_sycl
      DESTINATION ${CMAKE_INSTALL_BINDIR}
      COMPONENT examples)
    SIGN_TARGET(embree_${TUTORIAL_NAME}_sycl)
  endif()
endmacro()
|
||||||
|
|
||||||
|
# Defines the ISPC tutorial executable embree_<TUTORIAL_NAME>_ispc (only when
# EMBREE_ISPC_SUPPORT is on) from <TUTORIAL_NAME>.cpp and
# <TUTORIAL_NAME>_device.ispc; extra arguments are added to the source list.
# NOTE(review): unlike the C++/SYCL variants this one does not link
# ${ADDITIONAL_LIBRARIES} -- confirm whether that is intended.
macro(ADD_TUTORIAL_ISPC TUTORIAL_NAME)
  if(EMBREE_ISPC_SUPPORT)
    ADD_EMBREE_ISPC_EXECUTABLE(embree_${TUTORIAL_NAME}_ispc
      ../../kernels/embree.rc
      ${TUTORIAL_NAME}.cpp
      ${TUTORIAL_NAME}_device.ispc
      ${ARGN})
    target_link_libraries(embree_${TUTORIAL_NAME}_ispc
      embree image tutorial_ispc noise noise_ispc)
    set_target_properties(embree_${TUTORIAL_NAME}_ispc PROPERTIES FOLDER tutorials/ispc)
    set_property(TARGET embree_${TUTORIAL_NAME}_ispc APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}")
    install(TARGETS embree_${TUTORIAL_NAME}_ispc
      DESTINATION "${CMAKE_INSTALL_BINDIR}"
      COMPONENT examples)
    SIGN_TARGET(embree_${TUTORIAL_NAME}_ispc)
  endif()
endmacro()
|
||||||
24
Framework/external/embree/common/cmake/uninstall.cmake.in
vendored
Normal file
24
Framework/external/embree/common/cmake/uninstall.cmake.in
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Uninstall script template: removes every file recorded in the install
# manifest of this build tree. The @...@ placeholders are filled in when the
# template is configured; $ENV{DESTDIR} is honored so staged installs can be
# removed as well.

# Without a manifest there is nothing we could safely remove.
IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
  MESSAGE(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
ENDIF()

# The manifest holds one path per line; turn it into a CMake list.
FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
STRING(REGEX REPLACE "\n" ";" files "${files}")

FOREACH(file ${files})
  MESSAGE(STATUS "Uninstalling $ENV{DESTDIR}${file}")
  # IS_SYMLINK is checked separately so dangling symlinks (for which EXISTS is
  # false) are removed too.
  IF(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
    # execute_process replaces the deprecated exec_program command; each
    # argument is passed verbatim, so no manual shell quoting is needed.
    EXECUTE_PROCESS(
      COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
      OUTPUT_VARIABLE rm_out
      RESULT_VARIABLE rm_retval
    )
    IF(NOT "${rm_retval}" STREQUAL 0)
      MESSAGE(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
    ENDIF()
  ELSE()
    MESSAGE(STATUS "File $ENV{DESTDIR}${file} does not exist.")
  ENDIF()
ENDFOREACH()
|
||||||
17
Framework/external/embree/common/lexers/CMakeLists.txt
vendored
Normal file
17
Framework/external/embree/common/lexers/CMakeLists.txt
vendored
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Static library "lexers", built from the two tokenizer sources and linked
# against the sys and math libraries.
add_library(lexers STATIC stringstream.cpp tokenstream.cpp)
target_link_libraries(lexers sys math)
set_target_properties(lexers PROPERTIES FOLDER common)
set_property(TARGET lexers APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}")

# The archive and its export set are only installed for static Embree builds.
if (EMBREE_STATIC_LIB)
  install(TARGETS lexers
          EXPORT lexers-targets
          ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
          COMPONENT devel)
  install(EXPORT lexers-targets
          DESTINATION "${EMBREE_CMAKEEXPORT_DIR}"
          COMPONENT devel)
endif()
|
||||||
|
|
||||||
|
|
||||||
101
Framework/external/embree/common/lexers/parsestream.h
vendored
Normal file
101
Framework/external/embree/common/lexers/parsestream.h
vendored
Normal file
|
|
@ -0,0 +1,101 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "stringstream.h"
|
||||||
|
#include "../sys/filename.h"
|
||||||
|
#include "../math/vec2.h"
|
||||||
|
#include "../math/vec3.h"
|
||||||
|
#include "../math/col3.h"
|
||||||
|
#include "../math/color.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! helper class for simple command line parsing */
|
||||||
|
class ParseStream : public Stream<std::string>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ParseStream (const Ref<Stream<std::string> >& cin) : cin(cin) {}
|
||||||
|
|
||||||
|
ParseStream (const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
|
||||||
|
const std::string& endl = "", bool multiLine = false)
|
||||||
|
: cin(new StringStream(cin,seps,endl,multiLine)) {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
ParseLocation location() { return cin->loc(); }
|
||||||
|
std::string next() { return cin->get(); }
|
||||||
|
|
||||||
|
void force(const std::string& next) {
|
||||||
|
std::string token = getString();
|
||||||
|
if (token != next)
|
||||||
|
THROW_RUNTIME_ERROR("token \""+next+"\" expected but token \""+token+"\" found");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string getString() {
|
||||||
|
return get();
|
||||||
|
}
|
||||||
|
|
||||||
|
FileName getFileName() {
|
||||||
|
return FileName(get());
|
||||||
|
}
|
||||||
|
|
||||||
|
int getInt () {
|
||||||
|
return atoi(get().c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
Vec2i getVec2i() {
|
||||||
|
int x = atoi(get().c_str());
|
||||||
|
int y = atoi(get().c_str());
|
||||||
|
return Vec2i(x,y);
|
||||||
|
}
|
||||||
|
|
||||||
|
Vec3ia getVec3ia() {
|
||||||
|
int x = atoi(get().c_str());
|
||||||
|
int y = atoi(get().c_str());
|
||||||
|
int z = atoi(get().c_str());
|
||||||
|
return Vec3ia(x,y,z);
|
||||||
|
}
|
||||||
|
|
||||||
|
float getFloat() {
|
||||||
|
return (float)atof(get().c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
Vec2f getVec2f() {
|
||||||
|
float x = (float)atof(get().c_str());
|
||||||
|
float y = (float)atof(get().c_str());
|
||||||
|
return Vec2f(x,y);
|
||||||
|
}
|
||||||
|
|
||||||
|
Vec3f getVec3f() {
|
||||||
|
float x = (float)atof(get().c_str());
|
||||||
|
float y = (float)atof(get().c_str());
|
||||||
|
float z = (float)atof(get().c_str());
|
||||||
|
return Vec3f(x,y,z);
|
||||||
|
}
|
||||||
|
|
||||||
|
Vec3fa getVec3fa() {
|
||||||
|
float x = (float)atof(get().c_str());
|
||||||
|
float y = (float)atof(get().c_str());
|
||||||
|
float z = (float)atof(get().c_str());
|
||||||
|
return Vec3fa(x,y,z);
|
||||||
|
}
|
||||||
|
|
||||||
|
Col3f getCol3f() {
|
||||||
|
float x = (float)atof(get().c_str());
|
||||||
|
float y = (float)atof(get().c_str());
|
||||||
|
float z = (float)atof(get().c_str());
|
||||||
|
return Col3f(x,y,z);
|
||||||
|
}
|
||||||
|
|
||||||
|
Color getColor() {
|
||||||
|
float r = (float)atof(get().c_str());
|
||||||
|
float g = (float)atof(get().c_str());
|
||||||
|
float b = (float)atof(get().c_str());
|
||||||
|
return Color(r,g,b);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Ref<Stream<std::string> > cin;
|
||||||
|
};
|
||||||
|
}
|
||||||
215
Framework/external/embree/common/lexers/stream.h
vendored
Normal file
215
Framework/external/embree/common/lexers/stream.h
vendored
Normal file
|
|
@ -0,0 +1,215 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/platform.h"
|
||||||
|
#include "../sys/ref.h"
|
||||||
|
#include "../sys/filename.h"
|
||||||
|
#include "../sys/estring.h"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! stores the location of a stream element in the source */
|
||||||
|
class ParseLocation
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ParseLocation () : lineNumber(-1), colNumber(-1) {}
|
||||||
|
ParseLocation (std::shared_ptr<std::string> fileName, ssize_t lineNumber, ssize_t colNumber, ssize_t /*charNumber*/)
|
||||||
|
: fileName(fileName), lineNumber(lineNumber), colNumber(colNumber) {}
|
||||||
|
|
||||||
|
std::string str() const
|
||||||
|
{
|
||||||
|
std::string str = "unknown";
|
||||||
|
if (fileName) str = *fileName;
|
||||||
|
if (lineNumber >= 0) str += " line " + toString(lineNumber);
|
||||||
|
if (lineNumber >= 0 && colNumber >= 0) str += " character " + toString(colNumber);
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<std::string> fileName; /// name of the file (or stream) the token is from
|
||||||
|
ssize_t lineNumber; /// the line number the token is from
|
||||||
|
ssize_t colNumber; /// the character number in the current line
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! a stream class templated over the stream elements */
|
||||||
|
template<typename T> class Stream : public RefCount
|
||||||
|
{
|
||||||
|
enum { BUF_SIZE = 1024 };
|
||||||
|
|
||||||
|
private:
|
||||||
|
virtual T next() = 0;
|
||||||
|
virtual ParseLocation location() = 0;
|
||||||
|
__forceinline std::pair<T,ParseLocation> nextHelper() {
|
||||||
|
ParseLocation l = location();
|
||||||
|
T v = next();
|
||||||
|
return std::pair<T,ParseLocation>(v,l);
|
||||||
|
}
|
||||||
|
__forceinline void push_back(const std::pair<T,ParseLocation>& v) {
|
||||||
|
if (past+future == BUF_SIZE) pop_front();
|
||||||
|
size_t end = (start+past+future++)%BUF_SIZE;
|
||||||
|
buffer[end] = v;
|
||||||
|
}
|
||||||
|
__forceinline void pop_front() {
|
||||||
|
if (past == 0) THROW_RUNTIME_ERROR("stream buffer empty");
|
||||||
|
start = (start+1)%BUF_SIZE; past--;
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
Stream () : start(0), past(0), future(0), buffer(BUF_SIZE) {}
|
||||||
|
virtual ~Stream() {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
const ParseLocation& loc() {
|
||||||
|
if (future == 0) push_back(nextHelper());
|
||||||
|
return buffer[(start+past)%BUF_SIZE].second;
|
||||||
|
}
|
||||||
|
T get() {
|
||||||
|
if (future == 0) push_back(nextHelper());
|
||||||
|
T t = buffer[(start+past)%BUF_SIZE].first;
|
||||||
|
past++; future--;
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
const T& peek() {
|
||||||
|
if (future == 0) push_back(nextHelper());
|
||||||
|
return buffer[(start+past)%BUF_SIZE].first;
|
||||||
|
}
|
||||||
|
const T& unget(size_t n = 1) {
|
||||||
|
if (past < n) THROW_RUNTIME_ERROR ("cannot unget that many items");
|
||||||
|
past -= n; future += n;
|
||||||
|
return peek();
|
||||||
|
}
|
||||||
|
void drop() {
|
||||||
|
if (future == 0) push_back(nextHelper());
|
||||||
|
past++; future--;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
size_t start,past,future;
|
||||||
|
std::vector<std::pair<T,ParseLocation> > buffer;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! warps an iostream stream */
|
||||||
|
class StdStream : public Stream<int>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
StdStream (std::istream& cin, const std::string& name = "std::stream")
|
||||||
|
: cin(cin), lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name))) {}
|
||||||
|
~StdStream() {}
|
||||||
|
ParseLocation location() {
|
||||||
|
return ParseLocation(name,lineNumber,colNumber,charNumber);
|
||||||
|
}
|
||||||
|
int next() {
|
||||||
|
int c = cin.get();
|
||||||
|
if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
|
||||||
|
charNumber++;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
std::istream& cin;
|
||||||
|
ssize_t lineNumber; /// the line number the token is from
|
||||||
|
ssize_t colNumber; /// the character number in the current line
|
||||||
|
ssize_t charNumber; /// the character in the file
|
||||||
|
std::shared_ptr<std::string> name; /// name of buffer
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! creates a stream from a file */
|
||||||
|
class FileStream : public Stream<int>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
FileStream (const FileName& fileName)
|
||||||
|
: lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(fileName.str())))
|
||||||
|
{
|
||||||
|
if (ifs) ifs.close();
|
||||||
|
ifs.open(fileName.str());
|
||||||
|
if (!ifs.is_open()) THROW_RUNTIME_ERROR("cannot open file " + fileName.str());
|
||||||
|
}
|
||||||
|
~FileStream() {
|
||||||
|
if (ifs) ifs.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
ParseLocation location() {
|
||||||
|
return ParseLocation(name,lineNumber,colNumber,charNumber);
|
||||||
|
}
|
||||||
|
|
||||||
|
int next() {
|
||||||
|
int c = ifs.get();
|
||||||
|
if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
|
||||||
|
charNumber++;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::ifstream ifs;
|
||||||
|
ssize_t lineNumber; /// the line number the token is from
|
||||||
|
ssize_t colNumber; /// the character number in the current line
|
||||||
|
ssize_t charNumber; /// the character in the file
|
||||||
|
std::shared_ptr<std::string> name; /// name of buffer
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! creates a stream from a string */
|
||||||
|
class StrStream : public Stream<int>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
StrStream (const char* str)
|
||||||
|
: str(str), lineNumber(1), colNumber(0), charNumber(0) {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
ParseLocation location() {
|
||||||
|
return ParseLocation(std::shared_ptr<std::string>(),lineNumber,colNumber,charNumber);
|
||||||
|
}
|
||||||
|
|
||||||
|
int next() {
|
||||||
|
int c = str[charNumber];
|
||||||
|
if (c == 0) return EOF;
|
||||||
|
if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
|
||||||
|
charNumber++;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const char* str;
|
||||||
|
ssize_t lineNumber; /// the line number the token is from
|
||||||
|
ssize_t colNumber; /// the character number in the current line
|
||||||
|
ssize_t charNumber; /// the character in the file
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! creates a character stream from a command line */
|
||||||
|
class CommandLineStream : public Stream<int>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CommandLineStream (int argc, char** argv, const std::string& name = "command line")
|
||||||
|
: i(0), j(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name)))
|
||||||
|
{
|
||||||
|
if (argc > 0) {
|
||||||
|
for (size_t i=0; argv[0][i] && i<1024; i++) charNumber++;
|
||||||
|
charNumber++;
|
||||||
|
}
|
||||||
|
for (ssize_t k=1; k<argc; k++) args.push_back(argv[k]);
|
||||||
|
}
|
||||||
|
~CommandLineStream() {}
|
||||||
|
public:
|
||||||
|
ParseLocation location() {
|
||||||
|
return ParseLocation(name,0,charNumber,charNumber);
|
||||||
|
}
|
||||||
|
int next() {
|
||||||
|
if (i == args.size()) return EOF;
|
||||||
|
if (j == args[i].size()) { i++; j=0; charNumber++; return ' '; }
|
||||||
|
charNumber++;
|
||||||
|
return args[i][j++];
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
size_t i,j;
|
||||||
|
std::vector<std::string> args;
|
||||||
|
ssize_t charNumber; /// the character in the file
|
||||||
|
std::shared_ptr<std::string> name; /// name of buffer
|
||||||
|
};
|
||||||
|
}
|
||||||
39
Framework/external/embree/common/lexers/streamfilters.h
vendored
Normal file
39
Framework/external/embree/common/lexers/streamfilters.h
vendored
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "stream.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/* removes all line comments from a stream */
|
||||||
|
class LineCommentFilter : public Stream<int>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
LineCommentFilter (const FileName& fileName, const std::string& lineComment)
|
||||||
|
: cin(new FileStream(fileName)), lineComment(lineComment) {}
|
||||||
|
LineCommentFilter (Ref<Stream<int> > cin, const std::string& lineComment)
|
||||||
|
: cin(cin), lineComment(lineComment) {}
|
||||||
|
|
||||||
|
ParseLocation location() { return cin->loc(); }
|
||||||
|
|
||||||
|
int next()
|
||||||
|
{
|
||||||
|
/* look if the line comment starts here */
|
||||||
|
for (size_t j=0; j<lineComment.size(); j++) {
|
||||||
|
if (cin->peek() != lineComment[j]) { cin->unget(j); goto not_found; }
|
||||||
|
cin->get();
|
||||||
|
}
|
||||||
|
/* eat all characters until the end of the line (or file) */
|
||||||
|
while (cin->peek() != '\n' && cin->peek() != EOF) cin->get();
|
||||||
|
|
||||||
|
not_found:
|
||||||
|
return cin->get();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Ref<Stream<int> > cin;
|
||||||
|
std::string lineComment;
|
||||||
|
};
|
||||||
|
}
|
||||||
48
Framework/external/embree/common/lexers/stringstream.cpp
vendored
Normal file
48
Framework/external/embree/common/lexers/stringstream.cpp
vendored
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#include "stringstream.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/* characters that may appear inside a token */
static const std::string stringChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\");

/* creates map for fast categorization of characters: map[b] is set to true
   exactly for the byte values b that occur in chrs */
static void createCharMap(bool map[256], const std::string& chrs) {
  for (size_t idx=0; idx!=256; ++idx) map[idx] = false;
  for (const char ch : chrs) map[uint8_t(ch)] = true;
}
|
||||||
|
|
||||||
|
/* simple tokenizer */
|
||||||
|
StringStream::StringStream(const Ref<Stream<int> >& cin, const std::string& seps, const std::string& endl, bool multiLine)
|
||||||
|
: cin(cin), endl(endl), multiLine(multiLine)
|
||||||
|
{
|
||||||
|
createCharMap(isSepMap,seps);
|
||||||
|
createCharMap(isValidCharMap,stringChars);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string StringStream::next()
|
||||||
|
{
|
||||||
|
/* skip separators */
|
||||||
|
while (cin->peek() != EOF) {
|
||||||
|
if (endl != "" && cin->peek() == '\n') { cin->drop(); return endl; }
|
||||||
|
if (multiLine && cin->peek() == '\\') {
|
||||||
|
cin->drop();
|
||||||
|
if (cin->peek() == '\n') { cin->drop(); continue; }
|
||||||
|
cin->unget();
|
||||||
|
}
|
||||||
|
if (!isSeparator(cin->peek())) break;
|
||||||
|
cin->drop();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* parse everything until the next separator */
|
||||||
|
std::vector<char> str; str.reserve(64);
|
||||||
|
while (cin->peek() != EOF && !isSeparator(cin->peek())) {
|
||||||
|
int c = cin->get();
|
||||||
|
if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
|
||||||
|
str.push_back((char)c);
|
||||||
|
}
|
||||||
|
str.push_back(0);
|
||||||
|
return std::string(str.data());
|
||||||
|
}
|
||||||
|
}
|
||||||
29
Framework/external/embree/common/lexers/stringstream.h
vendored
Normal file
29
Framework/external/embree/common/lexers/stringstream.h
vendored
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "stream.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! simple tokenizer that produces a string stream */
|
||||||
|
class StringStream : public Stream<std::string>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
StringStream(const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
|
||||||
|
const std::string& endl = "", bool multiLine = false);
|
||||||
|
public:
|
||||||
|
ParseLocation location() { return cin->loc(); }
|
||||||
|
std::string next();
|
||||||
|
private:
|
||||||
|
__forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
|
||||||
|
__forceinline bool isValidChar(unsigned int c) const { return c<256 && isValidCharMap[c]; }
|
||||||
|
private:
|
||||||
|
Ref<Stream<int> > cin; /*! source character stream */
|
||||||
|
bool isSepMap[256]; /*! map for fast classification of separators */
|
||||||
|
bool isValidCharMap[256]; /*! map for valid characters */
|
||||||
|
std::string endl; /*! the token of the end of line */
|
||||||
|
bool multiLine; /*! whether to parse lines wrapped with \ */
|
||||||
|
};
|
||||||
|
}
|
||||||
181
Framework/external/embree/common/lexers/tokenstream.cpp
vendored
Normal file
181
Framework/external/embree/common/lexers/tokenstream.cpp
vendored
Normal file
|
|
@ -0,0 +1,181 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#include "tokenstream.h"
|
||||||
|
#include "../math/emath.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/* shorthands for common sets of characters */
|
||||||
|
const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";
|
||||||
|
const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||||
|
const std::string TokenStream::numbers = "0123456789";
|
||||||
|
const std::string TokenStream::separators = "\n\t\r ";
|
||||||
|
const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";
|
||||||
|
|
||||||
|
/* creates map for fast categorization of characters: map[b] is set to true
   exactly for the byte values b that occur in chrs */
static void createCharMap(bool map[256], const std::string& chrs) {
  for (size_t idx=0; idx!=256; ++idx) map[idx] = false;
  for (const char ch : chrs) map[uint8_t(ch)] = true;
}
|
||||||
|
|
||||||
|
/* build full tokenizer that takes list of valid characters and keywords */
|
||||||
|
TokenStream::TokenStream(const Ref<Stream<int> >& cin, //< stream to read from
|
||||||
|
const std::string& alpha, //< valid characters for identifiers
|
||||||
|
const std::string& seps, //< characters that act as separators
|
||||||
|
const std::vector<std::string>& symbols) //< symbols
|
||||||
|
: cin(cin), symbols(symbols)
|
||||||
|
{
|
||||||
|
createCharMap(isAlphaMap,alpha);
|
||||||
|
createCharMap(isSepMap,seps);
|
||||||
|
createCharMap(isStringCharMap,stringChars);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TokenStream::decDigits(std::string& str_o)
|
||||||
|
{
|
||||||
|
bool ok = false;
|
||||||
|
std::string str;
|
||||||
|
if (cin->peek() == '+' || cin->peek() == '-') str += (char)cin->get();
|
||||||
|
while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
|
||||||
|
if (ok) str_o += str;
|
||||||
|
else cin->unget(str.size());
|
||||||
|
return ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TokenStream::decDigits1(std::string& str_o)
|
||||||
|
{
|
||||||
|
bool ok = false;
|
||||||
|
std::string str;
|
||||||
|
while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
|
||||||
|
if (ok) str_o += str; else cin->unget(str.size());
|
||||||
|
return ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TokenStream::trySymbol(const std::string& symbol)
|
||||||
|
{
|
||||||
|
size_t pos = 0;
|
||||||
|
while (pos < symbol.size()) {
|
||||||
|
if (symbol[pos] != cin->peek()) { cin->unget(pos); return false; }
|
||||||
|
cin->drop(); pos++;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* tries the registered symbols in order; the first one that matches the
   input is consumed and returned as a TY_SYMBOL token */
bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)
{
  for (const std::string& candidate : symbols) {
    if (trySymbol(candidate)) {
      token = Token(candidate,Token::TY_SYMBOL,loc);
      return true;
    }
  }
  return false;
}
|
||||||
|
|
||||||
|
/* Tries to parse a floating point token at the current position.
 *
 * Accepted forms are the literals nan, +inf and -inf, and numbers of the
 * shape [sign]digits.[digits][E[sign]digits], [sign]digits E[sign]digits and
 * .digits[E[sign]digits]. A plain integer is NOT accepted here (it is left
 * for tryInt). On failure every consumed character is pushed back and false
 * is returned; on success the token carries the given location.
 *
 * Only the mantissa's integer part and the exponent may carry a sign; the
 * fractional digits are read with decDigits1 so that input such as "1.-2" is
 * no longer swallowed as a single float. */
bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)
{
  bool ok = false;
  std::string str;

  /* special values; like all other tokens they record where they came from */
  if (trySymbol("nan")) {
    token = Token(float(nan),loc);
    return true;
  }
  if (trySymbol("+inf")) {
    token = Token(float(pos_inf),loc);
    return true;
  }
  if (trySymbol("-inf")) {
    token = Token(float(neg_inf),loc);
    return true;
  }

  if (decDigits(str))
  {
    if (cin->peek() == '.') {
      str += (char)cin->get();
      decDigits1(str); // fractional digits are optional and unsigned
      if (cin->peek() == 'e' || cin->peek() == 'E') {
        str += (char)cin->get();
        if (decDigits(str)) ok = true; // 1.[2]E2
      }
      else ok = true; // 1.[2]
    }
    else if (cin->peek() == 'e' || cin->peek() == 'E') {
      str += (char)cin->get();
      if (decDigits(str)) ok = true; // 1E2
    }
  }
  else
  {
    if (cin->peek() == '.') {
      str += (char)cin->get();
      if (decDigits1(str)) { // at least one unsigned fractional digit is required
        if (cin->peek() == 'e' || cin->peek() == 'E') {
          str += (char)cin->get();
          if (decDigits(str)) ok = true; // .3E2
        }
        else ok = true; // .3
      }
    }
  }

  if (ok) {
    token = Token((float)atof(str.c_str()),loc);
  }
  else cin->unget(str.size()); // restore everything consumed so far
  return ok;
}
|
||||||
|
|
||||||
|
/* parses an optionally signed decimal integer into a TY_INT token */
bool TokenStream::tryInt(Token& token, const ParseLocation& loc)
{
  std::string digits;
  if (!decDigits(digits)) return false;
  token = Token(atoi(digits.c_str()),loc);
  return true;
}
|
||||||
|
|
||||||
|
/* parses a double-quoted string into a TY_STRING token (without the quotes);
   throws if a character that is not a valid string character is encountered
   before the closing quote */
bool TokenStream::tryString(Token& token, const ParseLocation& loc)
{
  if (cin->peek() != '\"') return false;
  cin->drop(); // opening quote

  std::string text;
  for (;;) {
    const int c = cin->get();
    if (c == '\"') break; // closing quote, already consumed
    if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str());
    text += (char)c;
  }

  token = Token(text,Token::TY_STRING,loc);
  return true;
}
|
||||||
|
|
||||||
|
/* parses an identifier into a TY_IDENTIFIER token: one identifier character
   followed by any number of identifier characters or digits */
bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)
{
  if (!isAlpha(cin->peek())) return false;

  std::string ident;
  do {
    ident += (char)cin->get();
  } while (isAlphaNum(cin->peek()));

  token = Token(ident,Token::TY_IDENTIFIER,loc);
  return true;
}
|
||||||
|
|
||||||
|
void TokenStream::skipSeparators()
|
||||||
|
{
|
||||||
|
/* skip separators */
|
||||||
|
while (cin->peek() != EOF && isSeparator(cin->peek()))
|
||||||
|
cin->drop();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* produces the next token: separators are skipped, then the token kinds are
   tried in a fixed order (symbol, float, integer, string, identifier); if
   none matches, an EOF token or a single-character token is returned */
Token TokenStream::next()
{
  skipSeparators();
  const ParseLocation loc = cin->loc();

  Token token;
  const bool parsed =
       trySymbols   (token,loc)   /**< try to parse a symbol */
    || tryFloat     (token,loc)   /**< try to parse float */
    || tryInt       (token,loc)   /**< try to parse integer */
    || tryString    (token,loc)   /**< try to parse string */
    || tryIdentifier(token,loc);  /**< try to parse identifier */
  if (parsed) return token;

  if (cin->peek() == EOF) return Token(loc); /**< return EOF token */
  return Token((char)cin->get(),loc);        /**< return invalid character token */
}
|
||||||
|
}
|
||||||
164
Framework/external/embree/common/lexers/tokenstream.h
vendored
Normal file
164
Framework/external/embree/common/lexers/tokenstream.h
vendored
Normal file
|
|
@ -0,0 +1,164 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "stream.h"
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! token class */
|
||||||
|
class Token
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL };
|
||||||
|
|
||||||
|
Token ( const ParseLocation& loc = ParseLocation()) : ty(TY_EOF ), loc(loc) {}
|
||||||
|
Token (char c, const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {}
|
||||||
|
Token (int i, const ParseLocation& loc = ParseLocation()) : ty(TY_INT ), i(i), loc(loc) {}
|
||||||
|
Token (float f,const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {}
|
||||||
|
Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty), str(str), loc(loc) {}
|
||||||
|
|
||||||
|
static Token Eof() { return Token(); }
|
||||||
|
static Token Sym(std::string str) { return Token(str,TY_SYMBOL); }
|
||||||
|
static Token Str(std::string str) { return Token(str,TY_STRING); }
|
||||||
|
static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); }
|
||||||
|
|
||||||
|
char Char() const {
|
||||||
|
if (ty == TY_CHAR) return c;
|
||||||
|
THROW_RUNTIME_ERROR(loc.str()+": character expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
int Int() const {
|
||||||
|
if (ty == TY_INT) return i;
|
||||||
|
THROW_RUNTIME_ERROR(loc.str()+": integer expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
float Float(bool cast = true) const {
|
||||||
|
if (ty == TY_FLOAT) return f;
|
||||||
|
if (ty == TY_INT && cast) return (float)i;
|
||||||
|
THROW_RUNTIME_ERROR(loc.str()+": float expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string Identifier() const {
|
||||||
|
if (ty == TY_IDENTIFIER) return str;
|
||||||
|
THROW_RUNTIME_ERROR(loc.str()+": identifier expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string String() const {
|
||||||
|
if (ty == TY_STRING) return str;
|
||||||
|
THROW_RUNTIME_ERROR(loc.str()+": string expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string Symbol() const {
|
||||||
|
if (ty == TY_SYMBOL) return str;
|
||||||
|
THROW_RUNTIME_ERROR(loc.str()+": symbol expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
const ParseLocation& Location() const { return loc; }
|
||||||
|
|
||||||
|
friend bool operator==(const Token& a, const Token& b)
|
||||||
|
{
|
||||||
|
if (a.ty != b.ty) return false;
|
||||||
|
if (a.ty == TY_CHAR) return a.c == b.c;
|
||||||
|
if (a.ty == TY_INT) return a.i == b.i;
|
||||||
|
if (a.ty == TY_FLOAT) return a.f == b.f;
|
||||||
|
if (a.ty == TY_IDENTIFIER) return a.str == b.str;
|
||||||
|
if (a.ty == TY_STRING) return a.str == b.str;
|
||||||
|
if (a.ty == TY_SYMBOL) return a.str == b.str;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend bool operator!=(const Token& a, const Token& b) {
|
||||||
|
return !(a == b);
|
||||||
|
}
|
||||||
|
|
||||||
|
friend bool operator <( const Token& a, const Token& b ) {
|
||||||
|
if (a.ty != b.ty) return (int)a.ty < (int)b.ty;
|
||||||
|
if (a.ty == TY_CHAR) return a.c < b.c;
|
||||||
|
if (a.ty == TY_INT) return a.i < b.i;
|
||||||
|
if (a.ty == TY_FLOAT) return a.f < b.f;
|
||||||
|
if (a.ty == TY_IDENTIFIER) return a.str < b.str;
|
||||||
|
if (a.ty == TY_STRING) return a.str < b.str;
|
||||||
|
if (a.ty == TY_SYMBOL) return a.str < b.str;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend std::ostream& operator<<(std::ostream& cout, const Token& t)
|
||||||
|
{
|
||||||
|
if (t.ty == TY_EOF) return cout << "eof";
|
||||||
|
if (t.ty == TY_CHAR) return cout << "Char(" << t.c << ")";
|
||||||
|
if (t.ty == TY_INT) return cout << "Int(" << t.i << ")";
|
||||||
|
if (t.ty == TY_FLOAT) return cout << "Float(" << t.f << ")";
|
||||||
|
if (t.ty == TY_IDENTIFIER) return cout << "Id(" << t.str << ")";
|
||||||
|
if (t.ty == TY_STRING) return cout << "String(" << t.str << ")";
|
||||||
|
if (t.ty == TY_SYMBOL) return cout << "Symbol(" << t.str << ")";
|
||||||
|
return cout << "unknown";
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Type ty; //< the type of the token
|
||||||
|
union {
|
||||||
|
char c; //< data for char tokens
|
||||||
|
int i; //< data for int tokens
|
||||||
|
float f; //< data for float tokens
|
||||||
|
};
|
||||||
|
std::string str; //< data for string and identifier tokens
|
||||||
|
ParseLocation loc; //< the location the token is from
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! build full tokenizer that takes list of valid characters and keywords */
|
||||||
|
class TokenStream : public Stream<Token>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
/*! shorthands for common sets of characters */
|
||||||
|
static const std::string alpha;
|
||||||
|
static const std::string ALPHA;
|
||||||
|
static const std::string numbers;
|
||||||
|
static const std::string separators;
|
||||||
|
static const std::string stringChars;
|
||||||
|
|
||||||
|
public:
|
||||||
|
TokenStream(const Ref<Stream<int> >& cin,
|
||||||
|
const std::string& alpha, //< valid characters for identifiers
|
||||||
|
const std::string& seps, //< characters that act as separators
|
||||||
|
const std::vector<std::string>& symbols = std::vector<std::string>()); //< symbols
|
||||||
|
public:
|
||||||
|
ParseLocation location() { return cin->loc(); }
|
||||||
|
Token next();
|
||||||
|
bool trySymbol(const std::string& symbol);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void skipSeparators();
|
||||||
|
bool decDigits(std::string& str);
|
||||||
|
bool decDigits1(std::string& str);
|
||||||
|
bool trySymbols(Token& token, const ParseLocation& loc);
|
||||||
|
bool tryFloat(Token& token, const ParseLocation& loc);
|
||||||
|
bool tryInt(Token& token, const ParseLocation& loc);
|
||||||
|
bool tryString(Token& token, const ParseLocation& loc);
|
||||||
|
bool tryIdentifier(Token& token, const ParseLocation& loc);
|
||||||
|
|
||||||
|
Ref<Stream<int> > cin;
|
||||||
|
bool isSepMap[256];
|
||||||
|
bool isAlphaMap[256];
|
||||||
|
bool isStringCharMap[256];
|
||||||
|
std::vector<std::string> symbols;
|
||||||
|
|
||||||
|
/*! checks if a character is a separator */
|
||||||
|
__forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
|
||||||
|
|
||||||
|
/*! checks if a character is a number */
|
||||||
|
__forceinline bool isDigit(unsigned int c) const { return c >= '0' && c <= '9'; }
|
||||||
|
|
||||||
|
/*! checks if a character is valid inside a string */
|
||||||
|
__forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; }
|
||||||
|
|
||||||
|
/*! checks if a character is legal for an identifier */
|
||||||
|
__forceinline bool isAlpha(unsigned int c) const { return c<256 && isAlphaMap[c]; }
|
||||||
|
__forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); }
|
||||||
|
};
|
||||||
|
}
|
||||||
12
Framework/external/embree/common/math/CMakeLists.txt
vendored
Normal file
12
Framework/external/embree/common/math/CMakeLists.txt
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
## Copyright 2009-2021 Intel Corporation
|
||||||
|
## SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
ADD_LIBRARY(math STATIC constants.cpp)
|
||||||
|
SET_PROPERTY(TARGET math PROPERTY FOLDER common)
|
||||||
|
SET_PROPERTY(TARGET math APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}")
|
||||||
|
|
||||||
|
IF (EMBREE_STATIC_LIB)
|
||||||
|
INSTALL(TARGETS math EXPORT math-targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel)
|
||||||
|
INSTALL(EXPORT math-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel)
|
||||||
|
ENDIF()
|
||||||
|
|
||||||
361
Framework/external/embree/common/math/affinespace.h
vendored
Normal file
361
Framework/external/embree/common/math/affinespace.h
vendored
Normal file
|
|
@ -0,0 +1,361 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "linearspace2.h"
|
||||||
|
#include "linearspace3.h"
|
||||||
|
#include "quaternion.h"
|
||||||
|
#include "bbox.h"
|
||||||
|
#include "vec4.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
#define VectorT typename L::Vector
|
||||||
|
#define ScalarT typename L::Vector::Scalar
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Affine Space
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename L>
|
||||||
|
struct AffineSpaceT
|
||||||
|
{
|
||||||
|
L l; /*< linear part of affine space */
|
||||||
|
VectorT p; /*< affine part of affine space */
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Constructors, Assignment, Cast, Copy Operations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline AffineSpaceT ( ) { }
|
||||||
|
__forceinline AffineSpaceT ( const AffineSpaceT& other ) { l = other.l; p = other.p; }
|
||||||
|
__forceinline AffineSpaceT ( const L & other ) { l = other ; p = VectorT(zero); }
|
||||||
|
__forceinline AffineSpaceT& operator=( const AffineSpaceT& other ) { l = other.l; p = other.p; return *this; }
|
||||||
|
|
||||||
|
__forceinline AffineSpaceT( const VectorT& vx, const VectorT& vy, const VectorT& vz, const VectorT& p ) : l(vx,vy,vz), p(p) {}
|
||||||
|
__forceinline AffineSpaceT( const L& l, const VectorT& p ) : l(l), p(p) {}
|
||||||
|
|
||||||
|
template<typename L1> __forceinline AffineSpaceT( const AffineSpaceT<L1>& s ) : l(s.l), p(s.p) {}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline AffineSpaceT( ZeroTy ) : l(zero), p(zero) {}
|
||||||
|
__forceinline AffineSpaceT( OneTy ) : l(one), p(zero) {}
|
||||||
|
|
||||||
|
/*! return matrix for scaling */
|
||||||
|
static __forceinline AffineSpaceT scale(const VectorT& s) { return L::scale(s); }
|
||||||
|
|
||||||
|
/*! return matrix for translation */
|
||||||
|
static __forceinline AffineSpaceT translate(const VectorT& p) { return AffineSpaceT(one,p); }
|
||||||
|
|
||||||
|
/*! return matrix for rotation, only in 2D */
|
||||||
|
static __forceinline AffineSpaceT rotate(const ScalarT& r) { return L::rotate(r); }
|
||||||
|
|
||||||
|
/*! return matrix for rotation around arbitrary point (2D) or axis (3D) */
|
||||||
|
static __forceinline AffineSpaceT rotate(const VectorT& u, const ScalarT& r) { return L::rotate(u,r); }
|
||||||
|
|
||||||
|
/*! return matrix for rotation around arbitrary axis and point, only in 3D */
|
||||||
|
static __forceinline AffineSpaceT rotate(const VectorT& p, const VectorT& u, const ScalarT& r) { return translate(+p) * rotate(u,r) * translate(-p); }
|
||||||
|
|
||||||
|
/*! return matrix for looking at given point, only in 3D */
|
||||||
|
static __forceinline AffineSpaceT lookat(const VectorT& eye, const VectorT& point, const VectorT& up) {
|
||||||
|
VectorT Z = normalize(point-eye);
|
||||||
|
VectorT U = normalize(cross(up,Z));
|
||||||
|
VectorT V = normalize(cross(Z,U));
|
||||||
|
return AffineSpaceT(L(U,V,Z),eye);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
// template specialization to get the correct identity for type AffineSpace3ff (the w component of p is set to 1)
|
||||||
|
template<>
|
||||||
|
__forceinline AffineSpaceT<LinearSpace3ff>::AffineSpaceT( OneTy ) : l(one), p(0.f, 0.f, 0.f, 1.f) {}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename L> __forceinline AffineSpaceT<L> operator -( const AffineSpaceT<L>& a ) { return AffineSpaceT<L>(-a.l,-a.p); }
|
||||||
|
template<typename L> __forceinline AffineSpaceT<L> operator +( const AffineSpaceT<L>& a ) { return AffineSpaceT<L>(+a.l,+a.p); }
|
||||||
|
template<typename L> __forceinline AffineSpaceT<L> rcp( const AffineSpaceT<L>& a ) { L il = rcp(a.l); return AffineSpaceT<L>(il,-(il*a.p)); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename L> __forceinline const AffineSpaceT<L> operator +( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l+b.l,a.p+b.p); }
|
||||||
|
template<typename L> __forceinline const AffineSpaceT<L> operator -( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l-b.l,a.p-b.p); }
|
||||||
|
|
||||||
|
template<typename L> __forceinline const AffineSpaceT<L> operator *( const ScalarT & a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a*b.l,a*b.p); }
|
||||||
|
template<typename L> __forceinline const AffineSpaceT<L> operator *( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l*b.l,a.l*b.p+a.p); }
|
||||||
|
template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a * rcp(b); }
|
||||||
|
/*! divides an affine space by a scalar: scales both the linear part and the affine part by rcp(b).
 *  Written as (scalar * space) because operator*(ScalarT, AffineSpaceT) is the only
 *  scalar product overload defined in this header; (space * scalar) does not exist. */
template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const ScalarT & b ) { return rcp(b) * a; }
|
||||||
|
|
||||||
|
template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a * b; }
|
||||||
|
/*! scales an affine space in place by a scalar (linear and affine part).
 *  Uses (scalar * space) because no (space * scalar) overload is defined in this header. */
template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const ScalarT & b ) { return a = b * a; }
|
||||||
|
template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a / b; }
|
||||||
|
/*! divides an affine space in place by a scalar: scales the linear and affine part by rcp(b).
 *  Expressed directly through operator*(ScalarT, AffineSpaceT) so it does not depend on a
 *  (space * scalar) overload, which this header does not define. */
template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const ScalarT & b ) { return a = rcp(b) * a; }
|
||||||
|
|
||||||
|
template<typename L> __forceinline VectorT xfmPoint (const AffineSpaceT<L>& m, const VectorT& p) { return madd(VectorT(p.x),m.l.vx,madd(VectorT(p.y),m.l.vy,madd(VectorT(p.z),m.l.vz,m.p))); }
|
||||||
|
template<typename L> __forceinline VectorT xfmVector(const AffineSpaceT<L>& m, const VectorT& v) { return xfmVector(m.l,v); }
|
||||||
|
template<typename L> __forceinline VectorT xfmNormal(const AffineSpaceT<L>& m, const VectorT& n) { return xfmNormal(m.l,n); }
|
||||||
|
|
||||||
|
__forceinline const BBox<Vec3fa> xfmBounds(const AffineSpaceT<LinearSpace3<Vec3fa> >& m, const BBox<Vec3fa>& b)
|
||||||
|
{
|
||||||
|
BBox3fa dst = empty;
|
||||||
|
const Vec3fa p0(b.lower.x,b.lower.y,b.lower.z); dst.extend(xfmPoint(m,p0));
|
||||||
|
const Vec3fa p1(b.lower.x,b.lower.y,b.upper.z); dst.extend(xfmPoint(m,p1));
|
||||||
|
const Vec3fa p2(b.lower.x,b.upper.y,b.lower.z); dst.extend(xfmPoint(m,p2));
|
||||||
|
const Vec3fa p3(b.lower.x,b.upper.y,b.upper.z); dst.extend(xfmPoint(m,p3));
|
||||||
|
const Vec3fa p4(b.upper.x,b.lower.y,b.lower.z); dst.extend(xfmPoint(m,p4));
|
||||||
|
const Vec3fa p5(b.upper.x,b.lower.y,b.upper.z); dst.extend(xfmPoint(m,p5));
|
||||||
|
const Vec3fa p6(b.upper.x,b.upper.y,b.lower.z); dst.extend(xfmPoint(m,p6));
|
||||||
|
const Vec3fa p7(b.upper.x,b.upper.y,b.upper.z); dst.extend(xfmPoint(m,p7));
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename L> __forceinline bool operator ==( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a.l == b.l && a.p == b.p; }
|
||||||
|
template<typename L> __forceinline bool operator !=( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a.l != b.l || a.p != b.p; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename L> __forceinline AffineSpaceT<L> select ( const typename L::Vector::Scalar::Bool& s, const AffineSpaceT<L>& t, const AffineSpaceT<L>& f ) {
|
||||||
|
return AffineSpaceT<L>(select(s,t.l,f.l),select(s,t.p,f.p));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename L> static embree_ostream operator<<(embree_ostream cout, const AffineSpaceT<L>& m) {
|
||||||
|
return cout << "{ l = " << m.l << ", p = " << m.p << " }";
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Template Instantiations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
typedef AffineSpaceT<LinearSpace2f> AffineSpace2f;
|
||||||
|
typedef AffineSpaceT<LinearSpace3f> AffineSpace3f;
|
||||||
|
typedef AffineSpaceT<LinearSpace3fa> AffineSpace3fa;
|
||||||
|
typedef AffineSpaceT<LinearSpace3fx> AffineSpace3fx;
|
||||||
|
typedef AffineSpaceT<LinearSpace3ff> AffineSpace3ff;
|
||||||
|
typedef AffineSpaceT<Quaternion3f > OrthonormalSpace3f;
|
||||||
|
|
||||||
|
template<int N> using AffineSpace3vf = AffineSpaceT<LinearSpace3<Vec3<vfloat<N>>>>;
|
||||||
|
typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<4>>>> AffineSpace3vf4;
|
||||||
|
typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<8>>>> AffineSpace3vf8;
|
||||||
|
typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<16>>>> AffineSpace3vf16;
|
||||||
|
|
||||||
|
template<int N> using AffineSpace3vff = AffineSpaceT<LinearSpace3<Vec4<vfloat<N>>>>;
|
||||||
|
typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<4>>>> AffineSpace3vfa4;
|
||||||
|
typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<8>>>> AffineSpace3vfa8;
|
||||||
|
typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<16>>>> AffineSpace3vfa16;
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Interpolation
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<typename T, typename R>
|
||||||
|
__forceinline AffineSpaceT<T> lerp(const AffineSpaceT<T>& M0,
|
||||||
|
const AffineSpaceT<T>& M1,
|
||||||
|
const R& t)
|
||||||
|
{
|
||||||
|
return AffineSpaceT<T>(lerp(M0.l,M1.l,t),lerp(M0.p,M1.p,t));
|
||||||
|
}
|
||||||
|
|
||||||
|
// slerp interprets the 16 floats of the matrix M = D * R * S as components of
|
||||||
|
// three matrices (D, R, S) that are interpolated individually.
|
||||||
|
template<typename T> __forceinline AffineSpaceT<LinearSpace3<Vec3<T>>>
|
||||||
|
slerp(const AffineSpaceT<LinearSpace3<Vec4<T>>>& M0,
|
||||||
|
const AffineSpaceT<LinearSpace3<Vec4<T>>>& M1,
|
||||||
|
const T& t)
|
||||||
|
{
|
||||||
|
QuaternionT<T> q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
|
||||||
|
QuaternionT<T> q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
|
||||||
|
QuaternionT<T> q = slerp(q0, q1, t);
|
||||||
|
|
||||||
|
AffineSpaceT<LinearSpace3<Vec3<T>>> S = lerp(M0, M1, t);
|
||||||
|
AffineSpaceT<LinearSpace3<Vec3<T>>> D(one);
|
||||||
|
D.p.x = S.l.vx.y;
|
||||||
|
D.p.y = S.l.vx.z;
|
||||||
|
D.p.z = S.l.vy.z;
|
||||||
|
S.l.vx.y = 0;
|
||||||
|
S.l.vx.z = 0;
|
||||||
|
S.l.vy.z = 0;
|
||||||
|
|
||||||
|
AffineSpaceT<LinearSpace3<Vec3<T>>> R = LinearSpace3<Vec3<T>>(q);
|
||||||
|
return D * R * S;
|
||||||
|
}
|
||||||
|
|
||||||
|
// this is a specialized version for Vec3fa because that does
|
||||||
|
// not play along nicely with the other templated Vec3/Vec4 types
|
||||||
|
__forceinline AffineSpace3fa slerp(const AffineSpace3ff& M0,
|
||||||
|
const AffineSpace3ff& M1,
|
||||||
|
const float& t)
|
||||||
|
{
|
||||||
|
Quaternion3f q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
|
||||||
|
Quaternion3f q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
|
||||||
|
Quaternion3f q = slerp(q0, q1, t);
|
||||||
|
|
||||||
|
AffineSpace3fa S = lerp(M0, M1, t);
|
||||||
|
AffineSpace3fa D(one);
|
||||||
|
D.p.x = S.l.vx.y;
|
||||||
|
D.p.y = S.l.vx.z;
|
||||||
|
D.p.z = S.l.vy.z;
|
||||||
|
S.l.vx.y = 0;
|
||||||
|
S.l.vx.z = 0;
|
||||||
|
S.l.vy.z = 0;
|
||||||
|
|
||||||
|
AffineSpace3fa R = LinearSpace3fa(q);
|
||||||
|
return D * R * S;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline AffineSpace3fa quaternionDecompositionToAffineSpace(const AffineSpace3ff& qd)
|
||||||
|
{
|
||||||
|
// compute affine transform from quaternion decomposition
|
||||||
|
Quaternion3f q(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
|
||||||
|
AffineSpace3fa M = qd;
|
||||||
|
AffineSpace3fa D(one);
|
||||||
|
D.p.x = M.l.vx.y;
|
||||||
|
D.p.y = M.l.vx.z;
|
||||||
|
D.p.z = M.l.vy.z;
|
||||||
|
M.l.vx.y = 0;
|
||||||
|
M.l.vx.z = 0;
|
||||||
|
M.l.vy.z = 0;
|
||||||
|
AffineSpace3fa R = LinearSpace3fa(q);
|
||||||
|
return D * R * M;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void quaternionDecomposition(const AffineSpace3ff& qd, Vec3fa& T, Quaternion3f& q, AffineSpace3fa& S)
|
||||||
|
{
|
||||||
|
q = Quaternion3f(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
|
||||||
|
S = qd;
|
||||||
|
T.x = qd.l.vx.y;
|
||||||
|
T.y = qd.l.vx.z;
|
||||||
|
T.z = qd.l.vy.z;
|
||||||
|
S.l.vx.y = 0;
|
||||||
|
S.l.vx.z = 0;
|
||||||
|
S.l.vy.z = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline AffineSpace3fx quaternionDecomposition(Vec3fa const& T, Quaternion3f const& q, AffineSpace3fa const& S)
|
||||||
|
{
|
||||||
|
AffineSpace3ff M = S;
|
||||||
|
M.l.vx.w = q.i;
|
||||||
|
M.l.vy.w = q.j;
|
||||||
|
M.l.vz.w = q.k;
|
||||||
|
M.p.w = q.r;
|
||||||
|
M.l.vx.y = T.x;
|
||||||
|
M.l.vx.z = T.y;
|
||||||
|
M.l.vy.z = T.z;
|
||||||
|
return M;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct __aligned(16) QuaternionDecomposition
|
||||||
|
{
|
||||||
|
float scale_x = 1.f;
|
||||||
|
float scale_y = 1.f;
|
||||||
|
float scale_z = 1.f;
|
||||||
|
float skew_xy = 0.f;
|
||||||
|
float skew_xz = 0.f;
|
||||||
|
float skew_yz = 0.f;
|
||||||
|
float shift_x = 0.f;
|
||||||
|
float shift_y = 0.f;
|
||||||
|
float shift_z = 0.f;
|
||||||
|
float quaternion_r = 1.f;
|
||||||
|
float quaternion_i = 0.f;
|
||||||
|
float quaternion_j = 0.f;
|
||||||
|
float quaternion_k = 0.f;
|
||||||
|
float translation_x = 0.f;
|
||||||
|
float translation_y = 0.f;
|
||||||
|
float translation_z = 0.f;
|
||||||
|
};
|
||||||
|
|
||||||
|
__forceinline QuaternionDecomposition quaternionDecomposition(AffineSpace3ff const& M)
|
||||||
|
{
|
||||||
|
QuaternionDecomposition qd;
|
||||||
|
qd.scale_x = M.l.vx.x;
|
||||||
|
qd.scale_y = M.l.vy.y;
|
||||||
|
qd.scale_z = M.l.vz.z;
|
||||||
|
qd.shift_x = M.p.x;
|
||||||
|
qd.shift_y = M.p.y;
|
||||||
|
qd.shift_z = M.p.z;
|
||||||
|
qd.translation_x = M.l.vx.y;
|
||||||
|
qd.translation_y = M.l.vx.z;
|
||||||
|
qd.translation_z = M.l.vy.z;
|
||||||
|
qd.skew_xy = M.l.vy.x;
|
||||||
|
qd.skew_xz = M.l.vz.x;
|
||||||
|
qd.skew_yz = M.l.vz.y;
|
||||||
|
qd.quaternion_r = M.p.w;
|
||||||
|
qd.quaternion_i = M.l.vx.w;
|
||||||
|
qd.quaternion_j = M.l.vy.w;
|
||||||
|
qd.quaternion_k = M.l.vz.w;
|
||||||
|
return qd;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/*
|
||||||
|
* ! Template Specialization for 2D: return matrix for rotation around point
|
||||||
|
* (rotation around arbitrary vector is not meaningful in 2D)
|
||||||
|
*/
|
||||||
|
template<> __forceinline
|
||||||
|
AffineSpace2f AffineSpace2f::rotate(const Vec2f& p, const float& r) {
|
||||||
|
return translate(+p)*AffineSpace2f(LinearSpace2f::rotate(r))*translate(-p);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Similarity Transform
|
||||||
|
//
|
||||||
|
// checks whether M is a similarity transformation, i.e. whether there exists a factor D
|
||||||
|
// such that for all x,y: distance(Mx, My) = D * distance(x, y)
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
__forceinline bool similarityTransform(const AffineSpace3fa& M, float* D)
|
||||||
|
{
|
||||||
|
if (D) *D = 0.f;
|
||||||
|
if (abs(dot(M.l.vx, M.l.vy)) > 1e-5f) return false;
|
||||||
|
if (abs(dot(M.l.vx, M.l.vz)) > 1e-5f) return false;
|
||||||
|
if (abs(dot(M.l.vy, M.l.vz)) > 1e-5f) return false;
|
||||||
|
|
||||||
|
const float D_x = dot(M.l.vx, M.l.vx);
|
||||||
|
const float D_y = dot(M.l.vy, M.l.vy);
|
||||||
|
const float D_z = dot(M.l.vz, M.l.vz);
|
||||||
|
|
||||||
|
if (abs(D_x - D_y) > 1e-5f ||
|
||||||
|
abs(D_x - D_z) > 1e-5f ||
|
||||||
|
abs(D_y - D_z) > 1e-5f)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (D) *D = sqrtf(D_x);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void AffineSpace3fa_store_unaligned(const AffineSpace3fa &source, AffineSpace3fa* ptr)
|
||||||
|
{
|
||||||
|
Vec3fa::storeu(&ptr->l.vx, source.l.vx);
|
||||||
|
Vec3fa::storeu(&ptr->l.vy, source.l.vy);
|
||||||
|
Vec3fa::storeu(&ptr->l.vz, source.l.vz);
|
||||||
|
Vec3fa::storeu(&ptr->p, source.p);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline AffineSpace3fa AffineSpace3fa_load_unaligned(AffineSpace3fa* ptr)
|
||||||
|
{
|
||||||
|
AffineSpace3fa space;
|
||||||
|
space.l.vx = Vec3fa::loadu(&ptr->l.vx);
|
||||||
|
space.l.vy = Vec3fa::loadu(&ptr->l.vy);
|
||||||
|
space.l.vz = Vec3fa::loadu(&ptr->l.vz);
|
||||||
|
space.p = Vec3fa::loadu(&ptr->p);
|
||||||
|
return space;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef VectorT
|
||||||
|
#undef ScalarT
|
||||||
|
}
|
||||||
336
Framework/external/embree/common/math/bbox.h
vendored
Normal file
336
Framework/external/embree/common/math/bbox.h
vendored
Normal file
|
|
@ -0,0 +1,336 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "vec2.h"
|
||||||
|
#include "vec3.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
template <typename T> __forceinline T divideByTwo(const T& v) { return v / T(2); }
|
||||||
|
template <> __forceinline float divideByTwo<float>(const float& v) { return v * 0.5f; }
|
||||||
|
template <> __forceinline double divideByTwo<double>(const double& v) { return v * 0.5; }
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
template<typename T>
|
||||||
|
struct BBox
|
||||||
|
{
|
||||||
|
T lower, upper;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline BBox ( ) { }
|
||||||
|
template<typename T1>
|
||||||
|
__forceinline BBox ( const BBox<T1>& other ) : lower(other.lower), upper(other.upper) {}
|
||||||
|
__forceinline BBox& operator=( const BBox& other ) { lower = other.lower; upper = other.upper; return *this; }
|
||||||
|
|
||||||
|
__forceinline BBox ( const T& v ) : lower(v), upper(v) {}
|
||||||
|
__forceinline BBox ( const T& lower, const T& upper ) : lower(lower), upper(upper) {}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Extending Bounds
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline const BBox& extend(const BBox& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; }
|
||||||
|
__forceinline const BBox& extend(const T & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; }
|
||||||
|
|
||||||
|
/*! tests if box is empty */
|
||||||
|
__forceinline bool empty() const { for (int i=0; i<T::N; i++) if (lower[i] > upper[i]) return true; return false; }
|
||||||
|
|
||||||
|
/*! computes the size of the box */
|
||||||
|
__forceinline T size() const { return upper - lower; }
|
||||||
|
|
||||||
|
/*! computes the center of the box */
|
||||||
|
__forceinline T center() const { return internal::divideByTwo<T>(lower+upper); }
|
||||||
|
|
||||||
|
/*! computes twice the center of the box */
|
||||||
|
__forceinline T center2() const { return lower+upper; }
|
||||||
|
|
||||||
|
/*! merges two boxes */
|
||||||
|
__forceinline static const BBox merge (const BBox& a, const BBox& b) {
|
||||||
|
return BBox(min(a.lower, b.lower), max(a.upper, b.upper));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! intersects two boxes */
|
||||||
|
__forceinline static const BBox intersect (const BBox& a, const BBox& b) {
|
||||||
|
return BBox(max(a.lower, b.lower), min(a.upper, b.upper));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! enlarge box by some scaling factor */
|
||||||
|
__forceinline BBox enlarge_by(const float a) const {
|
||||||
|
return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline BBox( EmptyTy ) : lower(pos_inf), upper(neg_inf) {}
|
||||||
|
__forceinline BBox( FullTy ) : lower(neg_inf), upper(pos_inf) {}
|
||||||
|
__forceinline BBox( FalseTy ) : lower(pos_inf), upper(neg_inf) {}
|
||||||
|
__forceinline BBox( TrueTy ) : lower(neg_inf), upper(pos_inf) {}
|
||||||
|
__forceinline BBox( NegInfTy ): lower(pos_inf), upper(neg_inf) {}
|
||||||
|
__forceinline BBox( PosInfTy ): lower(neg_inf), upper(pos_inf) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> __forceinline bool BBox<float>::empty() const {
|
||||||
|
return lower > upper;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||||
|
template<> __forceinline bool BBox<Vec3fa>::empty() const {
|
||||||
|
return !all(le_mask(lower,upper));
|
||||||
|
}
|
||||||
|
template<> __forceinline bool BBox<Vec3fx>::empty() const {
|
||||||
|
return !all(le_mask(lower,upper));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*! tests if box is finite */
|
||||||
|
__forceinline bool isvalid( const BBox<Vec3fa>& v ) {
|
||||||
|
return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! tests if box is finite and non-empty*/
|
||||||
|
__forceinline bool isvalid_non_empty( const BBox<Vec3fa>& v ) {
|
||||||
|
return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)) & le_mask(v.lower,v.upper));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! tests if box has finite entries */
|
||||||
|
__forceinline bool is_finite( const BBox<Vec3fa>& b) {
|
||||||
|
return is_finite(b.lower) && is_finite(b.upper);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! test if point contained in box */
|
||||||
|
__forceinline bool inside ( const BBox<Vec3fa>& b, const Vec3fa& p ) { return all(ge_mask(p,b.lower) & le_mask(p,b.upper)); }
|
||||||
|
|
||||||
|
/*! computes the center of the box */
|
||||||
|
template<typename T> __forceinline const T center2(const BBox<T>& box) { return box.lower + box.upper; }
|
||||||
|
template<typename T> __forceinline const T center (const BBox<T>& box) { return internal::divideByTwo<T>(center2(box)); }
|
||||||
|
|
||||||
|
/*! computes the volume of a bounding box */
|
||||||
|
__forceinline float volume ( const BBox<Vec3fa>& b ) { return reduce_mul(b.size()); }
|
||||||
|
__forceinline float safeVolume( const BBox<Vec3fa>& b ) { if (b.empty()) return 0.0f; else return volume(b); }
|
||||||
|
|
||||||
|
/*! computes the volume of a bounding box */
|
||||||
|
__forceinline float volume( const BBox<Vec3f>& b ) { return reduce_mul(b.size()); }
|
||||||
|
|
||||||
|
/*! computes the surface area of a bounding box */
|
||||||
|
template<typename T> __forceinline const T area( const BBox<Vec2<T> >& b ) { const Vec2<T> d = b.size(); return d.x*d.y; }
|
||||||
|
|
||||||
|
template<typename T> __forceinline const T halfArea( const BBox<Vec3<T> >& b ) { return halfArea(b.size()); }
|
||||||
|
template<typename T> __forceinline const T area( const BBox<Vec3<T> >& b ) { return T(2)*halfArea(b); }
|
||||||
|
|
||||||
|
__forceinline float halfArea( const BBox<Vec3fa>& b ) { return halfArea(b.size()); }
|
||||||
|
__forceinline float area( const BBox<Vec3fa>& b ) { return 2.0f*halfArea(b); }
|
||||||
|
|
||||||
|
__forceinline float halfArea( const BBox<Vec3fx>& b ) { return halfArea(b.size()); }
|
||||||
|
__forceinline float area( const BBox<Vec3fx>& b ) { return 2.0f*halfArea(b); }
|
||||||
|
|
||||||
|
template<typename Vec> __forceinline float safeArea( const BBox<Vec>& b ) { if (b.empty()) return 0.0f; else return area(b); }
|
||||||
|
|
||||||
|
template<typename T> __forceinline float expectedApproxHalfArea(const BBox<T>& box) {
|
||||||
|
return halfArea(box);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! merges bounding boxes and points */
|
||||||
|
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const T& b ) { return BBox<T>(min(a.lower, b ), max(a.upper, b )); }
|
||||||
|
template<typename T> __forceinline const BBox<T> merge( const T& a, const BBox<T>& b ) { return BBox<T>(min(a , b.lower), max(a , b.upper)); }
|
||||||
|
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(min(a.lower, b.lower), max(a.upper, b.upper)); }
|
||||||
|
|
||||||
|
/*! Merges three boxes. */
|
||||||
|
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return merge(a,merge(b,c)); }
|
||||||
|
|
||||||
|
/*! Merges four boxes. */
|
||||||
|
template<typename T> __forceinline BBox<T> merge(const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d) {
|
||||||
|
return merge(merge(a,b),merge(c,d));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Comparison Operators */
|
||||||
|
template<typename T> __forceinline bool operator==( const BBox<T>& a, const BBox<T>& b ) { return a.lower == b.lower && a.upper == b.upper; }
|
||||||
|
template<typename T> __forceinline bool operator!=( const BBox<T>& a, const BBox<T>& b ) { return a.lower != b.lower || a.upper != b.upper; }
|
||||||
|
|
||||||
|
/*! scaling */
|
||||||
|
template<typename T> __forceinline BBox<T> operator *( const float& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
|
||||||
|
template<typename T> __forceinline BBox<T> operator *( const T& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
|
||||||
|
|
||||||
|
/*! translations */
|
||||||
|
template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower+b.lower,a.upper+b.upper); }
|
||||||
|
template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower-b.lower,a.upper-b.upper); }
|
||||||
|
template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower+b ,a.upper+b ); }
|
||||||
|
template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower-b ,a.upper-b ); }
|
||||||
|
|
||||||
|
/*! extension */
|
||||||
|
template<typename T> __forceinline BBox<T> enlarge(const BBox<T>& a, const T& b) { return BBox<T>(a.lower-b, a.upper+b); }
|
||||||
|
|
||||||
|
/*! intersect bounding boxes */
|
||||||
|
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(max(a.lower, b.lower), min(a.upper, b.upper)); }
|
||||||
|
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return intersect(a,intersect(b,c)); }
|
||||||
|
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d ) { return intersect(intersect(a,b),intersect(c,d)); }
|
||||||
|
|
||||||
|
/*! subtract bounds from each other */
|
||||||
|
template<typename T> __forceinline void subtract(const BBox<T>& a, const BBox<T>& b, BBox<T>& c, BBox<T>& d)
|
||||||
|
{
|
||||||
|
c.lower = a.lower;
|
||||||
|
c.upper = min(a.upper,b.lower);
|
||||||
|
d.lower = max(a.lower,b.upper);
|
||||||
|
d.upper = a.upper;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! tests if bounding boxes (and points) are disjoint (empty intersection) */
|
||||||
|
template<typename T> __inline bool disjoint( const BBox<T>& a, const BBox<T>& b ) { return intersect(a,b).empty(); }
|
||||||
|
template<typename T> __inline bool disjoint( const BBox<T>& a, const T& b ) { return disjoint(a,BBox<T>(b)); }
|
||||||
|
template<typename T> __inline bool disjoint( const T& a, const BBox<T>& b ) { return disjoint(BBox<T>(a),b); }
|
||||||
|
|
||||||
|
/*! tests if bounding boxes (and points) are conjoint (non-empty intersection) */
|
||||||
|
template<typename T> __inline bool conjoint( const BBox<T>& a, const BBox<T>& b ) { return !intersect(a,b).empty(); }
|
||||||
|
template<typename T> __inline bool conjoint( const BBox<T>& a, const T& b ) { return conjoint(a,BBox<T>(b)); }
|
||||||
|
template<typename T> __inline bool conjoint( const T& a, const BBox<T>& b ) { return conjoint(BBox<T>(a),b); }
|
||||||
|
|
||||||
|
/*! subset relation */
|
||||||
|
template<typename T> __inline bool subset( const BBox<T>& a, const BBox<T>& b )
|
||||||
|
{
|
||||||
|
for ( size_t i = 0; i < T::N; i++ ) if ( a.lower[i] < b.lower[i] ) return false;
|
||||||
|
for ( size_t i = 0; i < T::N; i++ ) if ( a.upper[i] > b.upper[i] ) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! subset relation for Vec3fa boxes, evaluated with the ge_mask / le_mask
 *  comparisons instead of a per-component loop */
template<>
__inline bool subset( const BBox<Vec3fa>& a, const BBox<Vec3fa>& b )
{
  return all(ge_mask(a.lower, b.lower))
      && all(le_mask(a.upper, b.upper));
}
|
||||||
|
|
||||||
|
/*! subset relation for Vec3fx boxes, evaluated with the ge_mask / le_mask
 *  comparisons instead of a per-component loop */
template<>
__inline bool subset( const BBox<Vec3fx>& a, const BBox<Vec3fx>& b )
{
  return all(ge_mask(a.lower, b.lower))
      && all(le_mask(a.upper, b.upper));
}
|
||||||
|
|
||||||
|
/*! blending */
|
||||||
|
template<typename T>
|
||||||
|
__forceinline BBox<T> lerp(const BBox<T>& b0, const BBox<T>& b1, const float t) {
|
||||||
|
return BBox<T>(lerp(b0.lower,b1.lower,t),lerp(b0.upper,b1.upper,t));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! output operator */
|
||||||
|
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const BBox<T>& box) {
|
||||||
|
return cout << "[" << box.lower << "; " << box.upper << "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! default template instantiations */
|
||||||
|
typedef BBox<float> BBox1f;
|
||||||
|
typedef BBox<Vec2f> BBox2f;
|
||||||
|
typedef BBox<Vec2fa> BBox2fa;
|
||||||
|
typedef BBox<Vec3f> BBox3f;
|
||||||
|
typedef BBox<Vec3fa> BBox3fa;
|
||||||
|
typedef BBox<Vec3fx> BBox3fx;
|
||||||
|
typedef BBox<Vec3ff> BBox3ff;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE / AVX / MIC specializations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined (__SSE__) || defined(__ARM_NEON)
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined (__AVX__)
|
||||||
|
#include "../simd/avx.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX512F__)
|
||||||
|
#include "../simd/avx512.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<int N>
|
||||||
|
__forceinline BBox<Vec3<vfloat<N>>> transpose(const BBox3fa* bounds);
|
||||||
|
|
||||||
|
template<>
|
||||||
|
__forceinline BBox<Vec3<vfloat4>> transpose<4>(const BBox3fa* bounds)
|
||||||
|
{
|
||||||
|
BBox<Vec3<vfloat4>> dest;
|
||||||
|
|
||||||
|
transpose((vfloat4&)bounds[0].lower,
|
||||||
|
(vfloat4&)bounds[1].lower,
|
||||||
|
(vfloat4&)bounds[2].lower,
|
||||||
|
(vfloat4&)bounds[3].lower,
|
||||||
|
dest.lower.x,
|
||||||
|
dest.lower.y,
|
||||||
|
dest.lower.z);
|
||||||
|
|
||||||
|
transpose((vfloat4&)bounds[0].upper,
|
||||||
|
(vfloat4&)bounds[1].upper,
|
||||||
|
(vfloat4&)bounds[2].upper,
|
||||||
|
(vfloat4&)bounds[3].upper,
|
||||||
|
dest.upper.x,
|
||||||
|
dest.upper.y,
|
||||||
|
dest.upper.z);
|
||||||
|
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
template<>
|
||||||
|
__forceinline BBox<Vec3<vfloat8>> transpose<8>(const BBox3fa* bounds)
|
||||||
|
{
|
||||||
|
BBox<Vec3<vfloat8>> dest;
|
||||||
|
|
||||||
|
transpose((vfloat4&)bounds[0].lower,
|
||||||
|
(vfloat4&)bounds[1].lower,
|
||||||
|
(vfloat4&)bounds[2].lower,
|
||||||
|
(vfloat4&)bounds[3].lower,
|
||||||
|
(vfloat4&)bounds[4].lower,
|
||||||
|
(vfloat4&)bounds[5].lower,
|
||||||
|
(vfloat4&)bounds[6].lower,
|
||||||
|
(vfloat4&)bounds[7].lower,
|
||||||
|
dest.lower.x,
|
||||||
|
dest.lower.y,
|
||||||
|
dest.lower.z);
|
||||||
|
|
||||||
|
transpose((vfloat4&)bounds[0].upper,
|
||||||
|
(vfloat4&)bounds[1].upper,
|
||||||
|
(vfloat4&)bounds[2].upper,
|
||||||
|
(vfloat4&)bounds[3].upper,
|
||||||
|
(vfloat4&)bounds[4].upper,
|
||||||
|
(vfloat4&)bounds[5].upper,
|
||||||
|
(vfloat4&)bounds[6].upper,
|
||||||
|
(vfloat4&)bounds[7].upper,
|
||||||
|
dest.upper.x,
|
||||||
|
dest.upper.y,
|
||||||
|
dest.upper.z);
|
||||||
|
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<int N>
|
||||||
|
__forceinline BBox3fa merge(const BBox3fa* bounds);
|
||||||
|
|
||||||
|
template<>
|
||||||
|
__forceinline BBox3fa merge<4>(const BBox3fa* bounds)
|
||||||
|
{
|
||||||
|
const Vec3fa lower = min(min(bounds[0].lower,bounds[1].lower),
|
||||||
|
min(bounds[2].lower,bounds[3].lower));
|
||||||
|
const Vec3fa upper = max(max(bounds[0].upper,bounds[1].upper),
|
||||||
|
max(bounds[2].upper,bounds[3].upper));
|
||||||
|
return BBox3fa(lower,upper);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
template<>
|
||||||
|
__forceinline BBox3fa merge<8>(const BBox3fa* bounds)
|
||||||
|
{
|
||||||
|
const Vec3fa lower = min(min(min(bounds[0].lower,bounds[1].lower),min(bounds[2].lower,bounds[3].lower)),
|
||||||
|
min(min(bounds[4].lower,bounds[5].lower),min(bounds[6].lower,bounds[7].lower)));
|
||||||
|
const Vec3fa upper = max(max(max(bounds[0].upper,bounds[1].upper),max(bounds[2].upper,bounds[3].upper)),
|
||||||
|
max(max(bounds[4].upper,bounds[5].upper),max(bounds[6].upper,bounds[7].upper)));
|
||||||
|
return BBox3fa(lower,upper);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
47
Framework/external/embree/common/math/col3.h
vendored
Normal file
47
Framework/external/embree/common/math/col3.h
vendored
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "emath.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// RGB Color Class
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> struct Col3
|
||||||
|
{
|
||||||
|
T r, g, b;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Col3 ( ) { }
|
||||||
|
__forceinline Col3 ( const Col3& other ) { r = other.r; g = other.g; b = other.b; }
|
||||||
|
__forceinline Col3& operator=( const Col3& other ) { r = other.r; g = other.g; b = other.b; return *this; }
|
||||||
|
|
||||||
|
__forceinline explicit Col3 (const T& v) : r(v), g(v), b(v) {}
|
||||||
|
__forceinline Col3 (const T& r, const T& g, const T& b) : r(r), g(g), b(b) {}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Col3 (ZeroTy) : r(zero) , g(zero) , b(zero) {}
|
||||||
|
__forceinline Col3 (OneTy) : r(one) , g(one) , b(one) {}
|
||||||
|
__forceinline Col3 (PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf) {}
|
||||||
|
__forceinline Col3 (NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! output operator */
|
||||||
|
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Col3<T>& a) {
|
||||||
|
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! default template instantiations */
|
||||||
|
typedef Col3<unsigned char> Col3uc;
|
||||||
|
typedef Col3<float > Col3f;
|
||||||
|
}
|
||||||
47
Framework/external/embree/common/math/col4.h
vendored
Normal file
47
Framework/external/embree/common/math/col4.h
vendored
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "emath.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// RGBA Color Class
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> struct Col4
|
||||||
|
{
|
||||||
|
T r, g, b, a;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Col4 ( ) { }
|
||||||
|
__forceinline Col4 ( const Col4& other ) { r = other.r; g = other.g; b = other.b; a = other.a; }
|
||||||
|
__forceinline Col4& operator=( const Col4& other ) { r = other.r; g = other.g; b = other.b; a = other.a; return *this; }
|
||||||
|
|
||||||
|
__forceinline explicit Col4 (const T& v) : r(v), g(v), b(v), a(v) {}
|
||||||
|
__forceinline Col4 (const T& r, const T& g, const T& b, const T& a) : r(r), g(g), b(b), a(a) {}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Col4 (ZeroTy) : r(zero) , g(zero) , b(zero) , a(zero) {}
|
||||||
|
__forceinline Col4 (OneTy) : r(one) , g(one) , b(one) , a(one) {}
|
||||||
|
__forceinline Col4 (PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf), a(pos_inf) {}
|
||||||
|
__forceinline Col4 (NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf), a(neg_inf) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! output operator */
|
||||||
|
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Col4<T>& a) {
|
||||||
|
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ", " << a.a << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! default template instantiations */
|
||||||
|
typedef Col4<unsigned char> Col4uc;
|
||||||
|
typedef Col4<float > Col4f;
|
||||||
|
}
|
||||||
268
Framework/external/embree/common/math/color.h
vendored
Normal file
268
Framework/external/embree/common/math/color.h
vendored
Normal file
|
|
@ -0,0 +1,268 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
# include "color_sycl.h"
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include "constants.h"
|
||||||
|
#include "col3.h"
|
||||||
|
#include "col4.h"
|
||||||
|
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE RGBA Color Class
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct Color4
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
__m128 m128;
|
||||||
|
struct { float r,g,b,a; };
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Color4 () {}
|
||||||
|
__forceinline Color4 ( const __m128 a ) : m128(a) {}
|
||||||
|
|
||||||
|
__forceinline explicit Color4 (const float v) : m128(_mm_set1_ps(v)) {}
|
||||||
|
__forceinline Color4 (const float r, const float g, const float b, const float a) : m128(_mm_set_ps(a,b,g,r)) {}
|
||||||
|
|
||||||
|
__forceinline explicit Color4 ( const Col3uc& other ) { m128 = _mm_mul_ps(_mm_set_ps(255.0f,other.b,other.g,other.r),_mm_set1_ps(one_over_255)); }
|
||||||
|
__forceinline explicit Color4 ( const Col3f& other ) { m128 = _mm_set_ps(1.0f,other.b,other.g,other.r); }
|
||||||
|
__forceinline explicit Color4 ( const Col4uc& other ) { m128 = _mm_mul_ps(_mm_set_ps(other.a,other.b,other.g,other.r),_mm_set1_ps(one_over_255)); }
|
||||||
|
__forceinline explicit Color4 ( const Col4f& other ) { m128 = _mm_set_ps(other.a,other.b,other.g,other.r); }
|
||||||
|
|
||||||
|
__forceinline Color4 ( const Color4& other ) : m128(other.m128) {}
|
||||||
|
__forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; }
|
||||||
|
|
||||||
|
__forceinline operator const __m128&() const { return m128; }
|
||||||
|
__forceinline operator __m128&() { return m128; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Set
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
|
||||||
|
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
|
||||||
|
__forceinline void set(Col3uc& d) const
|
||||||
|
{
|
||||||
|
vfloat4 s = clamp(vfloat4(m128))*255.0f;
|
||||||
|
d.r = (unsigned char)(s[0]);
|
||||||
|
d.g = (unsigned char)(s[1]);
|
||||||
|
d.b = (unsigned char)(s[2]);
|
||||||
|
}
|
||||||
|
__forceinline void set(Col4uc& d) const
|
||||||
|
{
|
||||||
|
vfloat4 s = clamp(vfloat4(m128))*255.0f;
|
||||||
|
d.r = (unsigned char)(s[0]);
|
||||||
|
d.g = (unsigned char)(s[1]);
|
||||||
|
d.b = (unsigned char)(s[2]);
|
||||||
|
d.a = (unsigned char)(s[3]);
|
||||||
|
}
|
||||||
|
__forceinline void set(float &f) const
|
||||||
|
{
|
||||||
|
f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance.
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Color4( ZeroTy ) : m128(_mm_set1_ps(0.0f)) {}
|
||||||
|
__forceinline Color4( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
|
||||||
|
__forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
|
||||||
|
__forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE RGB Color Class
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct Color
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
__m128 m128;
|
||||||
|
struct { float r,g,b; };
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Color () {}
|
||||||
|
__forceinline Color ( const __m128 a ) : m128(a) {}
|
||||||
|
|
||||||
|
__forceinline explicit Color (const float v) : m128(_mm_set1_ps(v)) {}
|
||||||
|
__forceinline Color (const float r, const float g, const float b) : m128(_mm_set_ps(0.0f,b,g,r)) {}
|
||||||
|
|
||||||
|
__forceinline Color ( const Color& other ) : m128(other.m128) {}
|
||||||
|
__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
|
||||||
|
|
||||||
|
__forceinline Color ( const Color4& other ) : m128(other.m128) {}
|
||||||
|
__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
|
||||||
|
|
||||||
|
__forceinline operator const __m128&() const { return m128; }
|
||||||
|
__forceinline operator __m128&() { return m128; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Set
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
|
||||||
|
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
|
||||||
|
__forceinline void set(Col3uc& d) const
|
||||||
|
{
|
||||||
|
vfloat4 s = clamp(vfloat4(m128))*255.0f;
|
||||||
|
d.r = (unsigned char)(s[0]);
|
||||||
|
d.g = (unsigned char)(s[1]);
|
||||||
|
d.b = (unsigned char)(s[2]);
|
||||||
|
}
|
||||||
|
__forceinline void set(Col4uc& d) const
|
||||||
|
{
|
||||||
|
vfloat4 s = clamp(vfloat4(m128))*255.0f;
|
||||||
|
d.r = (unsigned char)(s[0]);
|
||||||
|
d.g = (unsigned char)(s[1]);
|
||||||
|
d.b = (unsigned char)(s[2]);
|
||||||
|
d.a = 255;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Color( ZeroTy ) : m128(_mm_set1_ps(0.0f)) {}
|
||||||
|
__forceinline Color( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
|
||||||
|
__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
|
||||||
|
__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! unary plus: returns a unchanged */
__forceinline const Color operator +( const Color& a )
{
  return a;
}

/*! unary minus: flips the sign bit (0x80000000) of every lane */
__forceinline const Color operator -( const Color& a )
{
  const __m128 sign_bits = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
  return _mm_xor_ps(a.m128, sign_bits);
}

/*! absolute value: clears the sign bit of every lane (mask 0x7fffffff) */
__forceinline const Color abs ( const Color& a )
{
  const __m128 magnitude_bits = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
  return _mm_and_ps(a.m128, magnitude_bits);
}
|
||||||
|
/*! lane-wise reciprocal: starts from a hardware estimate and refines it
 *  (two vrecpsq_f32 steps on aarch64, one r + r*(1 - a*r) step otherwise) */
__forceinline const Color rcp  ( const Color& a )
{
#if defined(__aarch64__)
  __m128 estimate = _mm_rcp_ps(a.m128);
  estimate = vmulq_f32(vrecpsq_f32(a.m128, estimate), estimate);
  estimate = vmulq_f32(vrecpsq_f32(a.m128, estimate), estimate);
  return (const Color)estimate;
#else
#if defined(__AVX512VL__)
  const Color r = _mm_rcp14_ps(a.m128);
#else
  const Color r = _mm_rcp_ps(a.m128);
#endif
  // computes r + r * (1 - a * r)
  const __m128 residual = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(a, r));
  return _mm_add_ps(r, _mm_mul_ps(r, residual));

#endif  //defined(__aarch64__)
}
|
||||||
|
/*! lane-wise reciprocal square root: starts from a hardware estimate and
 *  refines it (two vrsqrtsq_f32 steps on aarch64, one
 *  1.5*r + (a*-0.5*r)*(r*r) step otherwise) */
__forceinline const Color rsqrt( const Color& a )
{
#if defined(__aarch64__)
  __m128 estimate = _mm_rsqrt_ps(a.m128);
  estimate = vmulq_f32(estimate, vrsqrtsq_f32(vmulq_f32(a.m128, estimate), estimate));
  estimate = vmulq_f32(estimate, vrsqrtsq_f32(vmulq_f32(a.m128, estimate), estimate));
  return estimate;
#else

#if defined(__AVX512VL__)
  __m128 r = _mm_rsqrt14_ps(a.m128);
#else
  __m128 r = _mm_rsqrt_ps(a.m128);
#endif
  const __m128 scaled_r      = _mm_mul_ps(_mm_set1_ps(1.5f), r);
  const __m128 neg_half_a_r  = _mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r);
  const __m128 r_squared     = _mm_mul_ps(r, r);
  return _mm_add_ps(scaled_r, _mm_mul_ps(neg_half_a_r, r_squared));

#endif  //defined(__aarch64__)
}
|
||||||
|
/*! lane-wise square root via _mm_sqrt_ps */
__forceinline const Color sqrt ( const Color& a )
{
  return _mm_sqrt_ps(a.m128);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! lane-wise sum */
__forceinline const Color operator +( const Color& a, const Color& b )
{
  return _mm_add_ps(a.m128, b.m128);
}

/*! lane-wise difference */
__forceinline const Color operator -( const Color& a, const Color& b )
{
  return _mm_sub_ps(a.m128, b.m128);
}

/*! lane-wise product */
__forceinline const Color operator *( const Color& a, const Color& b )
{
  return _mm_mul_ps(a.m128, b.m128);
}

/*! color times scalar: the scalar is broadcast into a Color first */
__forceinline const Color operator *( const Color& a, const float b )
{
  return a * Color(b);
}

/*! scalar times color: the scalar is broadcast into a Color first */
__forceinline const Color operator *( const float a, const Color& b )
{
  return Color(a) * b;
}

/*! lane-wise quotient, computed as a * rcp(b) */
__forceinline const Color operator /( const Color& a, const Color& b )
{
  return a * rcp(b);
}

/*! color divided by scalar, computed as a * rcp(b) */
__forceinline const Color operator /( const Color& a, const float b )
{
  return a * rcp(b);
}

/*! lane-wise minimum */
__forceinline const Color min( const Color& a, const Color& b )
{
  return _mm_min_ps(a.m128, b.m128);
}

/*! lane-wise maximum */
__forceinline const Color max( const Color& a, const Color& b )
{
  return _mm_max_ps(a.m128, b.m128);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! compound assignments: each stores the result of the matching binary
 *  operator into a and returns the stored value by copy */
__forceinline const Color operator+=(Color& a, const Color& b)
{
  return a = a + b;
}

__forceinline const Color operator-=(Color& a, const Color& b)
{
  return a = a - b;
}

__forceinline const Color operator*=(Color& a, const Color& b)
{
  return a = a * b;
}

__forceinline const Color operator/=(Color& a, const Color& b)
{
  return a = a / b;
}

__forceinline const Color operator*=(Color& a, const float b )
{
  return a = a * b;
}

__forceinline const Color operator/=(Color& a, const float b )
{
  return a = a / b;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! sum of the r, g and b channels */
__forceinline float reduce_add(const Color& v)
{
  return v.r+v.g+v.b;
}

/*! product of the r, g and b channels */
__forceinline float reduce_mul(const Color& v)
{
  return v.r*v.g*v.b;
}

/*! smallest of the r, g and b channels */
__forceinline float reduce_min(const Color& v)
{
  return min(v.r,v.g,v.b);
}

/*! largest of the r, g and b channels */
__forceinline float reduce_max(const Color& v)
{
  return max(v.r,v.g,v.b);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! equality: the low three mask bits (r, g, b) must all report equal; the fourth lane is ignored */
__forceinline bool operator ==( const Color& a, const Color& b )
{
  const int equal_lanes = _mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128));
  return (equal_lanes & 7) == 7;
}

/*! inequality: any of the low three mask bits (r, g, b) reports a difference; the fourth lane is ignored */
__forceinline bool operator !=( const Color& a, const Color& b )
{
  const int differing_lanes = _mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128));
  return (differing_lanes & 7) != 0;
}
|
||||||
|
/*! lexicographic ordering over r, then g, then b; the first channel that
 *  differs decides, and equal colors compare as not-less */
__forceinline bool operator < ( const Color& a, const Color& b )
{
  if (a.r != b.r) return a.r < b.r;
  if (a.g != b.g) return a.g < b.g;
  return (a.b != b.b) && (a.b < b.b);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! picks t when s is true and f otherwise, by blending with a mask that is
 *  all ones (cmpeq of zero with zero) or all zeros */
__forceinline const Color select( bool s, const Color& t, const Color& f )
{
  __m128 lane_mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))
                       : _mm_setzero_ps();
  return blendv_ps(f, t, lane_mask);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Special Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! computes luminance of a color */
|
||||||
|
__forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
|
||||||
|
|
||||||
|
/*! output operator */
|
||||||
|
__forceinline embree_ostream operator<<(embree_ostream cout, const Color& a) {
|
||||||
|
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
219
Framework/external/embree/common/math/color_sycl.h
vendored
Normal file
219
Framework/external/embree/common/math/color_sycl.h
vendored
Normal file
|
|
@ -0,0 +1,219 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "constants.h"
|
||||||
|
#include "col3.h"
|
||||||
|
#include "col4.h"
|
||||||
|
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// RGBA Color Class (SYCL device variant: plain floats, no SSE register)
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct Color4
|
||||||
|
{
|
||||||
|
struct { float r,g,b,a; };
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Color4 () {}
|
||||||
|
//__forceinline Color4 ( const __m128 a ) : m128(a) {}
|
||||||
|
|
||||||
|
__forceinline explicit Color4 (const float v) : r(v), g(v), b(v), a(v) {}
|
||||||
|
__forceinline Color4 (const float r, const float g, const float b, const float a) : r(r), g(g), b(b), a(a) {}
|
||||||
|
|
||||||
|
__forceinline explicit Color4 ( const Col3uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(1.0f) {}
|
||||||
|
__forceinline explicit Color4 ( const Col3f& other ) : r(other.r), g(other.g), b(other.b), a(1.0f) {}
|
||||||
|
__forceinline explicit Color4 ( const Col4uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(other.a/255.0f) {}
|
||||||
|
__forceinline explicit Color4 ( const Col4f& other ) : r(other.r), g(other.g), b(other.b), a(other.a) {}
|
||||||
|
|
||||||
|
//__forceinline Color4 ( const Color4& other ) : m128(other.m128) {}
|
||||||
|
//__forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; }
|
||||||
|
|
||||||
|
//__forceinline operator const __m128&() const { return m128; }
|
||||||
|
//__forceinline operator __m128&() { return m128; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Set
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
|
||||||
|
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
|
||||||
|
|
||||||
|
__forceinline void set(Col3uc& d) const
|
||||||
|
{
|
||||||
|
d.r = (unsigned char)(clamp(r)*255.0f);
|
||||||
|
d.g = (unsigned char)(clamp(g)*255.0f);
|
||||||
|
d.b = (unsigned char)(clamp(b)*255.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void set(Col4uc& d) const
|
||||||
|
{
|
||||||
|
d.r = (unsigned char)(clamp(r)*255.0f);
|
||||||
|
d.g = (unsigned char)(clamp(g)*255.0f);
|
||||||
|
d.b = (unsigned char)(clamp(b)*255.0f);
|
||||||
|
d.a = (unsigned char)(clamp(a)*255.0f);
|
||||||
|
}
|
||||||
|
__forceinline void set(float &f) const
|
||||||
|
{
|
||||||
|
f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance.
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Color4( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f), a(0.0f) {}
|
||||||
|
__forceinline Color4( OneTy ) : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
|
||||||
|
//__forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
|
||||||
|
//__forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// RGB Color Class (SYCL device variant: plain floats, no SSE register)
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct Color
|
||||||
|
{
|
||||||
|
struct { float r,g,b; };
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Color () {}
|
||||||
|
//__forceinline Color ( const __m128 a ) : m128(a) {}
|
||||||
|
|
||||||
|
__forceinline explicit Color (const float v) : r(v), g(v), b(v) {}
|
||||||
|
__forceinline Color (const float r, const float g, const float b) : r(r), g(g), b(b) {}
|
||||||
|
|
||||||
|
//__forceinline Color ( const Color& other ) : m128(other.m128) {}
|
||||||
|
//__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
|
||||||
|
|
||||||
|
//__forceinline Color ( const Color4& other ) : m128(other.m128) {}
|
||||||
|
//__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
|
||||||
|
|
||||||
|
//__forceinline operator const __m128&() const { return m128; }
|
||||||
|
//__forceinline operator __m128&() { return m128; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Set
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
|
||||||
|
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
__forceinline void set(Col3uc& d) const
|
||||||
|
{
|
||||||
|
vfloat4 s = clamp(vfloat4(m128))*255.0f;
|
||||||
|
d.r = (unsigned char)(s[0]);
|
||||||
|
d.g = (unsigned char)(s[1]);
|
||||||
|
d.b = (unsigned char)(s[2]);
|
||||||
|
}
|
||||||
|
__forceinline void set(Col4uc& d) const
|
||||||
|
{
|
||||||
|
vfloat4 s = clamp(vfloat4(m128))*255.0f;
|
||||||
|
d.r = (unsigned char)(s[0]);
|
||||||
|
d.g = (unsigned char)(s[1]);
|
||||||
|
d.b = (unsigned char)(s[2]);
|
||||||
|
d.a = 255;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Color( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f) {}
|
||||||
|
__forceinline Color( OneTy ) : r(1.0f), g(1.0f), b(1.0f) {}
|
||||||
|
//__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
|
||||||
|
//__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! unary plus: returns a unchanged */
__forceinline const Color operator +( const Color& a )
{
  return a;
}

/*! unary minus: negates each channel */
__forceinline const Color operator -( const Color& a )
{
  return Color(-a.r, -a.g, -a.b);
}

/*! per-channel absolute value */
__forceinline const Color abs  ( const Color& a )
{
  return Color(abs(a.r), abs(a.g), abs(a.b));
}

/*! per-channel reciprocal, computed as 1.0f / channel */
__forceinline const Color rcp  ( const Color& a )
{
  return Color(1.0f/a.r, 1.0f/a.g, 1.0f/a.b);
}

/*! per-channel reciprocal square root, computed as 1.0f / sqrt(channel) */
__forceinline const Color rsqrt( const Color& a )
{
  return Color(1.0f/sqrt(a.r), 1.0f/sqrt(a.g), 1.0f/sqrt(a.b));
}

/*! per-channel square root */
__forceinline const Color sqrt ( const Color& a )
{
  return Color(sqrt(a.r), sqrt(a.g), sqrt(a.b));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline const Color operator +( const Color& a, const Color& b ) { return Color(a.r+b.r, a.g+b.g, a.b+b.b); }
|
||||||
|
__forceinline const Color operator -( const Color& a, const Color& b ) { return Color(a.r-b.r, a.g-b.g, a.b-b.b); }
|
||||||
|
__forceinline const Color operator *( const Color& a, const Color& b ) { return Color(a.r*b.r, a.g*b.g, a.b*b.b); }
|
||||||
|
__forceinline const Color operator *( const Color& a, const float b ) { return a * Color(b); }
|
||||||
|
__forceinline const Color operator *( const float a, const Color& b ) { return Color(a) * b; }
|
||||||
|
__forceinline const Color operator /( const Color& a, const Color& b ) { return a * rcp(b); }
|
||||||
|
__forceinline const Color operator /( const Color& a, const float b ) { return a * rcp(b); }
|
||||||
|
|
||||||
|
__forceinline const Color min( const Color& a, const Color& b ) { return Color(min(a.r,b.r), min(a.g,b.g), min(a.b,b.b)); }
|
||||||
|
__forceinline const Color max( const Color& a, const Color& b ) { return Color(max(a.r,b.r), max(a.g,b.g), max(a.b,b.b)); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline const Color operator+=(Color& a, const Color& b) { return a = a + b; }
|
||||||
|
__forceinline const Color operator-=(Color& a, const Color& b) { return a = a - b; }
|
||||||
|
__forceinline const Color operator*=(Color& a, const Color& b) { return a = a * b; }
|
||||||
|
__forceinline const Color operator/=(Color& a, const Color& b) { return a = a / b; }
|
||||||
|
__forceinline const Color operator*=(Color& a, const float b ) { return a = a * b; }
|
||||||
|
__forceinline const Color operator/=(Color& a, const float b ) { return a = a / b; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline float reduce_add(const Color& v) { return v.r+v.g+v.b; }
|
||||||
|
__forceinline float reduce_mul(const Color& v) { return v.r*v.g*v.b; }
|
||||||
|
/*! Smallest of the three channels r, g, b. */
__forceinline float reduce_min(const Color& v)
{
  return min(min(v.r, v.g), v.b);
}
|
||||||
|
/*! Largest of the three channels r, g, b. */
__forceinline float reduce_max(const Color& v)
{
  return max(max(v.r, v.g), v.b);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline bool operator ==( const Color& a, const Color& b ) { return a.r == b.r && a.g == b.g && a.b == b.b; }
|
||||||
|
/*! Two colors differ as soon as any one of their r, g, b channels differs. */
__forceinline bool operator !=( const Color& a, const Color& b )
{
  return !(a.r == b.r && a.g == b.g && a.b == b.b);
}
|
||||||
|
/*! Lexicographic ordering over (r, g, b): the first channel that differs
 *  decides the result; colors with all channels equal are not less. */
__forceinline bool operator < ( const Color& a, const Color& b )
{
  if (a.r == b.r) {
    if (a.g == b.g) {
      // only the blue channel is left to break the tie
      return (a.b != b.b) && (a.b < b.b);
    }
    return a.g < b.g;
  }
  return a.r < b.r;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Returns a copy of t when s is true, otherwise a copy of f. */
__forceinline const Color select( bool s, const Color& t, const Color& f )
{
  if (s)
    return t;
  return f;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Special Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! computes luminance of a color */
|
||||||
|
// Weighted channel sum 0.212671*r + 0.715160*g + 0.072169*b, evaluated as two nested madd() calls.
// NOTE(review): the weights look like Rec. 709 luma coefficients — confirm against the intended color space.
__forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
|
||||||
|
|
||||||
|
/*! output operator */
|
||||||
|
inline std::ostream& operator<<(std::ostream& cout, const Color& a) {
|
||||||
|
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
|
||||||
|
}
|
||||||
|
}
|
||||||
8
Framework/external/embree/common/math/constants.cpp
vendored
Normal file
8
Framework/external/embree/common/math/constants.cpp
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#include "constants.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
}
|
||||||
209
Framework/external/embree/common/math/constants.h
vendored
Normal file
209
Framework/external/embree/common/math/constants.h
vendored
Normal file
|
|
@ -0,0 +1,209 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/platform.h"
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
#define _USE_MATH_DEFINES
|
||||||
|
#include <math.h> // using cmath causes issues under Windows
|
||||||
|
#include <cfloat>
|
||||||
|
#include <climits>
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
static MAYBE_UNUSED const float one_over_255 = 1.0f/255.0f;
|
||||||
|
static MAYBE_UNUSED const float min_rcp_input = 1E-18f; // for abs(x) >= min_rcp_input the newton raphson rcp calculation does not fail
|
||||||
|
|
||||||
|
/* floating point numbers strictly between -FLT_LARGE and +FLT_LARGE are considered valid input numbers */
|
||||||
|
static MAYBE_UNUSED float FLT_LARGE = 1.844E18f;
|
||||||
|
|
||||||
|
/*! Tag type that implicitly converts to the boolean value true. */
struct TrueTy
{
  __forceinline operator bool() const
  {
    return true;
  }
};
|
||||||
|
|
||||||
|
const constexpr TrueTy True = TrueTy();
|
||||||
|
|
||||||
|
/*! Tag type that implicitly converts to the boolean value false. */
struct FalseTy
{
  __forceinline operator bool() const
  {
    return false;
  }
};
|
||||||
|
|
||||||
|
const constexpr FalseTy False = FalseTy();
|
||||||
|
|
||||||
|
/*! Tag type that converts to the value zero in each of the built-in
 *  arithmetic types listed below. */
struct ZeroTy
{
  /* floating point */
  __forceinline operator double() const { return 0.0; }
  __forceinline operator float() const { return 0.0f; }

  /* 64-bit and long integers */
  __forceinline operator long long() const { return 0LL; }
  __forceinline operator unsigned long long() const { return 0ULL; }
  __forceinline operator long() const { return 0L; }
  __forceinline operator unsigned long() const { return 0UL; }

  /* int and narrower */
  __forceinline operator int() const { return 0; }
  __forceinline operator unsigned int() const { return 0u; }
  __forceinline operator short() const { return 0; }
  __forceinline operator unsigned short() const { return 0; }
  __forceinline operator char() const { return 0; }
  __forceinline operator unsigned char() const { return 0; }
};
|
||||||
|
|
||||||
|
const constexpr ZeroTy zero = ZeroTy();
|
||||||
|
|
||||||
|
/*! Tag type that converts to the value one in each of the built-in
 *  arithmetic types listed below. */
struct OneTy
{
  /* floating point */
  __forceinline operator double() const { return 1.0; }
  __forceinline operator float() const { return 1.0f; }

  /* 64-bit and long integers */
  __forceinline operator long long() const { return 1LL; }
  __forceinline operator unsigned long long() const { return 1ULL; }
  __forceinline operator long() const { return 1L; }
  __forceinline operator unsigned long() const { return 1UL; }

  /* int and narrower */
  __forceinline operator int() const { return 1; }
  __forceinline operator unsigned int() const { return 1u; }
  __forceinline operator short() const { return 1; }
  __forceinline operator unsigned short() const { return 1; }
  __forceinline operator char() const { return 1; }
  __forceinline operator unsigned char() const { return 1; }
};
|
||||||
|
|
||||||
|
const constexpr OneTy one = OneTy();
|
||||||
|
|
||||||
|
/*! Tag type that converts to the most negative value of the target type:
 *  -infinity for float/double and the lowest representable value for the
 *  integer types (which is 0 for the unsigned ones). */
struct NegInfTy
{
  /* floating point: negative infinity */
  __forceinline operator double() const { return -std::numeric_limits<double>::infinity(); }
  __forceinline operator float() const { return -std::numeric_limits<float>::infinity(); }

  /* integers: lowest representable value */
  __forceinline operator long long() const { return std::numeric_limits<long long>::lowest(); }
  __forceinline operator unsigned long long() const { return std::numeric_limits<unsigned long long>::lowest(); }
  __forceinline operator long() const { return std::numeric_limits<long>::lowest(); }
  __forceinline operator unsigned long() const { return std::numeric_limits<unsigned long>::lowest(); }
  __forceinline operator int() const { return std::numeric_limits<int>::lowest(); }
  __forceinline operator unsigned int() const { return std::numeric_limits<unsigned int>::lowest(); }
  __forceinline operator short() const { return std::numeric_limits<short>::lowest(); }
  __forceinline operator unsigned short() const { return std::numeric_limits<unsigned short>::lowest(); }
  __forceinline operator char() const { return std::numeric_limits<char>::lowest(); }
  __forceinline operator unsigned char() const { return std::numeric_limits<unsigned char>::lowest(); }
};
|
||||||
|
|
||||||
|
const constexpr NegInfTy neg_inf = NegInfTy();
|
||||||
|
|
||||||
|
/*! Tag type that converts to the largest value of the target type:
 *  +infinity for float/double and the maximum representable value for the
 *  integer types. */
struct PosInfTy
{
  /* floating point: positive infinity */
  __forceinline operator double() const { return std::numeric_limits<double>::infinity(); }
  __forceinline operator float() const { return std::numeric_limits<float>::infinity(); }

  /* integers: maximum representable value (limits taken from <climits>) */
  __forceinline operator long long() const { return LLONG_MAX; }
  __forceinline operator unsigned long long() const { return ULLONG_MAX; }
  __forceinline operator long() const { return LONG_MAX; }
  __forceinline operator unsigned long() const { return ULONG_MAX; }
  __forceinline operator int() const { return INT_MAX; }
  __forceinline operator unsigned int() const { return UINT_MAX; }
  __forceinline operator short() const { return SHRT_MAX; }
  __forceinline operator unsigned short() const { return USHRT_MAX; }
  __forceinline operator char() const { return CHAR_MAX; }
  __forceinline operator unsigned char() const { return UCHAR_MAX; }
};
|
||||||
|
|
||||||
|
const constexpr PosInfTy inf = PosInfTy();
|
||||||
|
const constexpr PosInfTy pos_inf = PosInfTy();
|
||||||
|
|
||||||
|
/*! Tag type that converts to a quiet NaN of type float or double. */
struct NaNTy
{
  __forceinline operator double() const
  {
    return std::numeric_limits<double>::quiet_NaN();
  }

  __forceinline operator float() const
  {
    return std::numeric_limits<float>::quiet_NaN();
  }
};
|
||||||
|
|
||||||
|
const constexpr NaNTy nan = NaNTy();
|
||||||
|
|
||||||
|
/*! Tag type that converts to the machine epsilon of float or double
 *  (the same value std::numeric_limits<T>::epsilon() reports). */
struct UlpTy
{
  __forceinline operator double() const { return DBL_EPSILON; }
  __forceinline operator float() const { return FLT_EPSILON; }
};
|
||||||
|
|
||||||
|
const constexpr UlpTy ulp = UlpTy();
|
||||||
|
|
||||||
|
/*! Tag type that converts to pi (M_PI) as a float or a double. */
struct PiTy
{
  __forceinline operator double() const
  {
    return static_cast<double>(M_PI);
  }

  __forceinline operator float() const
  {
    return static_cast<float>(M_PI);
  }
};
|
||||||
|
|
||||||
|
const constexpr PiTy pi = PiTy();
|
||||||
|
|
||||||
|
struct OneOverPiTy
|
||||||
|
{
|
||||||
|
__forceinline operator double( ) const { return double(M_1_PI); }
|
||||||
|
__forceinline operator float ( ) const { return float(M_1_PI); }
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr OneOverPiTy one_over_pi = OneOverPiTy();
|
||||||
|
|
||||||
|
struct TwoPiTy
|
||||||
|
{
|
||||||
|
__forceinline operator double( ) const { return double(2.0*M_PI); }
|
||||||
|
__forceinline operator float ( ) const { return float(2.0*M_PI); }
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr TwoPiTy two_pi = TwoPiTy();
|
||||||
|
|
||||||
|
struct OneOverTwoPiTy
|
||||||
|
{
|
||||||
|
__forceinline operator double( ) const { return double(0.5*M_1_PI); }
|
||||||
|
__forceinline operator float ( ) const { return float(0.5*M_1_PI); }
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr OneOverTwoPiTy one_over_two_pi = OneOverTwoPiTy();
|
||||||
|
|
||||||
|
struct FourPiTy
|
||||||
|
{
|
||||||
|
__forceinline operator double( ) const { return double(4.0*M_PI); }
|
||||||
|
__forceinline operator float ( ) const { return float(4.0*M_PI); }
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr FourPiTy four_pi = FourPiTy();
|
||||||
|
|
||||||
|
struct OneOverFourPiTy
|
||||||
|
{
|
||||||
|
__forceinline operator double( ) const { return double(0.25*M_1_PI); }
|
||||||
|
__forceinline operator float ( ) const { return float(0.25*M_1_PI); }
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr OneOverFourPiTy one_over_four_pi = OneOverFourPiTy();
|
||||||
|
|
||||||
|
struct StepTy {
|
||||||
|
__forceinline operator double ( ) const { return 0; }
|
||||||
|
__forceinline operator float ( ) const { return 0; }
|
||||||
|
__forceinline operator long long( ) const { return 0; }
|
||||||
|
__forceinline operator unsigned long long( ) const { return 0; }
|
||||||
|
__forceinline operator long ( ) const { return 0; }
|
||||||
|
__forceinline operator unsigned long ( ) const { return 0; }
|
||||||
|
__forceinline operator int ( ) const { return 0; }
|
||||||
|
__forceinline operator unsigned int ( ) const { return 0; }
|
||||||
|
__forceinline operator short ( ) const { return 0; }
|
||||||
|
__forceinline operator unsigned short ( ) const { return 0; }
|
||||||
|
__forceinline operator char ( ) const { return 0; }
|
||||||
|
__forceinline operator unsigned char ( ) const { return 0; }
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr StepTy step = StepTy();
|
||||||
|
|
||||||
|
struct ReverseStepTy {
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr ReverseStepTy reverse_step = ReverseStepTy();
|
||||||
|
|
||||||
|
struct EmptyTy {
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr EmptyTy empty = EmptyTy();
|
||||||
|
|
||||||
|
struct FullTy {
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr FullTy full = FullTy();
|
||||||
|
|
||||||
|
struct UndefinedTy {
|
||||||
|
};
|
||||||
|
|
||||||
|
const constexpr UndefinedTy undefined = UndefinedTy();
|
||||||
|
}
|
||||||
468
Framework/external/embree/common/math/emath.h
vendored
Normal file
468
Framework/external/embree/common/math/emath.h
vendored
Normal file
|
|
@ -0,0 +1,468 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/platform.h"
|
||||||
|
#include "../sys/intrinsics.h"
|
||||||
|
#include "constants.h"
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
# include "math_sycl.h"
|
||||||
|
#else
|
||||||
|
|
||||||
|
#if defined(__ARM_NEON)
|
||||||
|
#include "../simd/arm/emulation.h"
|
||||||
|
#else
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#include <immintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__WIN32__)
|
||||||
|
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
|
||||||
|
namespace std
|
||||||
|
{
|
||||||
|
// Shim for old MSVC: x is infinite when it is neither finite nor NaN.
// _finite() alone is not enough, because it also returns 0 for NaN,
// which made the previous version report NaN as infinite.
__forceinline bool isinf ( const float x ) { return _finite(x) == 0 && _isnan(x) == 0; }
|
||||||
|
__forceinline bool isnan ( const float x ) { return _isnan(x) != 0; }
|
||||||
|
__forceinline bool isfinite (const float x) { return _finite(x) != 0; }
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! True when v lies strictly between -FLT_LARGE and +FLT_LARGE. */
__forceinline bool isvalid ( const float& v )
{
  const bool aboveLower = v > -FLT_LARGE;
  const bool belowUpper = v < +FLT_LARGE;
  return aboveLower & belowUpper;  // bitwise AND: both comparisons are always evaluated
}
|
||||||
|
|
||||||
|
/*! Reinterprets the bit pattern of a float as an int through a union
 *  (no numeric conversion takes place). */
__forceinline int cast_f2i(float f)
{
  union {
    float flt;
    int   bits;
  } pun;
  pun.flt = f;
  return pun.bits;
}
|
||||||
|
|
||||||
|
/*! Reinterprets the bit pattern of an int as a float through a union
 *  (no numeric conversion takes place). */
__forceinline float cast_i2f(int i)
{
  union {
    float flt;
    int   bits;
  } pun;
  pun.bits = i;
  return pun.flt;
}
|
||||||
|
|
||||||
|
/*! Numeric float-to-int conversion (the fractional part is discarded). */
__forceinline int toInt (const float& a)
{
  return static_cast<int>(a);
}
|
||||||
|
/*! Numeric int-to-float conversion. */
__forceinline float toFloat(const int& a)
{
  return static_cast<float>(a);
}
|
||||||
|
|
||||||
|
/*! Returns the bit pattern of the float a as an int (no numeric conversion).
 *  Goes through a union, the same idiom cast_f2i uses, instead of
 *  dereferencing an (int*) cast of a float address: reading a float object
 *  through an int lvalue breaks the strict-aliasing rule and may be
 *  miscompiled under optimization. */
__forceinline int asInt (const float& a) { union { float f; int i; } v; v.f = a; return v.i; }
|
||||||
|
/*! Returns the float whose bit pattern is the int a (no numeric conversion).
 *  Goes through a union, the same idiom cast_i2f uses, instead of
 *  dereferencing a (float*) cast of an int address: reading an int object
 *  through a float lvalue breaks the strict-aliasing rule and may be
 *  miscompiled under optimization. */
__forceinline float asFloat(const int& a) { union { float f; int i; } v; v.i = a; return v.f; }
|
||||||
|
|
||||||
|
#if defined(__WIN32__)
|
||||||
|
__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*! Returns -1.0f for x < 0 and 1.0f for everything else (including zero). */
__forceinline float sign ( const float x )
{
  if (x < 0)
    return -1.0f;
  return 1.0f;
}
|
||||||
|
__forceinline float sqr ( const float x ) { return x*x; }
|
||||||
|
|
||||||
|
/*! Approximate reciprocal of a float: a hardware reciprocal estimate that is
 *  then refined (two refinement steps on aarch64, one on the SSE/AVX path). */
__forceinline float rcp ( const float x )
{
#if defined(__aarch64__)
  // Place the scalar in lane 0 of a vector register and compute on the vector.
  __m128 vx;
  vx[0] = x;
  float32x4_t est = vrecpeq_f32(vx);
  est = vmulq_f32(vrecpsq_f32(vx, est), est);  // first refinement
  est = vmulq_f32(vrecpsq_f32(vx, est), est);  // second refinement
  return est[0];
#else

  const __m128 vx = _mm_set_ss(x);

#if defined(__AVX512VL__)
  const __m128 est = _mm_rcp14_ss(_mm_set_ss(0.0f),vx);
#else
  const __m128 est = _mm_rcp_ss(vx);
#endif

  // refinement step: est * (2 - est*x)
#if defined(__AVX2__)
  const __m128 corr = _mm_fnmadd_ss(est, vx, _mm_set_ss(2.0f));
#else
  const __m128 corr = _mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(est, vx));
#endif
  return _mm_cvtss_f32(_mm_mul_ss(est,corr));

#endif //defined(__aarch64__)
}
|
||||||
|
|
||||||
|
/*! Keeps only the sign bit of x (mask 0x80000000): the result is -0.0f for
 *  inputs with the sign bit set and +0.0f otherwise. */
__forceinline float signmsk ( const float x )
{
#if defined(__aarch64__)
  // FP and NEON share the same vector registers on arm64
  __m128 value;
  __m128i mask;
  value[0] = x;
  mask[0] = 0x80000000;
  value = _mm_and_ps(value, vreinterpretq_f32_s32(mask));
  return value[0];
#else
  const __m128 value = _mm_set_ss(x);
  const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
  return _mm_cvtss_f32(_mm_and_ps(value,mask));
#endif
}
|
||||||
|
/*! Bitwise XOR of the bit patterns of two floats, returned as a float. */
__forceinline float xorf( const float x, const float y )
{
#if defined(__aarch64__)
  // FP and NEON share the same vector registers on arm64
  __m128 lhs;
  __m128 rhs;
  lhs[0] = x;
  rhs[0] = y;
  lhs = _mm_xor_ps(lhs, rhs);
  return lhs[0];
#else
  const __m128 lhs = _mm_set_ss(x);
  const __m128 rhs = _mm_set_ss(y);
  return _mm_cvtss_f32(_mm_xor_ps(lhs,rhs));
#endif
}
|
||||||
|
/*! Bitwise AND of the bit pattern of the float x with the 32-bit mask y,
 *  returned as a float. */
__forceinline float andf( const float x, const unsigned y )
{
#if defined(__aarch64__)
  // FP and NEON share the same vector registers on arm64
  __m128 value;
  __m128i mask;
  value[0] = x;
  mask[0] = y;
  value = _mm_and_ps(value, vreinterpretq_f32_s32(mask));
  return value[0];
#else
  const __m128 value = _mm_set_ss(x);
  const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(y));
  return _mm_cvtss_f32(_mm_and_ps(value,mask));
#endif
}
|
||||||
|
/*! Approximate reciprocal square root of a float: a hardware estimate that
 *  is then refined (two refinement steps on aarch64, one on the SSE/AVX path). */
__forceinline float rsqrt( const float x )
{
#if defined(__aarch64__)
  // FP and NEON share the same vector registers on arm64
  __m128 vx;
  vx[0] = x;
  __m128 est = _mm_rsqrt_ps(vx);
  est = vmulq_f32(est, vrsqrtsq_f32(vmulq_f32(vx, est), est));  // first refinement
  est = vmulq_f32(est, vrsqrtsq_f32(vmulq_f32(vx, est), est));  // second refinement
  return est[0];
#else

  const __m128 vx = _mm_set_ss(x);
#if defined(__AVX512VL__)
  __m128 est = _mm_rsqrt14_ss(_mm_set_ss(0.0f),vx);
#else
  __m128 est = _mm_rsqrt_ss(vx);
#endif
  // refinement step: 1.5*est + ((x * -0.5) * est) * (est*est)
  const __m128 scaled = _mm_mul_ss(_mm_set_ss(1.5f), est);
  const __m128 negHalfX = _mm_mul_ss(vx, _mm_set_ss(-0.5f));
  const __m128 cubic = _mm_mul_ss(_mm_mul_ss(negHalfX, est), _mm_mul_ss(est, est));
  return _mm_cvtss_f32(_mm_add_ss(scaled, cubic));
#endif
}
|
||||||
|
|
||||||
|
#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
|
||||||
|
// Fallback used only for old MSVC (_MSC_VER <= 1700): scales x by (1.1f + ulp) when the
// step towards y increases |x|, and by (0.9f - ulp) otherwise.
// NOTE(review): this moves x by roughly 10% rather than to the adjacent representable float,
// and x == 0 is returned unchanged — confirm this coarse approximation is intended.
__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
|
||||||
|
__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
|
||||||
|
// Fallback for old MSVC: rounds to the nearest integer, halfway cases away from zero
// (the C99 roundf rule). The bias must follow the sign of f: adding 0.5f unconditionally
// and truncating rounded negative inputs the wrong way (e.g. -1.6f gave -1 instead of -2).
__forceinline int roundf(float f) { return (int)(f < 0.0f ? f - 0.5f : f + 0.5f); }
|
||||||
|
#else
|
||||||
|
__forceinline float nextafter(float x, float y) { return ::nextafterf(x, y); }
|
||||||
|
__forceinline double nextafter(double x, double y) { return ::nextafter(x, y); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline float abs ( const float x ) { return ::fabsf(x); }
|
||||||
|
__forceinline float acos ( const float x ) { return ::acosf (x); }
|
||||||
|
__forceinline float asin ( const float x ) { return ::asinf (x); }
|
||||||
|
__forceinline float atan ( const float x ) { return ::atanf (x); }
|
||||||
|
__forceinline float atan2( const float y, const float x ) { return ::atan2f(y, x); }
|
||||||
|
__forceinline float cos ( const float x ) { return ::cosf (x); }
|
||||||
|
__forceinline float cosh ( const float x ) { return ::coshf (x); }
|
||||||
|
__forceinline float exp ( const float x ) { return ::expf (x); }
|
||||||
|
__forceinline float fmod ( const float x, const float y ) { return ::fmodf (x, y); }
|
||||||
|
__forceinline float log ( const float x ) { return ::logf (x); }
|
||||||
|
__forceinline float log10( const float x ) { return ::log10f(x); }
|
||||||
|
__forceinline float pow ( const float x, const float y ) { return ::powf (x, y); }
|
||||||
|
__forceinline float sin ( const float x ) { return ::sinf (x); }
|
||||||
|
__forceinline float sinh ( const float x ) { return ::sinhf (x); }
|
||||||
|
__forceinline float sqrt ( const float x ) { return ::sqrtf (x); }
|
||||||
|
__forceinline float tan ( const float x ) { return ::tanf (x); }
|
||||||
|
__forceinline float tanh ( const float x ) { return ::tanhf (x); }
|
||||||
|
__forceinline float floor( const float x ) { return ::floorf (x); }
|
||||||
|
__forceinline float ceil ( const float x ) { return ::ceilf (x); }
|
||||||
|
/*! Fractional part of x measured from floor(x), i.e. x - floor(x). */
__forceinline float frac ( const float x )
{
  const float whole = ::floorf(x);
  return x - whole;
}
|
||||||
|
|
||||||
|
__forceinline double abs ( const double x ) { return ::fabs(x); }
|
||||||
|
__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
|
||||||
|
__forceinline double acos ( const double x ) { return ::acos (x); }
|
||||||
|
__forceinline double asin ( const double x ) { return ::asin (x); }
|
||||||
|
__forceinline double atan ( const double x ) { return ::atan (x); }
|
||||||
|
__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
|
||||||
|
__forceinline double cos ( const double x ) { return ::cos (x); }
|
||||||
|
__forceinline double cosh ( const double x ) { return ::cosh (x); }
|
||||||
|
__forceinline double exp ( const double x ) { return ::exp (x); }
|
||||||
|
__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
|
||||||
|
__forceinline double log ( const double x ) { return ::log (x); }
|
||||||
|
__forceinline double log10( const double x ) { return ::log10(x); }
|
||||||
|
__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
|
||||||
|
__forceinline double rcp ( const double x ) { return 1.0/x; }
|
||||||
|
__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
|
||||||
|
__forceinline double sin ( const double x ) { return ::sin (x); }
|
||||||
|
__forceinline double sinh ( const double x ) { return ::sinh (x); }
|
||||||
|
__forceinline double sqr ( const double x ) { return x*x; }
|
||||||
|
__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
|
||||||
|
__forceinline double tan ( const double x ) { return ::tan (x); }
|
||||||
|
__forceinline double tanh ( const double x ) { return ::tanh (x); }
|
||||||
|
__forceinline double floor( const double x ) { return ::floor (x); }
|
||||||
|
__forceinline double ceil ( const double x ) { return ::ceil (x); }
|
||||||
|
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__forceinline float mini(float a, float b) {
|
||||||
|
// FP and Neon shares same vector register in arm64
|
||||||
|
__m128 x;
|
||||||
|
__m128 y;
|
||||||
|
x[0] = a;
|
||||||
|
y[0] = b;
|
||||||
|
x = _mm_min_ps(x, y);
|
||||||
|
return x[0];
|
||||||
|
}
|
||||||
|
#elif defined(__SSE4_1__)
|
||||||
|
__forceinline float mini(float a, float b) {
|
||||||
|
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
|
||||||
|
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
|
||||||
|
const __m128i ci = _mm_min_epi32(ai,bi);
|
||||||
|
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__forceinline float maxi(float a, float b) {
|
||||||
|
// FP and Neon shares same vector register in arm64
|
||||||
|
__m128 x;
|
||||||
|
__m128 y;
|
||||||
|
x[0] = a;
|
||||||
|
y[0] = b;
|
||||||
|
x = _mm_max_ps(x, y);
|
||||||
|
return x[0];
|
||||||
|
}
|
||||||
|
#elif defined(__SSE4_1__)
|
||||||
|
__forceinline float maxi(float a, float b) {
|
||||||
|
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
|
||||||
|
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
|
||||||
|
const __m128i ci = _mm_max_epi32(ai,bi);
|
||||||
|
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline T twice(const T& a) { return a+a; }
|
||||||
|
|
||||||
|
__forceinline int min(int a, int b) { return a<b ? a:b; }
|
||||||
|
__forceinline unsigned min(unsigned a, unsigned b) { return a<b ? a:b; }
|
||||||
|
__forceinline int64_t min(int64_t a, int64_t b) { return a<b ? a:b; }
|
||||||
|
/*! Returns a when a < b, otherwise b (so b is returned whenever the comparison is false). */
__forceinline float min(float a, float b)
{
  if (a < b)
    return a;
  return b;
}
|
||||||
|
__forceinline double min(double a, double b) { return a<b ? a:b; }
|
||||||
|
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
|
||||||
|
__forceinline size_t min(size_t a, size_t b) { return a<b ? a:b; }
|
||||||
|
#endif
|
||||||
|
#if defined(__EMSCRIPTEN__)
|
||||||
|
__forceinline long min(long a, long b) { return a<b ? a:b; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
|
||||||
|
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
|
||||||
|
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
|
||||||
|
|
||||||
|
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
|
||||||
|
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
|
||||||
|
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
|
||||||
|
|
||||||
|
__forceinline int max(int a, int b) { return a<b ? b:a; }
|
||||||
|
__forceinline unsigned max(unsigned a, unsigned b) { return a<b ? b:a; }
|
||||||
|
__forceinline int64_t max(int64_t a, int64_t b) { return a<b ? b:a; }
|
||||||
|
/*! Returns b when a < b, otherwise a (so a is returned whenever the comparison is false). */
__forceinline float max(float a, float b)
{
  if (a < b)
    return b;
  return a;
}
|
||||||
|
__forceinline double max(double a, double b) { return a<b ? b:a; }
|
||||||
|
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
|
||||||
|
__forceinline size_t max(size_t a, size_t b) { return a<b ? b:a; }
|
||||||
|
#endif
|
||||||
|
#if defined(__EMSCRIPTEN__)
|
||||||
|
__forceinline long max(long a, long b) { return a<b ? b:a; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
|
||||||
|
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
|
||||||
|
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
|
||||||
|
|
||||||
|
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
|
||||||
|
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
|
||||||
|
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
|
||||||
|
|
||||||
|
#if defined(__MACOSX__)
|
||||||
|
__forceinline ssize_t min(ssize_t a, ssize_t b) { return a<b ? a:b; }
|
||||||
|
__forceinline ssize_t max(ssize_t a, ssize_t b) { return a<b ? b:a; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__MACOSX__) && !defined(__INTEL_COMPILER)
|
||||||
|
__forceinline void sincosf(float x, float *sin, float *cos) {
|
||||||
|
__sincosf(x,sin,cos);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__FreeBSD__)
|
||||||
|
__forceinline void sincosf(float x, float *s, float *c) {
|
||||||
|
*s = sinf(x); *c = cosf(x);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Clamps x to [lower, upper]; the bounds default to T(zero) and T(one).
// Evaluated as max(min(x,upper),lower), so lower takes precedence if lower > upper.
template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
|
||||||
|
// Clamps x to [T(zero), upper]: min(x,upper) first, then max with T(zero).
template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
|
||||||
|
|
||||||
|
template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
|
||||||
|
template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
|
||||||
|
// Computes sqrt(max(0, 1 - x*x)); the max with T(zero) keeps the sqrt argument non-negative.
// The result is the non-negative root, so any sign information is lost.
template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
|
||||||
|
template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
|
||||||
|
|
||||||
|
/*! scalar multiply-add helpers
 *    madd  =  a*b + c      msub  =  a*b - c
 *    nmadd = -a*b + c      nmsub = -a*b - c
 *  One of three implementations is selected at compile time. */
#if defined(__AVX2__)

/* AVX2 builds: evaluate through the scalar fused multiply-add intrinsics */
__forceinline float madd  ( const float a, const float b, const float c) {
  const __m128 fused = _mm_fmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c));
  return _mm_cvtss_f32(fused);
}
__forceinline float msub  ( const float a, const float b, const float c) {
  const __m128 fused = _mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c));
  return _mm_cvtss_f32(fused);
}
__forceinline float nmadd ( const float a, const float b, const float c) {
  const __m128 fused = _mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c));
  return _mm_cvtss_f32(fused);
}
__forceinline float nmsub ( const float a, const float b, const float c) {
  const __m128 fused = _mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c));
  return _mm_cvtss_f32(fused);
}

#elif defined (__aarch64__) && defined(__clang__)

/* AArch64 clang builds: plain expressions, with floating-point contraction switched to
   'fast' for these four functions and back to 'on' afterwards */
#pragma clang fp contract(fast)
__forceinline float madd  ( const float a, const float b, const float c) { return a*b + c; }
__forceinline float msub  ( const float a, const float b, const float c) { return a*b - c; }
__forceinline float nmadd ( const float a, const float b, const float c) { return c - a*b; }
__forceinline float nmsub ( const float a, const float b, const float c) { return -(c + a*b); }
#pragma clang fp contract(on)

#else

/* everything else: plain expressions */
__forceinline float madd  ( const float a, const float b, const float c) { return a*b+c; }
__forceinline float msub  ( const float a, const float b, const float c) { return a*b-c; }
__forceinline float nmadd ( const float a, const float b, const float c) { return -a*b+c;}
__forceinline float nmsub ( const float a, const float b, const float c) { return -a*b-c; }

#endif
|
||||||
|
|
||||||
|
/*! random functions */
|
||||||
|
/*! generic fallback: a type without a specialization below always yields T(0) */
template<typename T> T random() { return T(0); }

#if defined(_WIN32)

/* Windows: several rand() results are combined with shifts of 8 and 16 bits.
   NOTE: the rand() calls sit in one expression, so their relative order is unspecified. */
template<> __forceinline int random() {
  return int(rand()) ^ (int(rand()) << 8) ^ (int(rand()) << 16);
}
template<> __forceinline uint32_t random() {
  return uint32_t(rand()) ^ (uint32_t(rand()) << 8) ^ (uint32_t(rand()) << 16);
}

#else

/* other platforms: int uses one rand() result, uint32_t combines two (second shifted by 16) */
template<> __forceinline int random() {
  return int(rand());
}
template<> __forceinline uint32_t random() {
  return uint32_t(rand()) ^ (uint32_t(rand()) << 16);
}

#endif

/*! floating-point variants: rand() divided by RAND_MAX, i.e. a value in [0,1] */
template<> __forceinline float random() {
  const float denominator = float(RAND_MAX);
  return rand()/denominator;
}
template<> __forceinline double random() {
  const double denominator = double(RAND_MAX);
  return rand()/denominator;
}
|
||||||
|
|
||||||
|
#if defined(_WIN32)

/*! Windows replacement for POSIX drand48(): returns a value in the half-open range [0,1).
 *  Dividing by RAND_MAX+1 (rather than RAND_MAX) keeps 1.0 out of the result, which is
 *  the range POSIX specifies for drand48. */
__forceinline double drand48() {
  return double(rand())/(double(RAND_MAX)+1.0);
}

/*! Windows replacement for POSIX srand48(): seeds the rand() generator used by drand48() above. */
__forceinline void srand48(long seed) {
  srand((unsigned int)seed); // srand() takes an unsigned seed; make the narrowing explicit
}

#endif
|
||||||
|
|
||||||
|
/*! scalar selects: each overload returns t when s is true and f otherwise */
__forceinline bool  select(bool s, bool  t, bool  f) { if (s) return t; return f; }
__forceinline int   select(bool s, int   t, int   f) { if (s) return t; return f; }
__forceinline float select(bool s, float t, float f) { if (s) return t; return f; }
|
||||||
|
|
||||||
|
/*! mask reductions for a single boolean: none() is the negation, all() and any() are the value itself */
__forceinline bool none(bool s) { return s ? false : true; }
__forceinline bool all (bool s) { return s ? true : false; }
__forceinline bool any (bool s) { return s ? true : false; }
|
||||||
|
|
||||||
|
/*! movemask for a single boolean: 1 when s is true, 0 otherwise */
__forceinline unsigned movemask (bool s) {
  return static_cast<unsigned>(s);
}
|
||||||
|
|
||||||
|
__forceinline float lerp(const float v0, const float v1, const float t) {
|
||||||
|
return madd(1.0f-t,v0,t*v1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! bilinear interpolation of the four corner values x0..x3:
 *  x0/x2 and x1/x3 are each blended by v, and the two results are then blended by u */
template<typename T>
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
  const T blend02 = madd((1.0f-v),T(x0),v*T(x2));
  const T blend13 = madd((1.0f-v),T(x1),v*T(x3));
  return madd((1.0f-u),blend02,u*blend13);
}
|
||||||
|
|
||||||
|
/*! exchanges the values of a and b through a temporary copy */
template<typename T> __forceinline void xchg ( T& a, T& b ) {
  const T previousA = a;
  a = b;
  b = previousA;
}
|
||||||
|
|
||||||
|
/*! masked load/store helpers; only the float specialization is defined here */
template<typename Ty> struct mem;

template<> struct mem<float> {
  /*! returns the float stored at ptr when mask is true, 0.0f otherwise (ptr is not
   *  dereferenced when mask is false); load and loadu are identical for scalars */
  static __forceinline float load (bool mask, const void* ptr) { return mask ? *static_cast<const float*>(ptr) : 0.0f; }
  static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *static_cast<const float*>(ptr) : 0.0f; }

  /*! writes v to ptr only when mask is true; store and storeu are identical for scalars */
  static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *static_cast<float*>(ptr) = v; }
  static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *static_cast<float*>(ptr) = v; }
};
|
||||||
|
|
||||||
|
/*! bit reverse operation: swaps neighbouring bits, then bit pairs, nibbles and bytes, and
 *  finally the two 16-bit halves (all masks are 32 bits wide) */
template<class T>
__forceinline T bitReverse(const T& vin)
{
  T bits = vin;
  bits = ((bits >> 1) & 0x55555555) | ((bits & 0x55555555) << 1);   // single bits
  bits = ((bits >> 2) & 0x33333333) | ((bits & 0x33333333) << 2);   // bit pairs
  bits = ((bits >> 4) & 0x0F0F0F0F) | ((bits & 0x0F0F0F0F) << 4);   // nibbles
  bits = ((bits >> 8) & 0x00FF00FF) | ((bits & 0x00FF00FF) << 8);   // bytes
  bits = ( bits >> 16             ) | ( bits               << 16);  // 16-bit halves
  return bits;
}
|
||||||
|
|
||||||
|
/*! bit interleave operation: each input is spread so that its bits land on every third
 *  position (final mask 0x09249249), then x, y<<1 and z<<2 are OR-ed together */
template<class T>
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
{
  /* inserts two zero bits between consecutive bits of v */
  const auto spreadBy3 = [](T v) -> T {
    v = (v | (v << 16)) & 0x030000FF;
    v = (v | (v <<  8)) & 0x0300F00F;
    v = (v | (v <<  4)) & 0x030C30C3;
    v = (v | (v <<  2)) & 0x09249249;
    return v;
  };

  const T x = spreadBy3(xin);
  const T y = spreadBy3(yin);
  const T z = spreadBy3(zin);
  return x | (y << 1) | (z << 2);
}
|
||||||
|
|
||||||
|
#if defined(__AVX2__) && !defined(__aarch64__)

/*! unsigned int specialization for AVX2 (non-AArch64) builds: each input is passed to pdep
 *  with a mask selecting every third bit, starting at bit 0 (x), bit 1 (y) and bit 2 (z) */
template<>
__forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)
{
  const unsigned int maskX = 0x49249249; /* 0b01001001001001001001001001001001 */
  const unsigned int maskY = 0x92492492; /* 0b10010010010010010010010010010010 */
  const unsigned int maskZ = 0x24924924; /* 0b00100100100100100100100100100100 */

  const unsigned int spreadX = pdep(xi,maskX);
  const unsigned int spreadY = pdep(yi,maskY);
  const unsigned int spreadZ = pdep(zi,maskZ);
  return spreadX | spreadY | spreadZ;
}

#endif
|
||||||
|
|
||||||
|
/*! bit interleave operation for 64bit data types: the low 21 bits of each input are spread
 *  onto every third bit position, then x, y<<1 and z<<2 are OR-ed together */
template<class T>
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
  /* inserts two zero bits between consecutive bits of v (v is expected to hold at most 21 bits) */
  const auto spreadBy3 = [](T v) -> T {
    v = (v | v << 32) & 0x1f00000000ffff;
    v = (v | v << 16) & 0x1f0000ff0000ff;
    v = (v | v << 8) & 0x100f00f00f00f00f;
    v = (v | v << 4) & 0x10c30c30c30c30c3;
    v = (v | v << 2) & 0x1249249249249249;
    return v;
  };

  const T x = spreadBy3(xin & 0x1fffff);
  const T y = spreadBy3(yin & 0x1fffff);
  const T z = spreadBy3(zin & 0x1fffff);
  return x | (y << 1) | (z << 2);
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
161
Framework/external/embree/common/math/interval.h
vendored
Normal file
161
Framework/external/embree/common/math/interval.h
vendored
Normal file
|
|
@ -0,0 +1,161 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "vec2.h"
|
||||||
|
#include "vec3.h"
|
||||||
|
#include "bbox.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<typename V>
|
||||||
|
struct Interval
|
||||||
|
{
|
||||||
|
V lower, upper;
|
||||||
|
|
||||||
|
__forceinline Interval() {}
|
||||||
|
__forceinline Interval ( const Interval& other ) { lower = other.lower; upper = other.upper; }
|
||||||
|
__forceinline Interval& operator=( const Interval& other ) { lower = other.lower; upper = other.upper; return *this; }
|
||||||
|
|
||||||
|
__forceinline Interval(const V& a) : lower(a), upper(a) {}
|
||||||
|
__forceinline Interval(const V& lower, const V& upper) : lower(lower), upper(upper) {}
|
||||||
|
__forceinline Interval(const BBox<V>& a) : lower(a.lower), upper(a.upper) {}
|
||||||
|
|
||||||
|
/*! tests if box is empty */
|
||||||
|
//__forceinline bool empty() const { return lower > upper; }
|
||||||
|
|
||||||
|
/*! computes the size of the interval */
|
||||||
|
__forceinline V size() const { return upper - lower; }
|
||||||
|
|
||||||
|
__forceinline V center() const { return 0.5f*(lower+upper); }
|
||||||
|
|
||||||
|
__forceinline const Interval& extend(const Interval& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; }
|
||||||
|
__forceinline const Interval& extend(const V & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; }
|
||||||
|
|
||||||
|
__forceinline friend Interval operator +( const Interval& a, const Interval& b ) {
|
||||||
|
return Interval(a.lower+b.lower,a.upper+b.upper);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline friend Interval operator -( const Interval& a, const Interval& b ) {
|
||||||
|
return Interval(a.lower-b.upper,a.upper-b.lower);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline friend Interval operator -( const Interval& a, const V& b ) {
|
||||||
|
return Interval(a.lower-b,a.upper-b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline friend Interval operator *( const Interval& a, const Interval& b )
|
||||||
|
{
|
||||||
|
const V ll = a.lower*b.lower;
|
||||||
|
const V lu = a.lower*b.upper;
|
||||||
|
const V ul = a.upper*b.lower;
|
||||||
|
const V uu = a.upper*b.upper;
|
||||||
|
return Interval(min(ll,lu,ul,uu),max(ll,lu,ul,uu));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline friend Interval merge( const Interval& a, const Interval& b) {
|
||||||
|
return Interval(min(a.lower,b.lower),max(a.upper,b.upper));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline friend Interval merge( const Interval& a, const Interval& b, const Interval& c) {
|
||||||
|
return merge(merge(a,b),c);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline friend Interval merge( const Interval& a, const Interval& b, const Interval& c, const Interval& d) {
|
||||||
|
return merge(merge(a,b),merge(c,d));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! intersect bounding boxes */
|
||||||
|
__forceinline friend const Interval intersect( const Interval& a, const Interval& b ) { return Interval(max(a.lower, b.lower), min(a.upper, b.upper)); }
|
||||||
|
__forceinline friend const Interval intersect( const Interval& a, const Interval& b, const Interval& c ) { return intersect(a,intersect(b,c)); }
|
||||||
|
__forceinline friend const Interval intersect( const Interval& a, const Interval& b, const Interval& c, const Interval& d ) { return intersect(intersect(a,b),intersect(c,d)); }
|
||||||
|
|
||||||
|
friend embree_ostream operator<<(embree_ostream cout, const Interval& a) {
|
||||||
|
return cout << "[" << a.lower << ", " << a.upper << "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Interval( EmptyTy ) : lower(pos_inf), upper(neg_inf) {}
|
||||||
|
__forceinline Interval( FullTy ) : lower(neg_inf), upper(pos_inf) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
__forceinline bool isEmpty(const Interval<float>& v) {
|
||||||
|
return v.lower > v.upper;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline vboolx isEmpty(const Interval<vfloatx>& v) {
|
||||||
|
return v.lower > v.upper;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! subset relation */
|
||||||
|
template<typename T> __forceinline bool subset( const Interval<T>& a, const Interval<T>& b ) {
|
||||||
|
return (a.lower > b.lower) && (a.upper < b.upper);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> __forceinline bool subset( const Vec2<Interval<T>>& a, const Vec2<Interval<T>>& b ) {
|
||||||
|
return subset(a.x,b.x) && subset(a.y,b.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! component-wise intersection of two 2D intervals */
template<typename T> __forceinline const Vec2<Interval<T>> intersect( const Vec2<Interval<T>>& a, const Vec2<Interval<T>>& b ) {
  typedef Vec2<Interval<T>> Interval2;
  return Interval2(intersect(a.x,b.x),intersect(a.y,b.y));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! selects between two intervals with a single boolean by selecting each bound separately */
template<typename T> __forceinline Interval<T> select ( bool s, const Interval<T>& t, const Interval<T>& f ) {
  typedef Interval<T> Result;
  return Result(select(s,t.lower,f.lower),select(s,t.upper,f.upper));
}

/*! same selection driven by the mask type T::Bool of the value type */
template<typename T> __forceinline Interval<T> select ( const typename T::Bool& s, const Interval<T>& t, const Interval<T>& f ) {
  typedef Interval<T> Result;
  return Result(select(s,t.lower,f.lower),select(s,t.upper,f.upper));
}
|
||||||
|
|
||||||
|
__forceinline int numRoots(const Interval<float>& p0, const Interval<float>& p1)
|
||||||
|
{
|
||||||
|
float eps = 1E-4f;
|
||||||
|
bool neg0 = p0.lower < eps; bool pos0 = p0.upper > -eps;
|
||||||
|
bool neg1 = p1.lower < eps; bool pos1 = p1.upper > -eps;
|
||||||
|
return (neg0 && pos1) || (pos0 && neg1) || (neg0 && pos0) || (neg1 && pos1);
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef Interval<float> Interval1f;
|
||||||
|
typedef Vec2<Interval<float>> Interval2f;
|
||||||
|
typedef Vec3<Interval<float>> Interval3f;
|
||||||
|
|
||||||
|
/*! exchanges two floats */
inline void swap(float& a, float& b) {
  const float previousA = a;
  a = b;
  b = previousA;
}
|
||||||
|
|
||||||
|
/*! translates an interval by adding the same offset to both bounds */
inline Interval1f shift(const Interval1f& v, float shift) {
  const float movedLower = v.lower + shift;
  const float movedUpper = v.upper + shift;
  return Interval1f(movedLower, movedUpper);
}
|
||||||
|
|
||||||
|
#define TWO_PI (2.0*M_PI)
|
||||||
|
/*! bounds of sine over an interval.
 *  An interval at least pi wide is answered conservatively with [-1,1]. Otherwise the
 *  interval is moved by whole periods so that lower lands in [0,2pi), the sine is
 *  evaluated at both ends, and the extrema the interval crosses are added. */
inline Interval1f sin(Interval1f interval)
{
  if (interval.upper-interval.lower >= M_PI) { return Interval1f(-1.0, 1.0); }

  /* range reduction; afterwards lower is in [0,2pi) while upper may still exceed 2pi */
  if (interval.upper > TWO_PI) { interval = shift(interval, -TWO_PI*floor(interval.upper/TWO_PI)); }
  if (interval.lower < 0) { interval = shift(interval, -TWO_PI*floor(interval.lower/TWO_PI)); }

  float sinLower = sin(interval.lower);
  float sinUpper = sin(interval.upper);
  if (sinLower > sinUpper) swap(sinLower, sinUpper);

  /* maximum at pi/2 */
  if (interval.lower < M_PI / 2.0 && interval.upper > M_PI / 2.0) sinUpper = 1.0;

  /* maximum at 5pi/2: reached when the interval wraps past 2pi, e.g. [-0.2pi,0.6pi]
     is reduced to [1.8pi,2.6pi]; without this test the upper bound would be too small */
  if (interval.lower < 5.0 * M_PI / 2.0 && interval.upper > 5.0 * M_PI / 2.0) sinUpper = 1.0;

  /* minimum at 3pi/2 (the next minimum at 7pi/2 is out of reach since upper < lower+pi < 3pi) */
  if (interval.lower < 3.0 * M_PI / 2.0 && interval.upper > 3.0 * M_PI / 2.0) sinLower = -1.0;

  return Interval1f(sinLower, sinUpper);
}
|
||||||
|
|
||||||
|
/*! bounds of cosine over an interval.
 *  An interval at least pi wide is answered conservatively with [-1,1]. Otherwise the
 *  interval is moved by whole periods so that lower lands in [0,2pi), the cosine is
 *  evaluated at both ends, and the extrema the interval crosses are added. */
inline Interval1f cos(Interval1f interval)
{
  if (interval.upper-interval.lower >= M_PI) { return Interval1f(-1.0, 1.0); }

  /* range reduction; afterwards lower is in [0,2pi) while upper may still exceed 2pi */
  if (interval.upper > TWO_PI) { interval = shift(interval, -TWO_PI*floor(interval.upper/TWO_PI)); }
  if (interval.lower < 0) { interval = shift(interval, -TWO_PI*floor(interval.lower/TWO_PI)); }

  float cosLower = cos(interval.lower);
  float cosUpper = cos(interval.upper);
  if (cosLower > cosUpper) swap(cosLower, cosUpper);

  /* minimum at pi (the next minimum at 3pi is out of reach since upper < lower+pi < 3pi) */
  if (interval.lower < M_PI && interval.upper > M_PI) cosLower = -1.0;

  /* maximum at 2pi: reached when the interval wraps past a multiple of 2pi, e.g.
     [-0.1,0.1] is reduced to [2pi-0.1,2pi+0.1]; without this test cos(0)=1 would be
     excluded from the result */
  if (interval.lower < TWO_PI && interval.upper > TWO_PI) cosUpper = 1.0;

  return Interval1f(cosLower, cosUpper);
}
|
||||||
|
#undef TWO_PI
|
||||||
|
}
|
||||||
331
Framework/external/embree/common/math/lbbox.h
vendored
Normal file
331
Framework/external/embree/common/math/lbbox.h
vendored
Normal file
|
|
@ -0,0 +1,331 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "bbox.h"
|
||||||
|
#include "range.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<typename T>
|
||||||
|
__forceinline std::pair<T,T> globalLinear(const std::pair<T,T>& v, const BBox1f& dt)
|
||||||
|
{
|
||||||
|
const float rcp_dt_size = float(1.0f)/dt.size();
|
||||||
|
const T g0 = lerp(v.first,v.second,-dt.lower*rcp_dt_size);
|
||||||
|
const T g1 = lerp(v.first,v.second,(1.0f-dt.lower)*rcp_dt_size);
|
||||||
|
return std::make_pair(g0,g1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct LBBox
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
__forceinline LBBox () {}
|
||||||
|
|
||||||
|
template<typename T1>
|
||||||
|
__forceinline LBBox ( const LBBox<T1>& other )
|
||||||
|
: bounds0(other.bounds0), bounds1(other.bounds1) {}
|
||||||
|
|
||||||
|
__forceinline LBBox& operator= ( const LBBox& other ) {
|
||||||
|
bounds0 = other.bounds0; bounds1 = other.bounds1; return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline LBBox (EmptyTy)
|
||||||
|
: bounds0(EmptyTy()), bounds1(EmptyTy()) {}
|
||||||
|
|
||||||
|
__forceinline explicit LBBox ( const BBox<T>& bounds)
|
||||||
|
: bounds0(bounds), bounds1(bounds) { }
|
||||||
|
|
||||||
|
__forceinline LBBox ( const BBox<T>& bounds0, const BBox<T>& bounds1)
|
||||||
|
: bounds0(bounds0), bounds1(bounds1) { }
|
||||||
|
|
||||||
|
LBBox ( const avector<BBox<T>>& bounds )
|
||||||
|
{
|
||||||
|
assert(bounds.size());
|
||||||
|
BBox<T> b0 = bounds.front();
|
||||||
|
BBox<T> b1 = bounds.back();
|
||||||
|
for (size_t i=1; i<bounds.size()-1; i++) {
|
||||||
|
const float f = float(i)/float(bounds.size()-1);
|
||||||
|
const BBox<T> bt = lerp(b0,b1,f);
|
||||||
|
const T dlower = min(bounds[i].lower-bt.lower,T(zero));
|
||||||
|
const T dupper = max(bounds[i].upper-bt.upper,T(zero));
|
||||||
|
b0.lower += dlower; b1.lower += dlower;
|
||||||
|
b0.upper += dupper; b1.upper += dupper;
|
||||||
|
}
|
||||||
|
bounds0 = b0;
|
||||||
|
bounds1 = b1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! calculates the linear bounds of a primitive for the specified time range */
|
||||||
|
template<typename BoundsFunc>
|
||||||
|
__forceinline LBBox(const BoundsFunc& bounds, const BBox1f& time_range, float numTimeSegments)
|
||||||
|
{
|
||||||
|
const float lower = time_range.lower*numTimeSegments;
|
||||||
|
const float upper = time_range.upper*numTimeSegments;
|
||||||
|
const float ilowerf = floor(lower);
|
||||||
|
const float iupperf = ceil(upper);
|
||||||
|
const int ilower = (int)ilowerf;
|
||||||
|
const int iupper = (int)iupperf;
|
||||||
|
|
||||||
|
const BBox<T> blower0 = bounds(ilower);
|
||||||
|
const BBox<T> bupper1 = bounds(iupper);
|
||||||
|
|
||||||
|
if (iupper-ilower == 1) {
|
||||||
|
bounds0 = lerp(blower0, bupper1, lower-ilowerf);
|
||||||
|
bounds1 = lerp(bupper1, blower0, iupperf-upper);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const BBox<T> blower1 = bounds(ilower+1);
|
||||||
|
const BBox<T> bupper0 = bounds(iupper-1);
|
||||||
|
BBox<T> b0 = lerp(blower0, blower1, lower-ilowerf);
|
||||||
|
BBox<T> b1 = lerp(bupper1, bupper0, iupperf-upper);
|
||||||
|
|
||||||
|
for (int i = ilower+1; i < iupper; i++)
|
||||||
|
{
|
||||||
|
const float f = (float(i)/numTimeSegments - time_range.lower) / time_range.size();
|
||||||
|
const BBox<T> bt = lerp(b0, b1, f);
|
||||||
|
const BBox<T> bi = bounds(i);
|
||||||
|
const T dlower = min(bi.lower-bt.lower, T(zero));
|
||||||
|
const T dupper = max(bi.upper-bt.upper, T(zero));
|
||||||
|
b0.lower += dlower; b1.lower += dlower;
|
||||||
|
b0.upper += dupper; b1.upper += dupper;
|
||||||
|
}
|
||||||
|
|
||||||
|
bounds0 = b0;
|
||||||
|
bounds1 = b1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! calculates the linear bounds of a primitive for the specified time range */
|
||||||
|
template<typename BoundsFunc>
|
||||||
|
__forceinline LBBox(const BoundsFunc& bounds, const BBox1f& time_range_in, const BBox1f& geom_time_range, float geom_time_segments)
|
||||||
|
{
|
||||||
|
/* normalize global time_range_in to local geom_time_range */
|
||||||
|
const BBox1f time_range((time_range_in.lower-geom_time_range.lower)/geom_time_range.size(),
|
||||||
|
(time_range_in.upper-geom_time_range.lower)/geom_time_range.size());
|
||||||
|
|
||||||
|
const float lower = time_range.lower*geom_time_segments;
|
||||||
|
const float upper = time_range.upper*geom_time_segments;
|
||||||
|
const float ilowerf = floor(lower);
|
||||||
|
const float iupperf = ceil(upper);
|
||||||
|
const float ilowerfc = max(0.0f,ilowerf);
|
||||||
|
const float iupperfc = min(iupperf,geom_time_segments);
|
||||||
|
const int ilowerc = (int)ilowerfc;
|
||||||
|
const int iupperc = (int)iupperfc;
|
||||||
|
assert(iupperc-ilowerc > 0);
|
||||||
|
|
||||||
|
/* this larger iteration range guarantees that we process borders of geom_time_range is (partially) inside time_range_in */
|
||||||
|
const int ilower_iter = max(-1,(int)ilowerf);
|
||||||
|
const int iupper_iter = min((int)iupperf,(int)geom_time_segments+1);
|
||||||
|
|
||||||
|
const BBox<T> blower0 = bounds(ilowerc);
|
||||||
|
const BBox<T> bupper1 = bounds(iupperc);
|
||||||
|
if (iupper_iter-ilower_iter == 1) {
|
||||||
|
bounds0 = lerp(blower0, bupper1, max(0.0f,lower-ilowerfc));
|
||||||
|
bounds1 = lerp(bupper1, blower0, max(0.0f,iupperfc-upper));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const BBox<T> blower1 = bounds(ilowerc+1);
|
||||||
|
const BBox<T> bupper0 = bounds(iupperc-1);
|
||||||
|
BBox<T> b0 = lerp(blower0, blower1, max(0.0f,lower-ilowerfc));
|
||||||
|
BBox<T> b1 = lerp(bupper1, bupper0, max(0.0f,iupperfc-upper));
|
||||||
|
|
||||||
|
for (int i = ilower_iter+1; i < iupper_iter; i++)
|
||||||
|
{
|
||||||
|
const float f = (float(i)/geom_time_segments - time_range.lower) / time_range.size();
|
||||||
|
const BBox<T> bt = lerp(b0, b1, f);
|
||||||
|
const BBox<T> bi = bounds(i);
|
||||||
|
const T dlower = min(bi.lower-bt.lower, T(zero));
|
||||||
|
const T dupper = max(bi.upper-bt.upper, T(zero));
|
||||||
|
b0.lower += dlower; b1.lower += dlower;
|
||||||
|
b0.upper += dupper; b1.upper += dupper;
|
||||||
|
}
|
||||||
|
|
||||||
|
bounds0 = b0;
|
||||||
|
bounds1 = b1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! calculates the linear bounds of a primitive for the specified time range */
|
||||||
|
template<typename BoundsFunc>
|
||||||
|
__forceinline LBBox(const BoundsFunc& bounds, const range<int>& time_range, int numTimeSegments)
|
||||||
|
{
|
||||||
|
const int ilower = time_range.begin();
|
||||||
|
const int iupper = time_range.end();
|
||||||
|
|
||||||
|
BBox<T> b0 = bounds(ilower);
|
||||||
|
BBox<T> b1 = bounds(iupper);
|
||||||
|
|
||||||
|
if (iupper-ilower == 1)
|
||||||
|
{
|
||||||
|
bounds0 = b0;
|
||||||
|
bounds1 = b1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = ilower+1; i<iupper; i++)
|
||||||
|
{
|
||||||
|
const float f = float(i - time_range.begin()) / float(time_range.size());
|
||||||
|
const BBox<T> bt = lerp(b0, b1, f);
|
||||||
|
const BBox<T> bi = bounds(i);
|
||||||
|
const T dlower = min(bi.lower-bt.lower, T(zero));
|
||||||
|
const T dupper = max(bi.upper-bt.upper, T(zero));
|
||||||
|
b0.lower += dlower; b1.lower += dlower;
|
||||||
|
b0.upper += dupper; b1.upper += dupper;
|
||||||
|
}
|
||||||
|
|
||||||
|
bounds0 = b0;
|
||||||
|
bounds1 = b1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
|
||||||
|
__forceinline LBBox(const BBox1f& time_range_in, const LBBox<T> lbounds, const BBox1f& target_time_range)
|
||||||
|
{
|
||||||
|
const BBox3f bounds0 = lbounds.bounds0;
|
||||||
|
const BBox3f bounds1 = lbounds.bounds1;
|
||||||
|
|
||||||
|
/* normalize global target_time_range to local time_range_in */
|
||||||
|
const BBox1f time_range((target_time_range.lower-time_range_in.lower)/time_range_in.size(),
|
||||||
|
(target_time_range.upper-time_range_in.lower)/time_range_in.size());
|
||||||
|
|
||||||
|
const BBox1f clipped_time_range(max(0.0f,time_range.lower), min(1.0f,time_range.upper));
|
||||||
|
|
||||||
|
/* compute bounds at begin and end of clipped time range */
|
||||||
|
BBox<T> b0 = lerp(bounds0,bounds1,clipped_time_range.lower);
|
||||||
|
BBox<T> b1 = lerp(bounds0,bounds1,clipped_time_range.upper);
|
||||||
|
|
||||||
|
/* make sure that b0 is properly bounded at time_range_in.lower */
|
||||||
|
{
|
||||||
|
const BBox<T> bt = lerp(b0, b1, (0.0f - time_range.lower) / time_range.size());
|
||||||
|
const T dlower = min(bounds0.lower-bt.lower, T(zero));
|
||||||
|
const T dupper = max(bounds0.upper-bt.upper, T(zero));
|
||||||
|
b0.lower += dlower; b1.lower += dlower;
|
||||||
|
b0.upper += dupper; b1.upper += dupper;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* make sure that b1 is properly bounded at time_range_in.upper */
|
||||||
|
{
|
||||||
|
const BBox<T> bt = lerp(b0, b1, (1.0f - time_range.lower) / time_range.size());
|
||||||
|
const T dlower = min(bounds1.lower-bt.lower, T(zero));
|
||||||
|
const T dupper = max(bounds1.upper-bt.upper, T(zero));
|
||||||
|
b0.lower += dlower; b1.lower += dlower;
|
||||||
|
b0.upper += dupper; b1.upper += dupper;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->bounds0 = b0;
|
||||||
|
this->bounds1 = b1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! calculates the linear bounds for target_time_range of a primitive, given its time_range_in and bounds */
|
||||||
|
__forceinline LBBox(const BBox1f& time_range_in, const BBox<T>& bounds0, const BBox<T>& bounds1, const BBox1f& target_time_range)
|
||||||
|
: LBBox(time_range_in,LBBox(bounds0,bounds1),target_time_range) {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
__forceinline bool empty() const {
|
||||||
|
return bounds().empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline BBox<T> bounds () const {
|
||||||
|
return merge(bounds0,bounds1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline BBox<T> interpolate( const float t ) const {
|
||||||
|
return lerp(bounds0,bounds1,t);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline LBBox<T> interpolate( const BBox1f& dt ) const {
|
||||||
|
return LBBox<T>(interpolate(dt.lower),interpolate(dt.upper));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void extend( const LBBox& other ) {
|
||||||
|
bounds0.extend(other.bounds0);
|
||||||
|
bounds1.extend(other.bounds1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline float expectedHalfArea() const;
|
||||||
|
|
||||||
|
__forceinline float expectedHalfArea(const BBox1f& dt) const {
|
||||||
|
return interpolate(dt).expectedHalfArea();
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline float expectedApproxHalfArea() const {
|
||||||
|
return 0.5f*(halfArea(bounds0) + halfArea(bounds1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* calculates bounds for [0,1] time range from bounds in dt time range */
|
||||||
|
__forceinline LBBox global(const BBox1f& dt) const
|
||||||
|
{
|
||||||
|
const float rcp_dt_size = 1.0f/dt.size();
|
||||||
|
const BBox<T> b0 = interpolate(-dt.lower*rcp_dt_size);
|
||||||
|
const BBox<T> b1 = interpolate((1.0f-dt.lower)*rcp_dt_size);
|
||||||
|
return LBBox(b0,b1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Comparison Operators */
|
||||||
|
//template<typename TT> friend __forceinline bool operator==( const LBBox<TT>& a, const LBBox<TT>& b ) { return a.bounds0 == b.bounds0 && a.bounds1 == b.bounds1; }
|
||||||
|
//template<typename TT> friend __forceinline bool operator!=( const LBBox<TT>& a, const LBBox<TT>& b ) { return a.bounds0 != b.bounds0 || a.bounds1 != b.bounds1; }
|
||||||
|
friend __forceinline bool operator==( const LBBox& a, const LBBox& b ) { return a.bounds0 == b.bounds0 && a.bounds1 == b.bounds1; }
|
||||||
|
friend __forceinline bool operator!=( const LBBox& a, const LBBox& b ) { return a.bounds0 != b.bounds0 || a.bounds1 != b.bounds1; }
|
||||||
|
|
||||||
|
/*! output operator */
|
||||||
|
friend __forceinline embree_ostream operator<<(embree_ostream cout, const LBBox& box) {
|
||||||
|
return cout << "LBBox { " << box.bounds0 << "; " << box.bounds1 << " }";
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
BBox<T> bounds0, bounds1;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! tests if box is finite */
|
||||||
|
template<typename T>
|
||||||
|
__forceinline bool isvalid( const LBBox<T>& v ) {
|
||||||
|
return isvalid(v.bounds0) && isvalid(v.bounds1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline bool isvalid_non_empty( const LBBox<T>& v ) {
|
||||||
|
return isvalid_non_empty(v.bounds0) && isvalid_non_empty(v.bounds1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! evaluates a0*b0 + (a0*db + da*b0)/2 + da*db/3 with da = a1-a0 and db = b1-b0.
 *  The return expression is kept as a single expression on purpose so that the
 *  floating-point evaluation stays unchanged. */
template<typename T>
__forceinline T expectedArea(const T& a0, const T& a1, const T& b0, const T& b1)
{
  const T deltaA = a1-a0;
  const T deltaB = b1-b0;
  return a0*b0+(a0*deltaB+deltaA*b0)*T(0.5f) + deltaA*deltaB*T(1.0f/3.0f);
}
|
||||||
|
|
||||||
|
/*! Vec3fa specialization: sums expectedArea() over the three pairs of box extents
 *  (x,y), (y,z) and (z,x), taken from the sizes of the two end boxes */
template<> __forceinline float LBBox<Vec3fa>::expectedHalfArea() const
{
  const Vec3fa size0 = bounds0.size();
  const Vec3fa size1 = bounds1.size();

  /* extents in original order and rotated by one component */
  const Vec3fa extent0 (size0.x,size0.y,size0.z);
  const Vec3fa extent1 (size1.x,size1.y,size1.z);
  const Vec3fa rotated0(size0.y,size0.z,size0.x);
  const Vec3fa rotated1(size1.y,size1.z,size1.x);

  return reduce_add(expectedArea(extent0,extent1,rotated0,rotated1));
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline float expectedApproxHalfArea(const LBBox<T>& box) {
|
||||||
|
return box.expectedApproxHalfArea();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline LBBox<T> merge(const LBBox<T>& a, const LBBox<T>& b) {
|
||||||
|
return LBBox<T>(merge(a.bounds0, b.bounds0), merge(a.bounds1, b.bounds1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! subset relation */
|
||||||
|
template<typename T> __inline bool subset( const LBBox<T>& a, const LBBox<T>& b ) {
|
||||||
|
return subset(a.bounds0,b.bounds0) && subset(a.bounds1,b.bounds1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! default template instantiations */
|
||||||
|
typedef LBBox<float> LBBox1f;
|
||||||
|
typedef LBBox<Vec2f> LBBox2f;
|
||||||
|
typedef LBBox<Vec3f> LBBox3f;
|
||||||
|
typedef LBBox<Vec3fa> LBBox3fa;
|
||||||
|
typedef LBBox<Vec3fx> LBBox3fx;
|
||||||
|
}
|
||||||
149
Framework/external/embree/common/math/linearspace2.h
vendored
Normal file
149
Framework/external/embree/common/math/linearspace2.h
vendored
Normal file
|
|
@ -0,0 +1,149 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "vec2.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// 2D Linear Transform (2x2 Matrix)
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> struct LinearSpace2
|
||||||
|
{
|
||||||
|
typedef T Vector;
|
||||||
|
typedef typename T::Scalar Scalar;
|
||||||
|
|
||||||
|
/*! default matrix constructor */
|
||||||
|
__forceinline LinearSpace2 ( ) {}
|
||||||
|
|
||||||
|
__forceinline LinearSpace2 ( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; }
|
||||||
|
__forceinline LinearSpace2& operator=( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; return *this; }
|
||||||
|
|
||||||
|
template<typename L1> __forceinline LinearSpace2( const LinearSpace2<L1>& s ) : vx(s.vx), vy(s.vy) {}
|
||||||
|
|
||||||
|
/*! matrix construction from column vectors */
|
||||||
|
__forceinline LinearSpace2(const Vector& vx, const Vector& vy)
|
||||||
|
: vx(vx), vy(vy) {}
|
||||||
|
|
||||||
|
/*! matrix construction from row mayor data */
|
||||||
|
__forceinline LinearSpace2(const Scalar& m00, const Scalar& m01,
|
||||||
|
const Scalar& m10, const Scalar& m11)
|
||||||
|
: vx(m00,m10), vy(m01,m11) {}
|
||||||
|
|
||||||
|
/*! compute the determinant of the matrix */
|
||||||
|
__forceinline const Scalar det() const { return vx.x*vy.y - vx.y*vy.x; }
|
||||||
|
|
||||||
|
/*! compute adjoint matrix */
|
||||||
|
__forceinline const LinearSpace2 adjoint() const { return LinearSpace2(vy.y,-vy.x,-vx.y,vx.x); }
|
||||||
|
|
||||||
|
/*! compute inverse matrix */
|
||||||
|
__forceinline const LinearSpace2 inverse() const { return adjoint()/det(); }
|
||||||
|
|
||||||
|
/*! compute transposed matrix */
|
||||||
|
__forceinline const LinearSpace2 transposed() const { return LinearSpace2(vx.x,vx.y,vy.x,vy.y); }
|
||||||
|
|
||||||
|
/*! returns first row of matrix */
|
||||||
|
__forceinline Vector row0() const { return Vector(vx.x,vy.x); }
|
||||||
|
|
||||||
|
/*! returns second row of matrix */
|
||||||
|
__forceinline Vector row1() const { return Vector(vx.y,vy.y); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline LinearSpace2( ZeroTy ) : vx(zero), vy(zero) {}
|
||||||
|
__forceinline LinearSpace2( OneTy ) : vx(one, zero), vy(zero, one) {}
|
||||||
|
|
||||||
|
/*! return matrix for scaling */
|
||||||
|
static __forceinline LinearSpace2 scale(const Vector& s) {
|
||||||
|
return LinearSpace2(s.x, 0,
|
||||||
|
0 , s.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! return matrix for rotation */
|
||||||
|
static __forceinline LinearSpace2 rotate(const Scalar& r)
{
  const Scalar sin_r = sin(r);
  const Scalar cos_r = cos(r);
  // rows: (cos, -sin) and (sin, cos)
  return LinearSpace2(cos_r, -sin_r, sin_r, cos_r);
}
|
||||||
|
|
||||||
|
/*! return closest orthogonal matrix (i.e. a general rotation including reflection) */
|
||||||
|
LinearSpace2 orthogonal() const
{
  LinearSpace2 cur = *this;

  // a negative determinant is handled by negating the x column before the
  // iteration and applying the same sign to the x column of the result
  Scalar flip(one);
  if (cur.det() < Scalar(zero)) {
    cur.vx = -cur.vx;
    flip = -flip;
  }

  // iterate cur <- 0.5 * (cur + inverse(transpose(cur))), at most 99 rounds
  for (int iter = 0; iter < 99; iter++)
  {
    const LinearSpace2 next = 0.5 * (cur + cur.transposed().inverse());
    const LinearSpace2 delta = next - cur;
    cur = next;

    // stop once the larger squared column length of the step is below 1e-8
    if (max(dot(delta.vx, delta.vx), dot(delta.vy, delta.vy)) < 1e-8)
      break;
  }

  // re-apply the sign that was removed from the x column
  return LinearSpace2(flip*cur.vx, cur.vy);
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
/*! the column vectors of the matrix */
|
||||||
|
Vector vx,vy;
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! negates both columns */
template<typename T>
__forceinline LinearSpace2<T> operator -( const LinearSpace2<T>& a ) {
  return LinearSpace2<T>(-a.vx, -a.vy);
}

/*! applies unary plus to both columns */
template<typename T>
__forceinline LinearSpace2<T> operator +( const LinearSpace2<T>& a ) {
  return LinearSpace2<T>(+a.vx, +a.vy);
}

/*! reciprocal of a matrix is its inverse */
template<typename T>
__forceinline LinearSpace2<T> rcp( const LinearSpace2<T>& a ) {
  return a.inverse();
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! column-wise sum */
template<typename T>
__forceinline LinearSpace2<T> operator +( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) {
  return LinearSpace2<T>(a.vx+b.vx, a.vy+b.vy);
}

/*! column-wise difference */
template<typename T>
__forceinline LinearSpace2<T> operator -( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) {
  return LinearSpace2<T>(a.vx-b.vx, a.vy-b.vy);
}
|
||||||
|
|
||||||
|
/*! scalar * matrix: every column is scaled by a */
template<typename T>
__forceinline LinearSpace2<T> operator*(const typename T::Scalar & a, const LinearSpace2<T>& b) {
  return LinearSpace2<T>(a*b.vx, a*b.vy);
}

/*! matrix * vector: the columns of a weighted by the components of b */
template<typename T>
__forceinline T operator*(const LinearSpace2<T>& a, const T & b) {
  return b.x*a.vx + b.y*a.vy;
}

/*! matrix * matrix: a is applied to each column of b */
template<typename T>
__forceinline LinearSpace2<T> operator*(const LinearSpace2<T>& a, const LinearSpace2<T>& b) {
  return LinearSpace2<T>(a*b.vx, a*b.vy);
}
|
||||||
|
|
||||||
|
/*! matrix / scalar: every column is divided by b */
template<typename T>
__forceinline LinearSpace2<T> operator/(const LinearSpace2<T>& a, const typename T::Scalar & b) {
  return LinearSpace2<T>(a.vx/b, a.vy/b);
}

/*! matrix / matrix: a multiplied by rcp(b) from the right */
template<typename T>
__forceinline LinearSpace2<T> operator/(const LinearSpace2<T>& a, const LinearSpace2<T>& b) {
  return a * rcp(b);
}
|
||||||
|
|
||||||
|
/*! a = a * b, returns a reference to a */
template<typename T>
__forceinline LinearSpace2<T>& operator *=( LinearSpace2<T>& a, const LinearSpace2<T>& b ) {
  return a = a * b;
}

/*! a = a / b, returns a reference to a */
template<typename T>
__forceinline LinearSpace2<T>& operator /=( LinearSpace2<T>& a, const LinearSpace2<T>& b ) {
  return a = a / b;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! equal when both columns compare equal */
template<typename T>
__forceinline bool operator ==( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) {
  return a.vx == b.vx && a.vy == b.vy;
}

/*! different when at least one column compares different */
template<typename T>
__forceinline bool operator !=( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) {
  return a.vx != b.vx || a.vy != b.vy;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! prints the matrix as "{ vx = <vx>, vy = <vy>}" */
template<typename T>
static embree_ostream operator<<(embree_ostream cout, const LinearSpace2<T>& m)
{
  return cout << "{ vx = " << m.vx
              << ", vy = " << m.vy << "}";
}
|
||||||
|
|
||||||
|
/*! Shortcuts for common linear spaces. */
|
||||||
|
typedef LinearSpace2<Vec2f> LinearSpace2f;
|
||||||
|
typedef LinearSpace2<Vec2fa> LinearSpace2fa;
|
||||||
|
}
|
||||||
217
Framework/external/embree/common/math/linearspace3.h
vendored
Normal file
217
Framework/external/embree/common/math/linearspace3.h
vendored
Normal file
|
|
@ -0,0 +1,217 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "vec3.h"
|
||||||
|
#include "quaternion.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// 3D Linear Transform (3x3 Matrix)
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> struct LinearSpace3
|
||||||
|
{
|
||||||
|
typedef T Vector;
|
||||||
|
typedef typename T::Scalar Scalar;
|
||||||
|
|
||||||
|
/*! default matrix constructor */
|
||||||
|
__forceinline LinearSpace3 ( ) {}
|
||||||
|
|
||||||
|
__forceinline LinearSpace3 ( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; }
|
||||||
|
__forceinline LinearSpace3& operator=( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; return *this; }
|
||||||
|
|
||||||
|
template<typename L1> __forceinline LinearSpace3( const LinearSpace3<L1>& s ) : vx(s.vx), vy(s.vy), vz(s.vz) {}
|
||||||
|
|
||||||
|
/*! matrix construction from column vectors */
|
||||||
|
__forceinline LinearSpace3(const Vector& vx, const Vector& vy, const Vector& vz)
|
||||||
|
: vx(vx), vy(vy), vz(vz) {}
|
||||||
|
|
||||||
|
/*! construction from quaternion */
|
||||||
|
__forceinline LinearSpace3( const QuaternionT<Scalar>& q )
|
||||||
|
: vx((q.r*q.r + q.i*q.i - q.j*q.j - q.k*q.k), 2.0f*(q.i*q.j + q.r*q.k), 2.0f*(q.i*q.k - q.r*q.j))
|
||||||
|
, vy(2.0f*(q.i*q.j - q.r*q.k), (q.r*q.r - q.i*q.i + q.j*q.j - q.k*q.k), 2.0f*(q.j*q.k + q.r*q.i))
|
||||||
|
, vz(2.0f*(q.i*q.k + q.r*q.j), 2.0f*(q.j*q.k - q.r*q.i), (q.r*q.r - q.i*q.i - q.j*q.j + q.k*q.k)) {}
|
||||||
|
|
||||||
|
/*! matrix construction from row mayor data */
|
||||||
|
__forceinline LinearSpace3(const Scalar& m00, const Scalar& m01, const Scalar& m02,
|
||||||
|
const Scalar& m10, const Scalar& m11, const Scalar& m12,
|
||||||
|
const Scalar& m20, const Scalar& m21, const Scalar& m22)
|
||||||
|
: vx(m00,m10,m20), vy(m01,m11,m21), vz(m02,m12,m22) {}
|
||||||
|
|
||||||
|
/*! compute the determinant of the matrix */
|
||||||
|
__forceinline const Scalar det() const { return dot(vx,cross(vy,vz)); }
|
||||||
|
|
||||||
|
/*! compute adjoint matrix */
|
||||||
|
__forceinline const LinearSpace3 adjoint() const { return LinearSpace3(cross(vy,vz),cross(vz,vx),cross(vx,vy)).transposed(); }
|
||||||
|
|
||||||
|
/*! compute inverse matrix */
|
||||||
|
__forceinline const LinearSpace3 inverse() const { return adjoint()/det(); }
|
||||||
|
|
||||||
|
/*! compute transposed matrix */
|
||||||
|
__forceinline const LinearSpace3 transposed() const { return LinearSpace3(vx.x,vx.y,vx.z,vy.x,vy.y,vy.z,vz.x,vz.y,vz.z); }
|
||||||
|
|
||||||
|
/*! returns first row of matrix */
|
||||||
|
__forceinline Vector row0() const { return Vector(vx.x,vy.x,vz.x); }
|
||||||
|
|
||||||
|
/*! returns second row of matrix */
|
||||||
|
__forceinline Vector row1() const { return Vector(vx.y,vy.y,vz.y); }
|
||||||
|
|
||||||
|
/*! returns third row of matrix */
|
||||||
|
__forceinline Vector row2() const { return Vector(vx.z,vy.z,vz.z); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline LinearSpace3( ZeroTy ) : vx(zero), vy(zero), vz(zero) {}
|
||||||
|
__forceinline LinearSpace3( OneTy ) : vx(one, zero, zero), vy(zero, one, zero), vz(zero, zero, one) {}
|
||||||
|
|
||||||
|
/*! return matrix for scaling */
|
||||||
|
static __forceinline LinearSpace3 scale(const Vector& s) {
|
||||||
|
return LinearSpace3(s.x, 0, 0,
|
||||||
|
0 , s.y, 0,
|
||||||
|
0 , 0, s.z);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! return matrix for rotation around arbitrary axis */
|
||||||
|
static __forceinline LinearSpace3 rotate(const Vector& _u, const Scalar& r) {
|
||||||
|
Vector u = normalize(_u);
|
||||||
|
Scalar s = sin(r), c = cos(r);
|
||||||
|
return LinearSpace3(u.x*u.x+(1-u.x*u.x)*c, u.x*u.y*(1-c)-u.z*s, u.x*u.z*(1-c)+u.y*s,
|
||||||
|
u.x*u.y*(1-c)+u.z*s, u.y*u.y+(1-u.y*u.y)*c, u.y*u.z*(1-c)-u.x*s,
|
||||||
|
u.x*u.z*(1-c)-u.y*s, u.y*u.z*(1-c)+u.x*s, u.z*u.z+(1-u.z*u.z)*c);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
/*! the column vectors of the matrix */
|
||||||
|
Vector vx,vy,vz;
|
||||||
|
};
|
||||||
|
|
||||||
|
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
|
||||||
|
/*! compute transposed matrix */
|
||||||
|
template<> __forceinline const LinearSpace3<Vec3fa> LinearSpace3<Vec3fa>::transposed() const
{
  // the three columns are reinterpreted as vfloat4 and transposed together
  // with a zero fourth input; only the first three outputs are kept
  vfloat4 r0, r1, r2;
  transpose((vfloat4&)vx, (vfloat4&)vy, (vfloat4&)vz, vfloat4(zero), r0, r1, r2);
  return LinearSpace3<Vec3fa>(Vec3fa(r0), Vec3fa(r1), Vec3fa(r2));
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline const LinearSpace3<T> transposed(const LinearSpace3<T>& xfm) {
|
||||||
|
return xfm.transposed();
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! negates all three columns */
template<typename T>
__forceinline LinearSpace3<T> operator -( const LinearSpace3<T>& a ) {
  return LinearSpace3<T>(-a.vx, -a.vy, -a.vz);
}

/*! applies unary plus to all three columns */
template<typename T>
__forceinline LinearSpace3<T> operator +( const LinearSpace3<T>& a ) {
  return LinearSpace3<T>(+a.vx, +a.vy, +a.vz);
}

/*! reciprocal of a matrix is its inverse */
template<typename T>
__forceinline LinearSpace3<T> rcp( const LinearSpace3<T>& a ) {
  return a.inverse();
}
|
||||||
|
|
||||||
|
/* constructs a coordinate frame from a normalized normal */
|
||||||
|
template<typename T> __forceinline LinearSpace3<T> frame(const T& N)
{
  // two candidate x-directions; each has a zero dot product with N by construction
  const T cand0(0,N.z,-N.y);
  const T cand1(-N.z,0,N.x);

  // keep the candidate with the larger squared length, then normalize it
  const T tangent = normalize(select(dot(cand0,cand0) > dot(cand1,cand1),cand0,cand1));

  // second axis completes the frame; N itself becomes the third column
  const T bitangent = normalize(cross(N,tangent));
  return LinearSpace3<T>(tangent,bitangent,N);
}
|
||||||
|
|
||||||
|
/* constructs a coordinate frame from a normal and approximate x-direction */
|
||||||
|
template<typename T> __forceinline LinearSpace3<T> frame(const T& N, const T& dxi)
{
  // when |dot(dxi,N)| exceeds 0.99 the hint is too close to N to be useful,
  // so the single-argument frame(N) is used instead
  if (abs(dot(dxi,N)) > 0.99f)
    return frame(N);

  const T tangent   = normalize(cross(dxi,N));
  const T bitangent = normalize(cross(N,tangent));
  return LinearSpace3<T>(tangent,bitangent,N);
}
|
||||||
|
|
||||||
|
/* clamps linear space to range -1 to +1 */
|
||||||
|
template<typename T> __forceinline LinearSpace3<T> clamp(const LinearSpace3<T>& space)
{
  // bounds applied to each of the three columns
  const T lo(-1.0f);
  const T hi(1.0f);
  return LinearSpace3<T>(clamp(space.vx,lo,hi),
                         clamp(space.vy,lo,hi),
                         clamp(space.vz,lo,hi));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! column-wise sum */
template<typename T>
__forceinline LinearSpace3<T> operator +( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) {
  return LinearSpace3<T>(a.vx+b.vx, a.vy+b.vy, a.vz+b.vz);
}

/*! column-wise difference */
template<typename T>
__forceinline LinearSpace3<T> operator -( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) {
  return LinearSpace3<T>(a.vx-b.vx, a.vy-b.vy, a.vz-b.vz);
}
|
||||||
|
|
||||||
|
/*! scalar * matrix: every column is scaled by a */
template<typename T>
__forceinline LinearSpace3<T> operator*(const typename T::Scalar & a, const LinearSpace3<T>& b) {
  return LinearSpace3<T>(a*b.vx, a*b.vy, a*b.vz);
}

/*! matrix * vector: b.x*vx + b.y*vy + b.z*vz, accumulated with madd */
template<typename T>
__forceinline T operator*(const LinearSpace3<T>& a, const T & b) {
  const T yz = madd(T(b.y),a.vy,T(b.z)*a.vz);
  return madd(T(b.x),a.vx,yz);
}

/*! matrix * matrix: a is applied to each column of b */
template<typename T>
__forceinline LinearSpace3<T> operator*(const LinearSpace3<T>& a, const LinearSpace3<T>& b) {
  return LinearSpace3<T>(a*b.vx, a*b.vy, a*b.vz);
}
|
||||||
|
|
||||||
|
/*! matrix / scalar: every column is divided by b */
template<typename T>
__forceinline LinearSpace3<T> operator/(const LinearSpace3<T>& a, const typename T::Scalar & b) {
  return LinearSpace3<T>(a.vx/b, a.vy/b, a.vz/b);
}

/*! matrix / matrix: a multiplied by rcp(b) from the right */
template<typename T>
__forceinline LinearSpace3<T> operator/(const LinearSpace3<T>& a, const LinearSpace3<T>& b) {
  return a * rcp(b);
}
|
||||||
|
|
||||||
|
/*! a = a * b, returns a reference to a */
template<typename T>
__forceinline LinearSpace3<T>& operator *=( LinearSpace3<T>& a, const LinearSpace3<T>& b ) {
  return a = a * b;
}

/*! a = a / b, returns a reference to a */
template<typename T>
__forceinline LinearSpace3<T>& operator /=( LinearSpace3<T>& a, const LinearSpace3<T>& b ) {
  return a = a / b;
}
|
||||||
|
|
||||||
|
/*! applies s to a point: a.x*vx + a.y*vy + a.z*vz (a linear space carries no translation) */
template<typename T>
__forceinline T xfmPoint(const LinearSpace3<T>& s, const T & a) {
  const T yz = madd(T(a.y),s.vy,T(a.z)*s.vz);
  return madd(T(a.x),s.vx,yz);
}

/*! applies s to a vector; same arithmetic as xfmPoint */
template<typename T>
__forceinline T xfmVector(const LinearSpace3<T>& s, const T & a) {
  const T yz = madd(T(a.y),s.vy,T(a.z)*s.vz);
  return madd(T(a.x),s.vx,yz);
}

/*! applies the transposed inverse of s to a normal */
template<typename T>
__forceinline T xfmNormal(const LinearSpace3<T>& s, const T & a) {
  return xfmVector(s.inverse().transposed(),a);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! equal when all three columns compare equal */
template<typename T>
__forceinline bool operator ==( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) {
  return a.vx == b.vx && a.vy == b.vy && a.vz == b.vz;
}

/*! different when at least one column compares different */
template<typename T>
__forceinline bool operator !=( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) {
  return a.vx != b.vx || a.vy != b.vy || a.vz != b.vz;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! column-wise select: forwards the mask s to select() on each pair of columns of t and f */
template<typename T>
__forceinline LinearSpace3<T> select ( const typename T::Scalar::Bool& s, const LinearSpace3<T>& t, const LinearSpace3<T>& f )
{
  return LinearSpace3<T>(select(s,t.vx,f.vx),
                         select(s,t.vy,f.vy),
                         select(s,t.vz,f.vz));
}
|
||||||
|
|
||||||
|
/*! blending */
|
||||||
|
template<typename T>
|
||||||
|
__forceinline LinearSpace3<T> lerp(const LinearSpace3<T>& l0, const LinearSpace3<T>& l1, const float t)
|
||||||
|
{
|
||||||
|
return LinearSpace3<T>(lerp(l0.vx,l1.vx,t),
|
||||||
|
lerp(l0.vy,l1.vy,t),
|
||||||
|
lerp(l0.vz,l1.vz,t));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! prints the matrix as "{ vx = <vx>, vy = <vy>, vz = <vz>}" */
template<typename T>
static embree_ostream operator<<(embree_ostream cout, const LinearSpace3<T>& m)
{
  return cout << "{ vx = " << m.vx
              << ", vy = " << m.vy
              << ", vz = " << m.vz << "}";
}
|
||||||
|
|
||||||
|
/*! Shortcuts for common linear spaces. */
|
||||||
|
typedef LinearSpace3<Vec3f> LinearSpace3f;
|
||||||
|
typedef LinearSpace3<Vec3fa> LinearSpace3fa;
|
||||||
|
typedef LinearSpace3<Vec3fx> LinearSpace3fx;
|
||||||
|
typedef LinearSpace3<Vec3ff> LinearSpace3ff;
|
||||||
|
|
||||||
|
template<int N> using LinearSpace3vf = LinearSpace3<Vec3<vfloat<N>>>;
|
||||||
|
typedef LinearSpace3<Vec3<vfloat<4>>> LinearSpace3vf4;
|
||||||
|
typedef LinearSpace3<Vec3<vfloat<8>>> LinearSpace3vf8;
|
||||||
|
typedef LinearSpace3<Vec3<vfloat<16>>> LinearSpace3vf16;
|
||||||
|
|
||||||
|
/*! blending */
|
||||||
|
template<typename T, typename S>
|
||||||
|
__forceinline LinearSpace3<T> lerp(const LinearSpace3<T>& l0,
|
||||||
|
const LinearSpace3<T>& l1,
|
||||||
|
const S& t)
|
||||||
|
{
|
||||||
|
return LinearSpace3<T>(lerp(l0.vx,l1.vx,t),
|
||||||
|
lerp(l0.vy,l1.vy,t),
|
||||||
|
lerp(l0.vz,l1.vz,t));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
279
Framework/external/embree/common/math/math_sycl.h
vendored
Normal file
279
Framework/external/embree/common/math/math_sycl.h
vendored
Normal file
|
|
@ -0,0 +1,279 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/platform.h"
|
||||||
|
#include "../sys/intrinsics.h"
|
||||||
|
#include "constants.h"
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! true when v lies strictly between -FLT_LARGE and +FLT_LARGE */
__forceinline bool isvalid ( const float& v )
{
  const bool above_lower = v > -FLT_LARGE;
  const bool below_upper = v < +FLT_LARGE;
  // '&' rather than '&&': both comparisons are always evaluated
  return above_lower & below_upper;
}
|
||||||
|
|
||||||
|
/*! reinterprets the bits of a float as an int (no numeric conversion) */
__forceinline int cast_f2i(float f)
{
  return __builtin_bit_cast(int, f);
}

/*! reinterprets the bits of an int as a float (no numeric conversion) */
__forceinline float cast_i2f(int i)
{
  return __builtin_bit_cast(float, i);
}
|
||||||
|
|
||||||
|
/*! numeric conversion float -> int */
__forceinline int toInt(const float& a)
{
  return int(a);
}

/*! numeric conversion int -> float */
__forceinline float toFloat(const int& a)
{
  return float(a);
}
|
||||||
|
|
||||||
|
/*! bit pattern of an int viewed as a float */
__forceinline float asFloat(const int a)
{
  return __builtin_bit_cast(float, a);
}

/*! bit pattern of a float viewed as an int */
__forceinline int asInt(const float a)
{
  return __builtin_bit_cast(int, a);
}
|
||||||
|
|
||||||
|
//__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
|
||||||
|
/*! -1.0f for x < 0, otherwise 1.0f (so zero maps to 1.0f) */
__forceinline float sign ( const float x )
{
  if (x < 0)
    return -1.0f;
  return 1.0f;
}

/*! square of x */
__forceinline float sqr ( const float x )
{
  return x*x;
}
|
||||||
|
|
||||||
|
/*! reciprocal of x, forwarded to sycl::native::recip */
__forceinline float rcp ( const float x ) { return sycl::native::recip(x); }
|
||||||
|
|
||||||
|
/*! keeps only the bit selected by the mask 0x80000000 of a's bit pattern */
__forceinline float signmsk(const float a)
{
  const int bits = asInt(a);
  return asFloat(bits & 0x80000000);
}
|
||||||
|
//__forceinline float signmsk ( const float x ) {
|
||||||
|
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
|
||||||
|
//}
|
||||||
|
//__forceinline float xorf( const float x, const float y ) {
|
||||||
|
// return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
|
||||||
|
//}
|
||||||
|
//__forceinline float andf( const float x, const unsigned y ) {
|
||||||
|
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
|
||||||
|
//}
|
||||||
|
|
||||||
|
/*! reciprocal square root of x, forwarded to sycl::rsqrt */
__forceinline float rsqrt( const float x ) { return sycl::rsqrt(x); }
|
||||||
|
|
||||||
|
//__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
|
||||||
|
//__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
|
||||||
|
//__forceinline int roundf(float f) { return (int)(f + 0.5f); }
|
||||||
|
|
||||||
|
/* single-precision math functions, each forwarded to the sycl:: function of
   the same name unless noted otherwise */

/*! absolute value (sycl::fabs) */
__forceinline float abs ( const float x ) {
  return sycl::fabs(x);
}

/* inverse trigonometric functions */
__forceinline float acos ( const float x ) {
  return sycl::acos(x);
}
__forceinline float asin ( const float x ) {
  return sycl::asin(x);
}
__forceinline float atan ( const float x ) {
  return sycl::atan(x);
}
__forceinline float atan2( const float y, const float x ) {
  return sycl::atan2(y, x);
}

__forceinline float cos ( const float x ) {
  return sycl::cos(x);
}
__forceinline float cosh ( const float x ) {
  return sycl::cosh(x);
}
__forceinline float exp ( const float x ) {
  return sycl::exp(x);
}
__forceinline float fmod ( const float x, const float y ) {
  return sycl::fmod(x, y);
}
__forceinline float log ( const float x ) {
  return sycl::log(x);
}
__forceinline float log10( const float x ) {
  return sycl::log10(x);
}
__forceinline float pow ( const float x, const float y ) {
  return sycl::pow(x, y);
}
__forceinline float sin ( const float x ) {
  return sycl::sin(x);
}
__forceinline float sinh ( const float x ) {
  return sycl::sinh(x);
}
__forceinline float sqrt ( const float x ) {
  return sycl::sqrt(x);
}
__forceinline float tan ( const float x ) {
  return sycl::tan(x);
}
__forceinline float tanh ( const float x ) {
  return sycl::tanh(x);
}

/* rounding */
__forceinline float floor( const float x ) {
  return sycl::floor(x);
}
__forceinline float ceil ( const float x ) {
  return sycl::ceil(x);
}

/*! x minus floor(x) */
__forceinline float frac ( const float x ) {
  return x-floor(x);
}
|
||||||
|
|
||||||
|
//__forceinline double abs ( const double x ) { return ::fabs(x); }
|
||||||
|
//__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
|
||||||
|
//__forceinline double acos ( const double x ) { return ::acos (x); }
|
||||||
|
//__forceinline double asin ( const double x ) { return ::asin (x); }
|
||||||
|
//__forceinline double atan ( const double x ) { return ::atan (x); }
|
||||||
|
//__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
|
||||||
|
//__forceinline double cos ( const double x ) { return ::cos (x); }
|
||||||
|
//__forceinline double cosh ( const double x ) { return ::cosh (x); }
|
||||||
|
//__forceinline double exp ( const double x ) { return ::exp (x); }
|
||||||
|
//__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
|
||||||
|
//__forceinline double log ( const double x ) { return ::log (x); }
|
||||||
|
//__forceinline double log10( const double x ) { return ::log10(x); }
|
||||||
|
//__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
|
||||||
|
//__forceinline double rcp ( const double x ) { return 1.0/x; }
|
||||||
|
//__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
|
||||||
|
//__forceinline double sin ( const double x ) { return ::sin (x); }
|
||||||
|
//__forceinline double sinh ( const double x ) { return ::sinh (x); }
|
||||||
|
//__forceinline double sqr ( const double x ) { return x*x; }
|
||||||
|
//__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
|
||||||
|
//__forceinline double tan ( const double x ) { return ::tan (x); }
|
||||||
|
//__forceinline double tanh ( const double x ) { return ::tanh (x); }
|
||||||
|
//__forceinline double floor( const double x ) { return ::floor (x); }
|
||||||
|
//__forceinline double ceil ( const double x ) { return ::ceil (x); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline float mini(float a, float b) {
|
||||||
|
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
|
||||||
|
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
|
||||||
|
const __m128i ci = _mm_min_epi32(ai,bi);
|
||||||
|
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline float maxi(float a, float b) {
|
||||||
|
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
|
||||||
|
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
|
||||||
|
const __m128i ci = _mm_max_epi32(ai,bi);
|
||||||
|
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline T twice(const T& a) { return a+a; }
|
||||||
|
|
||||||
|
/* two-argument minimum: integer overloads use sycl::min, floating-point
   overloads use sycl::fmin */
__forceinline int min(int a, int b) {
  return sycl::min(a,b);
}
__forceinline unsigned min(unsigned a, unsigned b) {
  return sycl::min(a,b);
}
__forceinline int64_t min(int64_t a, int64_t b) {
  return sycl::min(a,b);
}
__forceinline float min(float a, float b) {
  return sycl::fmin(a,b);
}
__forceinline double min(double a, double b) {
  return sycl::fmin(a,b);
}
#if defined(__X86_64__)
__forceinline size_t min(size_t a, size_t b) {
  return sycl::min(a,b);
}
#endif
|
||||||
|
|
||||||
|
/*! minimum of three values */
template<typename T>
__forceinline T min(const T& a, const T& b, const T& c) {
  return min(min(a,b),c);
}

/*! minimum of four values, reduced pairwise */
template<typename T>
__forceinline T min(const T& a, const T& b, const T& c, const T& d) {
  return min(min(a,b),min(c,d));
}

/*! minimum of five values: pairwise reduction of the first four, then e */
template<typename T>
__forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) {
  return min(min(min(a,b),min(c,d)),e);
}
|
||||||
|
|
||||||
|
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
|
||||||
|
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
|
||||||
|
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
|
||||||
|
|
||||||
|
/* two-argument maximum: integer overloads use sycl::max, floating-point
   overloads use sycl::fmax */
__forceinline int max(int a, int b) {
  return sycl::max(a,b);
}
__forceinline unsigned max(unsigned a, unsigned b) {
  return sycl::max(a,b);
}
__forceinline int64_t max(int64_t a, int64_t b) {
  return sycl::max(a,b);
}
__forceinline float max(float a, float b) {
  return sycl::fmax(a,b);
}
__forceinline double max(double a, double b) {
  return sycl::fmax(a,b);
}
#if defined(__X86_64__)
__forceinline size_t max(size_t a, size_t b) {
  return sycl::max(a,b);
}
#endif
|
||||||
|
|
||||||
|
/*! maximum of three values */
template<typename T>
__forceinline T max(const T& a, const T& b, const T& c) {
  return max(max(a,b),c);
}

/*! maximum of four values, reduced pairwise */
template<typename T>
__forceinline T max(const T& a, const T& b, const T& c, const T& d) {
  return max(max(a,b),max(c,d));
}

/*! maximum of five values: pairwise reduction of the first four, then e */
template<typename T>
__forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) {
  return max(max(max(a,b),max(c,d)),e);
}
|
||||||
|
|
||||||
|
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
|
||||||
|
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
|
||||||
|
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
|
||||||
|
|
||||||
|
/*! limits x to [lower, upper]; the bounds default to T(zero) and T(one).
 *  The upper bound is applied first, then the lower bound. */
template<typename T>
__forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) {
  return max(min(x,upper),lower);
}

/*! limits x to [T(zero), upper] */
template<typename T>
__forceinline T clampz(const T& x, const T& upper) {
  return max(T(zero), min(x,upper));
}
|
||||||
|
|
||||||
|
/*! degrees to radians: multiplies by the constant 1.74532925199432957692e-2f */
template<typename T>
__forceinline T deg2rad ( const T& x ) {
  return x * T(1.74532925199432957692e-2f);
}

/*! radians to degrees: multiplies by the constant 5.72957795130823208768e1f */
template<typename T>
__forceinline T rad2deg ( const T& x ) {
  return x * T(5.72957795130823208768e1f);
}

/*! sqrt(1 - x*x), with the radicand limited to be at least zero */
template<typename T>
__forceinline T sin2cos ( const T& x ) {
  return sqrt(max(T(zero),T(one)-x*x));
}

/*! same formula as sin2cos */
template<typename T>
__forceinline T cos2sin ( const T& x ) {
  return sin2cos(x);
}
|
||||||
|
|
||||||
|
/* multiply-add variants, all expressed through sycl::fma */

/*! a*b + c */
__forceinline float madd ( const float a, const float b, const float c) {
  return sycl::fma(a,b,c);
}

/*! a*b - c */
__forceinline float msub ( const float a, const float b, const float c) {
  return sycl::fma(a,b,-c);
}

/*! -a*b + c */
__forceinline float nmadd ( const float a, const float b, const float c) {
  return sycl::fma(-a,b,c);
}

/*! -(a*b + c) */
__forceinline float nmsub ( const float a, const float b, const float c) {
  return -sycl::fma(a,b,c);
}
|
||||||
|
|
||||||
|
/*! random functions */
|
||||||
|
/*
|
||||||
|
template<typename T> T random() { return T(0); }
|
||||||
|
template<> __forceinline int random() { return int(rand()); }
|
||||||
|
template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
|
||||||
|
template<> __forceinline float random() { return rand()/float(RAND_MAX); }
|
||||||
|
template<> __forceinline double random() { return rand()/double(RAND_MAX); }
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*! selects */
|
||||||
|
/* scalar select: returns t when s is true, otherwise f */
__forceinline bool select(bool s, bool t , bool f) {
  return s ? t : f;
}
__forceinline int select(bool s, int t, int f) {
  return s ? t : f;
}
__forceinline float select(bool s, float t, float f) {
  return s ? t : f;
}
|
||||||
|
|
||||||
|
/* mask reductions for a single bool */

/*! true when s is false */
__forceinline bool none(bool s) {
  return !s;
}

/*! returns s unchanged */
__forceinline bool all (bool s) {
  return s;
}

/*! returns s unchanged */
__forceinline bool any (bool s) {
  return s;
}
|
||||||
|
|
||||||
|
/*! converts a single bool to an unsigned: 1 for true, 0 for false */
__forceinline unsigned movemask (bool s)
{
  return (unsigned)s;
}
|
||||||
|
|
||||||
|
/*! linear interpolation: (1-t)*v0 + t*v1, evaluated with one madd */
__forceinline float lerp(const float v0, const float v1, const float t)
{
  const float w0 = 1.0f-t;
  return madd(w0,v0,t*v1);
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
|
||||||
|
return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! exchange */
|
||||||
|
/* swaps a and b through one temporary copy */
template<typename T>
__forceinline void xchg ( T& a, T& b )
{
  const T saved = a;
  a = b;
  b = saved;
}
|
||||||
|
|
||||||
|
/* load/store */
|
||||||
|
template<typename Ty> struct mem;
|
||||||
|
|
||||||
|
template<> struct mem<float> {
|
||||||
|
static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
|
||||||
|
static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
|
||||||
|
|
||||||
|
static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
|
||||||
|
static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! bit reverse operation */
|
||||||
|
template<class T>
|
||||||
|
__forceinline T bitReverse(const T& vin)
|
||||||
|
{
|
||||||
|
T v = vin;
|
||||||
|
v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
|
||||||
|
v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
|
||||||
|
v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
|
||||||
|
v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
|
||||||
|
v = ( v >> 16 ) | ( v << 16);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! bit interleave operation */
|
||||||
|
template<class T>
|
||||||
|
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
|
||||||
|
{
|
||||||
|
T x = xin, y = yin, z = zin;
|
||||||
|
x = (x | (x << 16)) & 0x030000FF;
|
||||||
|
x = (x | (x << 8)) & 0x0300F00F;
|
||||||
|
x = (x | (x << 4)) & 0x030C30C3;
|
||||||
|
x = (x | (x << 2)) & 0x09249249;
|
||||||
|
|
||||||
|
y = (y | (y << 16)) & 0x030000FF;
|
||||||
|
y = (y | (y << 8)) & 0x0300F00F;
|
||||||
|
y = (y | (y << 4)) & 0x030C30C3;
|
||||||
|
y = (y | (y << 2)) & 0x09249249;
|
||||||
|
|
||||||
|
z = (z | (z << 16)) & 0x030000FF;
|
||||||
|
z = (z | (z << 8)) & 0x0300F00F;
|
||||||
|
z = (z | (z << 4)) & 0x030C30C3;
|
||||||
|
z = (z | (z << 2)) & 0x09249249;
|
||||||
|
|
||||||
|
return x | (y << 1) | (z << 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! bit interleave operation for 64bit data types*/
|
||||||
|
template<class T>
|
||||||
|
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
|
||||||
|
T x = xin & 0x1fffff;
|
||||||
|
T y = yin & 0x1fffff;
|
||||||
|
T z = zin & 0x1fffff;
|
||||||
|
|
||||||
|
x = (x | x << 32) & 0x1f00000000ffff;
|
||||||
|
x = (x | x << 16) & 0x1f0000ff0000ff;
|
||||||
|
x = (x | x << 8) & 0x100f00f00f00f00f;
|
||||||
|
x = (x | x << 4) & 0x10c30c30c30c30c3;
|
||||||
|
x = (x | x << 2) & 0x1249249249249249;
|
||||||
|
|
||||||
|
y = (y | y << 32) & 0x1f00000000ffff;
|
||||||
|
y = (y | y << 16) & 0x1f0000ff0000ff;
|
||||||
|
y = (y | y << 8) & 0x100f00f00f00f00f;
|
||||||
|
y = (y | y << 4) & 0x10c30c30c30c30c3;
|
||||||
|
y = (y | y << 2) & 0x1249249249249249;
|
||||||
|
|
||||||
|
z = (z | z << 32) & 0x1f00000000ffff;
|
||||||
|
z = (z | z << 16) & 0x1f0000ff0000ff;
|
||||||
|
z = (z | z << 8) & 0x100f00f00f00f00f;
|
||||||
|
z = (z | z << 4) & 0x10c30c30c30c30c3;
|
||||||
|
z = (z | z << 2) & 0x1249249249249249;
|
||||||
|
|
||||||
|
return x | (y << 1) | (z << 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
39
Framework/external/embree/common/math/obbox.h
vendored
Normal file
39
Framework/external/embree/common/math/obbox.h
vendored
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "bbox.h"
|
||||||
|
#include "linearspace3.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
/*! Oriented bounding box */
|
||||||
|
template<typename T>
|
||||||
|
struct OBBox
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
__forceinline OBBox () {}
|
||||||
|
|
||||||
|
__forceinline OBBox (EmptyTy)
|
||||||
|
: space(one), bounds(empty) {}
|
||||||
|
|
||||||
|
__forceinline OBBox (const BBox<T>& bounds)
|
||||||
|
: space(one), bounds(bounds) {}
|
||||||
|
|
||||||
|
__forceinline OBBox (const LinearSpace3<T>& space, const BBox<T>& bounds)
|
||||||
|
: space(space), bounds(bounds) {}
|
||||||
|
|
||||||
|
friend embree_ostream operator<<(embree_ostream cout, const OBBox& p) {
|
||||||
|
return cout << "{ space = " << p.space << ", bounds = " << p.bounds << "}";
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
LinearSpace3<T> space; //!< orthonormal transformation
|
||||||
|
BBox<T> bounds; //!< bounds in transformed space
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef OBBox<Vec3f> OBBox3f;
|
||||||
|
typedef OBBox<Vec3fa> OBBox3fa;
|
||||||
|
}
|
||||||
258
Framework/external/embree/common/math/quaternion.h
vendored
Normal file
258
Framework/external/embree/common/math/quaternion.h
vendored
Normal file
|
|
@ -0,0 +1,258 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "vec3.h"
|
||||||
|
#include "vec4.h"
|
||||||
|
|
||||||
|
#include "transcendental.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
// Quaternion Struct
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct QuaternionT
|
||||||
|
{
|
||||||
|
typedef Vec3<T> Vector;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline QuaternionT () { }
|
||||||
|
__forceinline QuaternionT ( const QuaternionT& other ) { r = other.r; i = other.i; j = other.j; k = other.k; }
|
||||||
|
__forceinline QuaternionT& operator=( const QuaternionT& other ) { r = other.r; i = other.i; j = other.j; k = other.k; return *this; }
|
||||||
|
|
||||||
|
__forceinline QuaternionT( const T& r ) : r(r), i(zero), j(zero), k(zero) {}
|
||||||
|
__forceinline explicit QuaternionT( const Vec3<T>& v ) : r(zero), i(v.x), j(v.y), k(v.z) {}
|
||||||
|
__forceinline explicit QuaternionT( const Vec4<T>& v ) : r(v.x), i(v.y), j(v.z), k(v.w) {}
|
||||||
|
__forceinline QuaternionT( const T& r, const T& i, const T& j, const T& k ) : r(r), i(i), j(j), k(k) {}
|
||||||
|
__forceinline QuaternionT( const T& r, const Vec3<T>& v ) : r(r), i(v.x), j(v.y), k(v.z) {}
|
||||||
|
|
||||||
|
__inline QuaternionT( const Vec3<T>& vx, const Vec3<T>& vy, const Vec3<T>& vz );
|
||||||
|
__inline QuaternionT( const T& yaw, const T& pitch, const T& roll );
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline QuaternionT( ZeroTy ) : r(zero), i(zero), j(zero), k(zero) {}
|
||||||
|
__forceinline QuaternionT( OneTy ) : r( one), i(zero), j(zero), k(zero) {}
|
||||||
|
|
||||||
|
/*! return quaternion for rotation around arbitrary axis */
|
||||||
|
static __forceinline QuaternionT rotate(const Vec3<T>& u, const T& r) {
|
||||||
|
return QuaternionT<T>(cos(T(0.5)*r),sin(T(0.5)*r)*normalize(u));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! returns the rotation axis of the quaternion as a vector */
|
||||||
|
__forceinline Vec3<T> v( ) const { return Vec3<T>(i, j, k); }
|
||||||
|
|
||||||
|
public:
|
||||||
|
T r, i, j, k;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! Scalar times quaternion: every component of b is multiplied by a from the left. */
template<typename T>
__forceinline QuaternionT<T> operator *( const T& a, const QuaternionT<T>& b )
{
  return QuaternionT<T>(a * b.r,
                        a * b.i,
                        a * b.j,
                        a * b.k);
}

/*! Quaternion times scalar: every component of a is multiplied by b from the right. */
template<typename T>
__forceinline QuaternionT<T> operator *( const QuaternionT<T>& a, const T& b )
{
  return QuaternionT<T>(a.r * b,
                        a.i * b,
                        a.j * b,
                        a.k * b);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Unary plus applied to every component. */
template<typename T>
__forceinline QuaternionT<T> operator +( const QuaternionT<T>& a )
{
  return QuaternionT<T>(+a.r, +a.i, +a.j, +a.k);
}

/*! Negates every component. */
template<typename T>
__forceinline QuaternionT<T> operator -( const QuaternionT<T>& a )
{
  return QuaternionT<T>(-a.r, -a.i, -a.j, -a.k);
}

/*! Conjugate: keeps the real part and negates the imaginary parts. */
template<typename T>
__forceinline QuaternionT<T> conj( const QuaternionT<T>& a )
{
  return QuaternionT<T>(a.r, -a.i, -a.j, -a.k);
}

/*! Length: square root of the sum of squared components. */
template<typename T>
__forceinline T abs( const QuaternionT<T>& a )
{
  return sqrt(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k);
}

/*! Inverse: conjugate scaled by the reciprocal of the squared length. */
template<typename T>
__forceinline QuaternionT<T> rcp( const QuaternionT<T>& a )
{
  return conj(a) * rcp(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k);
}

/*! Scales a by the reciprocal square root of its squared length. */
template<typename T>
__forceinline QuaternionT<T> normalize( const QuaternionT<T>& a )
{
  return a * rsqrt(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k);
}
|
||||||
|
|
||||||
|
// evaluates a*q-r
|
||||||
|
template<typename T> __forceinline QuaternionT<T>
|
||||||
|
msub(const T& a, const QuaternionT<T>& q, const QuaternionT<T>& p)
|
||||||
|
{
|
||||||
|
return QuaternionT<T>(msub(a, q.r, p.r),
|
||||||
|
msub(a, q.i, p.i),
|
||||||
|
msub(a, q.j, p.j),
|
||||||
|
msub(a, q.k, p.k));
|
||||||
|
}
|
||||||
|
// evaluates a*q-r
|
||||||
|
template<typename T> __forceinline QuaternionT<T>
|
||||||
|
madd (const T& a, const QuaternionT<T>& q, const QuaternionT<T>& p)
|
||||||
|
{
|
||||||
|
return QuaternionT<T>(madd(a, q.r, p.r),
|
||||||
|
madd(a, q.i, p.i),
|
||||||
|
madd(a, q.j, p.j),
|
||||||
|
madd(a, q.k, p.k));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Scalar + quaternion: the scalar only affects the real part. */
template<typename T>
__forceinline QuaternionT<T> operator +( const T& a, const QuaternionT<T>& b )
{
  return QuaternionT<T>(a + b.r, b.i, b.j, b.k);
}

/*! Quaternion + scalar: the scalar only affects the real part. */
template<typename T>
__forceinline QuaternionT<T> operator +( const QuaternionT<T>& a, const T& b )
{
  return QuaternionT<T>(a.r + b, a.i, a.j, a.k);
}

/*! Component-wise sum of two quaternions. */
template<typename T>
__forceinline QuaternionT<T> operator +( const QuaternionT<T>& a, const QuaternionT<T>& b )
{
  return QuaternionT<T>(a.r + b.r,
                        a.i + b.i,
                        a.j + b.j,
                        a.k + b.k);
}

/*! Scalar - quaternion: real part is a - b.r, imaginary parts are negated. */
template<typename T>
__forceinline QuaternionT<T> operator -( const T& a, const QuaternionT<T>& b )
{
  return QuaternionT<T>(a - b.r, -b.i, -b.j, -b.k);
}

/*! Quaternion - scalar: the scalar only affects the real part. */
template<typename T>
__forceinline QuaternionT<T> operator -( const QuaternionT<T>& a, const T& b )
{
  return QuaternionT<T>(a.r - b, a.i, a.j, a.k);
}

/*! Component-wise difference of two quaternions. */
template<typename T>
__forceinline QuaternionT<T> operator -( const QuaternionT<T>& a, const QuaternionT<T>& b )
{
  return QuaternionT<T>(a.r - b.r,
                        a.i - b.i,
                        a.j - b.j,
                        a.k - b.k);
}
|
||||||
|
|
||||||
|
/*! Applies quaternion a to vector b: b is promoted to a pure quaternion,
 *  sandwiched as a * b * conj(a), and the imaginary part is returned. */
template<typename T>
__forceinline Vec3<T> operator *( const QuaternionT<T>& a, const Vec3<T>& b )
{
  const QuaternionT<T> sandwiched = a*QuaternionT<T>(b)*conj(a);
  return sandwiched.v();
}

/*! Quaternion (Hamilton) product a*b. */
template<typename T>
__forceinline QuaternionT<T> operator *( const QuaternionT<T>& a, const QuaternionT<T>& b )
{
  const T outR = a.r*b.r - a.i*b.i - a.j*b.j - a.k*b.k;
  const T outI = a.r*b.i + a.i*b.r + a.j*b.k - a.k*b.j;
  const T outJ = a.r*b.j - a.i*b.k + a.j*b.r + a.k*b.i;
  const T outK = a.r*b.k + a.i*b.j - a.j*b.i + a.k*b.r;
  return QuaternionT<T>(outR, outI, outJ, outK);
}

/*! Scalar divided by quaternion: a times the quaternion inverse of b. */
template<typename T>
__forceinline QuaternionT<T> operator /( const T& a, const QuaternionT<T>& b )
{
  return a * rcp(b);
}

/*! Quaternion divided by scalar: a times the reciprocal of b. */
template<typename T>
__forceinline QuaternionT<T> operator /( const QuaternionT<T>& a, const T& b )
{
  return a * rcp(b);
}

/*! Quaternion divided by quaternion: a times the quaternion inverse of b. */
template<typename T>
__forceinline QuaternionT<T> operator /( const QuaternionT<T>& a, const QuaternionT<T>& b )
{
  return a * rcp(b);
}
|
||||||
|
|
||||||
|
// Compound assignment operators. Each one evaluates the matching binary
// expression, stores the result in a and returns a reference to a.

template<typename T>
__forceinline QuaternionT<T>& operator +=( QuaternionT<T>& a, const T& b )
{
  a = a + b;
  return a;
}

template<typename T>
__forceinline QuaternionT<T>& operator +=( QuaternionT<T>& a, const QuaternionT<T>& b )
{
  a = a + b;
  return a;
}

template<typename T>
__forceinline QuaternionT<T>& operator -=( QuaternionT<T>& a, const T& b )
{
  a = a - b;
  return a;
}

template<typename T>
__forceinline QuaternionT<T>& operator -=( QuaternionT<T>& a, const QuaternionT<T>& b )
{
  a = a - b;
  return a;
}

template<typename T>
__forceinline QuaternionT<T>& operator *=( QuaternionT<T>& a, const T& b )
{
  a = a * b;
  return a;
}

template<typename T>
__forceinline QuaternionT<T>& operator *=( QuaternionT<T>& a, const QuaternionT<T>& b )
{
  a = a * b;
  return a;
}

// Division is expressed as multiplication by the reciprocal of b.
template<typename T>
__forceinline QuaternionT<T>& operator /=( QuaternionT<T>& a, const T& b )
{
  a = a * rcp(b);
  return a;
}

template<typename T>
__forceinline QuaternionT<T>& operator /=( QuaternionT<T>& a, const QuaternionT<T>& b )
{
  a = a * rcp(b);
  return a;
}
|
||||||
|
|
||||||
|
/*! Component-wise select: forwards the mask m together with the matching
 *  components of q and p to the component select for each of r, i, j, k. */
template<typename T, typename M>
__forceinline QuaternionT<T> select(const M& m, const QuaternionT<T>& q, const QuaternionT<T>& p)
{
  const T outR = select(m, q.r, p.r);
  const T outI = select(m, q.i, p.i);
  const T outJ = select(m, q.j, p.j);
  const T outK = select(m, q.k, p.k);
  return QuaternionT<T>(outR, outI, outJ, outK);
}
|
||||||
|
|
||||||
|
|
||||||
|
// xfmPoint, xfmVector and xfmNormal all evaluate the same expression: b is
// promoted to a pure quaternion, sandwiched as a * b * conj(a), and the
// imaginary part of the result is returned.

template<typename T>
__forceinline Vec3<T> xfmPoint( const QuaternionT<T>& a, const Vec3<T>& b )
{
  const QuaternionT<T> sandwiched = a*QuaternionT<T>(b)*conj(a);
  return sandwiched.v();
}

template<typename T>
__forceinline Vec3<T> xfmVector( const QuaternionT<T>& a, const Vec3<T>& b )
{
  const QuaternionT<T> sandwiched = a*QuaternionT<T>(b)*conj(a);
  return sandwiched.v();
}

template<typename T>
__forceinline Vec3<T> xfmNormal( const QuaternionT<T>& a, const Vec3<T>& b )
{
  const QuaternionT<T> sandwiched = a*QuaternionT<T>(b)*conj(a);
  return sandwiched.v();
}

/*! Four-component dot product of two quaternions. */
template<typename T>
__forceinline T dot(const QuaternionT<T>& a, const QuaternionT<T>& b)
{
  return a.r*b.r + a.i*b.i + a.j*b.j + a.k*b.k;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! True when all four components compare equal. */
template<typename T>
__forceinline bool operator ==( const QuaternionT<T>& a, const QuaternionT<T>& b )
{
  if (!(a.r == b.r)) return false;
  if (!(a.i == b.i)) return false;
  if (!(a.j == b.j)) return false;
  return a.k == b.k;
}

/*! True when at least one component compares unequal. */
template<typename T>
__forceinline bool operator !=( const QuaternionT<T>& a, const QuaternionT<T>& b )
{
  if (a.r != b.r) return true;
  if (a.i != b.i) return true;
  if (a.j != b.j) return true;
  return a.k != b.k;
}
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Orientation Functions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Builds a quaternion from the three vectors vx, vy, vz.
 *
 *  One of four formulas is chosen from the sign of vx.x + vy.y + vz.z and
 *  from which of vx.x, vy.y, vz.z is largest; the chosen branch computes one
 *  component as t*s and the remaining three from sums or differences of
 *  off-diagonal entries.
 *  NOTE(review): presumably vx, vy, vz are the axes of an orthonormal
 *  rotation frame - confirm against callers. */
template<typename T> QuaternionT<T>::QuaternionT( const Vec3<T>& vx, const Vec3<T>& vy, const Vec3<T>& vz )
{
  if ( vx.x + vy.y + vz.z >= T(zero) )
  {
    // non-negative trace: r is the component computed as t*s
    const T diag  = T(one) + (vx.x + vy.y + vz.z);
    const T scale = rsqrt(diag)*T(0.5f);
    r = diag*scale;
    i = (vy.z - vz.y)*scale;
    j = (vz.x - vx.z)*scale;
    k = (vx.y - vy.x)*scale;
  }
  else if ( vx.x >= max(vy.y, vz.z) )
  {
    // vx.x is the largest diagonal entry: i is the component computed as t*s
    const T diag  = (T(one) + vx.x) - (vy.y + vz.z);
    const T scale = rsqrt(diag)*T(0.5f);
    r = (vy.z - vz.y)*scale;
    i = diag*scale;
    j = (vx.y + vy.x)*scale;
    k = (vz.x + vx.z)*scale;
  }
  else if ( vy.y >= vz.z ) // vx.x was already ruled out as the maximum above
  {
    // vy.y is the largest diagonal entry: j is the component computed as t*s
    const T diag  = (T(one) + vy.y) - (vz.z + vx.x);
    const T scale = rsqrt(diag)*T(0.5f);
    r = (vz.x - vx.z)*scale;
    i = (vx.y + vy.x)*scale;
    j = diag*scale;
    k = (vy.z + vz.y)*scale;
  }
  else
  {
    // vz.z is the largest diagonal entry: k is the component computed as t*s
    const T diag  = (T(one) + vz.z) - (vx.x + vy.y);
    const T scale = rsqrt(diag)*T(0.5f);
    r = (vx.y - vy.x)*scale;
    i = (vz.x + vx.z)*scale;
    j = (vy.z + vz.y)*scale;
    k = diag*scale;
  }
}
|
||||||
|
|
||||||
|
/*! Builds a quaternion from yaw, pitch and roll angles. Each angle is halved,
 *  its sine and cosine are taken, and the four components are formed from
 *  products of those six values. */
template<typename T> QuaternionT<T>::QuaternionT( const T& yaw, const T& pitch, const T& roll )
{
  const T halfYaw   = yaw  *T(0.5f);
  const T halfPitch = pitch*T(0.5f);
  const T halfRoll  = roll *T(0.5f);

  const T cya = cos(halfYaw);
  const T cpi = cos(halfPitch);
  const T cro = cos(halfRoll);

  const T sya = sin(halfYaw);
  const T spi = sin(halfPitch);
  const T sro = sin(halfRoll);

  r = cro*cya*cpi + sro*sya*spi;
  i = cro*cya*spi + sro*sya*cpi;
  j = cro*sya*cpi - sro*cya*spi;
  k = sro*cya*cpi - cro*sya*spi;
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Prints the quaternion as "{ r = ..., i = ..., j = ..., k = ... }". */
template<typename T>
static embree_ostream operator<<(embree_ostream cout, const QuaternionT<T>& q)
{
  return cout << "{ r = " << q.r
              << ", i = " << q.i
              << ", j = " << q.j
              << ", k = " << q.k
              << " }";
}
|
||||||
|
|
||||||
|
/*! default template instantiations */
|
||||||
|
typedef QuaternionT<float> Quaternion3f;
|
||||||
|
typedef QuaternionT<double> Quaternion3d;
|
||||||
|
|
||||||
|
template<int N> using Quaternion3vf = QuaternionT<vfloat<N>>;
|
||||||
|
typedef QuaternionT<vfloat<4>> Quaternion3vf4;
|
||||||
|
typedef QuaternionT<vfloat<8>> Quaternion3vf8;
|
||||||
|
typedef QuaternionT<vfloat<16>> Quaternion3vf16;
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Interpolation
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<typename T>
|
||||||
|
__forceinline QuaternionT<T>lerp(const QuaternionT<T>& q0,
|
||||||
|
const QuaternionT<T>& q1,
|
||||||
|
const T& factor)
|
||||||
|
{
|
||||||
|
QuaternionT<T> q;
|
||||||
|
q.r = lerp(q0.r, q1.r, factor);
|
||||||
|
q.i = lerp(q0.i, q1.i, factor);
|
||||||
|
q.j = lerp(q0.j, q1.j, factor);
|
||||||
|
q.k = lerp(q0.k, q1.k, factor);
|
||||||
|
return q;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline QuaternionT<T> slerp(const QuaternionT<T>& q0,
|
||||||
|
const QuaternionT<T>& q1_,
|
||||||
|
const T& t)
|
||||||
|
{
|
||||||
|
T cosTheta = dot(q0, q1_);
|
||||||
|
QuaternionT<T> q1 = select(cosTheta < 0.f, -q1_, q1_);
|
||||||
|
cosTheta = select(cosTheta < 0.f, -cosTheta, cosTheta);
|
||||||
|
|
||||||
|
// spherical linear interpolation
|
||||||
|
const T phi = t * fastapprox::acos(cosTheta);
|
||||||
|
T sinPhi, cosPhi;
|
||||||
|
fastapprox::sincos(phi, sinPhi, cosPhi);
|
||||||
|
QuaternionT<T> qperp = sinPhi * normalize(msub(cosTheta, q0, q1));
|
||||||
|
QuaternionT<T> qslerp = msub(cosPhi, q0, qperp);
|
||||||
|
|
||||||
|
// regular linear interpolation as fallback
|
||||||
|
QuaternionT<T> qlerp = normalize(lerp(q0, q1, t));
|
||||||
|
|
||||||
|
return select(cosTheta > 0.9995f, qlerp, qslerp);
|
||||||
|
}
|
||||||
|
}
|
||||||
114
Framework/external/embree/common/math/random_sampler.h
vendored
Normal file
114
Framework/external/embree/common/math/random_sampler.h
vendored
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "vec.h"
|
||||||
|
|
||||||
|
namespace embree {
|
||||||
|
|
||||||
|
struct RandomSampler
|
||||||
|
{
|
||||||
|
unsigned int s;
|
||||||
|
};
|
||||||
|
|
||||||
|
__forceinline unsigned int MurmurHash3_mix(unsigned int hash, unsigned int k)
|
||||||
|
{
|
||||||
|
const unsigned int c1 = 0xcc9e2d51;
|
||||||
|
const unsigned int c2 = 0x1b873593;
|
||||||
|
const unsigned int r1 = 15;
|
||||||
|
const unsigned int r2 = 13;
|
||||||
|
const unsigned int m = 5;
|
||||||
|
const unsigned int n = 0xe6546b64;
|
||||||
|
|
||||||
|
k *= c1;
|
||||||
|
k = (k << r1) | (k >> (32 - r1));
|
||||||
|
k *= c2;
|
||||||
|
|
||||||
|
hash ^= k;
|
||||||
|
hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
|
||||||
|
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline unsigned int MurmurHash3_finalize(unsigned int hash)
|
||||||
|
{
|
||||||
|
hash ^= hash >> 16;
|
||||||
|
hash *= 0x85ebca6b;
|
||||||
|
hash ^= hash >> 13;
|
||||||
|
hash *= 0xc2b2ae35;
|
||||||
|
hash ^= hash >> 16;
|
||||||
|
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline unsigned int LCG_next(unsigned int value)
|
||||||
|
{
|
||||||
|
const unsigned int m = 1664525;
|
||||||
|
const unsigned int n = 1013904223;
|
||||||
|
|
||||||
|
return value * m + n;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Seeds the sampler from a single id: the id is mixed into a zero hash and
 *  the finalized hash becomes the sampler state. */
__forceinline void RandomSampler_init(RandomSampler& self, int id)
{
  const unsigned int mixed = MurmurHash3_mix(0, id);
  self.s = MurmurHash3_finalize(mixed);
}

/*! Seeds the sampler from a pixel id and a sample id: both are mixed into a
 *  zero hash, pixelId first, and the finalized hash becomes the sampler state. */
__forceinline void RandomSampler_init(RandomSampler& self, int pixelId, int sampleId)
{
  unsigned int mixed = MurmurHash3_mix(0, pixelId);
  mixed = MurmurHash3_mix(mixed, sampleId);
  self.s = MurmurHash3_finalize(mixed);
}
|
||||||
|
|
||||||
|
/*! Seeds the sampler from pixel coordinates and a sample id.
 *
 *  x and y are packed into one pixel id as x | (y << 16), which is then
 *  forwarded to the (pixelId, sampleId) overload.
 *
 *  The packing is done on unsigned values: left-shifting a negative or large
 *  signed y is undefined (or overflows) in C++, whereas the unsigned shift
 *  simply discards the high bits. The resulting bit pattern is the same one
 *  the signed expression produces wherever that expression is well defined. */
__forceinline void RandomSampler_init(RandomSampler& self, int x, int y, int sampleId)
{
  const unsigned int packed = (unsigned int)x | ((unsigned int)y << 16);
  RandomSampler_init(self, (int)packed, sampleId);
}
|
||||||
|
|
||||||
|
/*! Advances the sampler and returns the new state shifted right by one bit,
 *  i.e. a non-negative 31-bit value. */
__forceinline int RandomSampler_getInt(RandomSampler& self)
{
  const unsigned int next = LCG_next(self.s);
  self.s = next;
  return next >> 1;
}

/*! Advances the sampler and returns the full 32-bit state. */
__forceinline unsigned int RandomSampler_getUInt(RandomSampler& self)
{
  const unsigned int next = LCG_next(self.s);
  self.s = next;
  return next;
}

/*! Advances the sampler and returns RandomSampler_getInt() converted to
 *  float and scaled by 4.656612873077392578125e-10f (2^-31). */
__forceinline float RandomSampler_getFloat(RandomSampler& self)
{
  const int bits = RandomSampler_getInt(self);
  return (float)bits * 4.656612873077392578125e-10f;
}

/*! One-dimensional sample; forwards to RandomSampler_getFloat(). */
__forceinline float RandomSampler_get1D(RandomSampler& self)
{
  const float sample = RandomSampler_getFloat(self);
  return sample;
}
|
||||||
|
|
||||||
|
/*! Two-dimensional sample: draws two 1D samples in sequence, the first one
 *  becoming the first component. */
__forceinline Vec2f RandomSampler_get2D(RandomSampler& self)
{
  const float first  = RandomSampler_get1D(self);
  const float second = RandomSampler_get1D(self);
  return Vec2f(first, second);
}

/*! Three-dimensional sample: draws three raw 32-bit values in sequence, then
 *  shifts all three right by one bit (srl) and scales them by
 *  4.656612873077392578125e-10f (2^-31) in vector form. */
__forceinline Vec3fa RandomSampler_get3D(RandomSampler& self)
{
  const int first  = RandomSampler_getUInt(self);
  const int second = RandomSampler_getUInt(self);
  const int third  = RandomSampler_getUInt(self);
  const Vec3ia raw(first, second, third);
  return Vec3fa(srl(raw, 1)) * 4.656612873077392578125e-10f;
}
|
||||||
|
|
||||||
|
} // namespace embree
|
||||||
104
Framework/external/embree/common/math/random_sampler.isph
vendored
Normal file
104
Framework/external/embree/common/math/random_sampler.isph
vendored
Normal file
|
|
@ -0,0 +1,104 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../math/vec.isph"
|
||||||
|
|
||||||
|
// Random sampler state: a single 32-bit value advanced by LCG_next.
struct RandomSampler
{
  unsigned int s;
};

// Mixes the 32-bit block k into the running hash: k is multiplied by c1,
// rotated left by 15 bits and multiplied by c2; the result is xor-ed into
// hash, which is then rotated left by 13 bits, multiplied by 5 and offset by
// 0xe6546b64.
inline unsigned int MurmurHash3_mix(unsigned int hash, unsigned int k)
{
  const unsigned int c1 = 0xcc9e2d51;
  const unsigned int c2 = 0x1b873593;
  const unsigned int r1 = 15;
  const unsigned int r2 = 13;
  const unsigned int m = 5;
  const unsigned int n = 0xe6546b64;

  unsigned int block = k * c1;
  block = (block << r1) | (block >> (32 - r1));
  block = block * c2;

  unsigned int mixed = hash ^ block;
  mixed = (mixed << r2) | (mixed >> (32 - r2));
  return mixed * m + n;
}

// Final scrambling of a hash value: alternating xor-shift and multiply steps.
inline unsigned int MurmurHash3_finalize(unsigned int hash)
{
  unsigned int h = hash;
  h = h ^ (h >> 16);
  h = h * 0x85ebca6b;
  h = h ^ (h >> 13);
  h = h * 0xc2b2ae35;
  h = h ^ (h >> 16);
  return h;
}

// One step of a linear congruential generator:
// value * 1664525 + 1013904223 in 32-bit unsigned arithmetic.
inline unsigned int LCG_next(unsigned int value)
{
  const unsigned int multiplier = 1664525;
  const unsigned int increment  = 1013904223;
  return value * multiplier + increment;
}
|
||||||
|
|
||||||
|
// Seeds the sampler from a single id: the id is mixed into a zero hash and
// the finalized hash becomes the sampler state.
inline void RandomSampler_init(RandomSampler& self, int id)
{
  unsigned int mixed = 0;
  mixed = MurmurHash3_mix(mixed, id);
  self.s = MurmurHash3_finalize(mixed);
}

// Seeds the sampler from a pixel id and a sample id: both are mixed into a
// zero hash, pixelId first, and the finalized hash becomes the sampler state.
inline void RandomSampler_init(RandomSampler& self, int pixelId, int sampleId)
{
  unsigned int mixed = 0;
  mixed = MurmurHash3_mix(mixed, pixelId);
  mixed = MurmurHash3_mix(mixed, sampleId);
  self.s = MurmurHash3_finalize(mixed);
}

// Seeds the sampler from pixel coordinates: x and y are packed into one
// pixel id as x | (y << 16) and forwarded to the overload above.
inline void RandomSampler_init(RandomSampler& self, int x, int y, int sampleId)
{
  const int packed = x | (y << 16);
  RandomSampler_init(self, packed, sampleId);
}
|
||||||
|
|
||||||
|
// Advances the sampler and returns the new state shifted right by one bit.
inline int RandomSampler_getInt(RandomSampler& self)
{
  self.s = LCG_next(self.s);
  return self.s >> 1;
}

// Advances the sampler and returns the full 32-bit state.
inline unsigned int RandomSampler_getUInt(RandomSampler& self)
{
  self.s = LCG_next(self.s);
  return self.s;
}

// Advances the sampler and returns RandomSampler_getInt() converted to float
// and scaled by 4.656612873077392578125e-10f (2^-31).
inline float RandomSampler_getFloat(RandomSampler& self)
{
  const int bits = RandomSampler_getInt(self);
  return (float)bits * 4.656612873077392578125e-10f;
}

// One-dimensional sample; forwards to RandomSampler_getFloat().
inline float RandomSampler_get1D(RandomSampler& self)
{
  const float sample = RandomSampler_getFloat(self);
  return sample;
}
|
||||||
|
|
||||||
|
// Two-dimensional sample: draws two 1D samples in sequence, the first one
// becoming the first component.
inline Vec2f RandomSampler_get2D(RandomSampler& self)
{
  const float first  = RandomSampler_get1D(self);
  const float second = RandomSampler_get1D(self);
  return make_Vec2f(first, second);
}

// Three-dimensional sample: draws three 1D samples in sequence, in component
// order.
inline Vec3fa RandomSampler_get3D(RandomSampler& self)
{
  const float first  = RandomSampler_get1D(self);
  const float second = RandomSampler_get1D(self);
  const float third  = RandomSampler_get1D(self);
  return make_Vec3fa(first, second, third);
}
|
||||||
137
Framework/external/embree/common/math/range.h
vendored
Normal file
137
Framework/external/embree/common/math/range.h
vendored
Normal file
|
|
@ -0,0 +1,137 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/platform.h"
|
||||||
|
#include "../math/emath.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<typename Ty>
|
||||||
|
struct range
|
||||||
|
{
|
||||||
|
__forceinline range() {}
|
||||||
|
|
||||||
|
__forceinline range(const Ty& begin)
|
||||||
|
: _begin(begin), _end(begin+1) {}
|
||||||
|
|
||||||
|
__forceinline range(const Ty& begin, const Ty& end)
|
||||||
|
: _begin(begin), _end(end) {}
|
||||||
|
|
||||||
|
__forceinline range(const range& other)
|
||||||
|
: _begin(other._begin), _end(other._end) {}
|
||||||
|
|
||||||
|
template<typename T1>
|
||||||
|
__forceinline range(const range<T1>& other)
|
||||||
|
: _begin(Ty(other._begin)), _end(Ty(other._end)) {}
|
||||||
|
|
||||||
|
template<typename T1>
|
||||||
|
__forceinline range& operator =(const range<T1>& other) {
|
||||||
|
_begin = other._begin;
|
||||||
|
_end = other._end;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Ty begin() const {
|
||||||
|
return _begin;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Ty end() const {
|
||||||
|
return _end;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline range intersect(const range& r) const {
|
||||||
|
return range (max(_begin,r._begin),min(_end,r._end));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Ty size() const {
|
||||||
|
return _end - _begin;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline bool empty() const {
|
||||||
|
return _end <= _begin;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Ty center() const {
|
||||||
|
return (_begin + _end)/2;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline std::pair<range,range> split() const
|
||||||
|
{
|
||||||
|
const Ty _center = center();
|
||||||
|
return std::make_pair(range(_begin,_center),range(_center,_end));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void split(range& left_o, range& right_o) const
|
||||||
|
{
|
||||||
|
const Ty _center = center();
|
||||||
|
left_o = range(_begin,_center);
|
||||||
|
right_o = range(_center,_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline friend bool operator< (const range& r0, const range& r1) {
|
||||||
|
return r0.size() < r1.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
friend embree_ostream operator<<(embree_ostream cout, const range& r) {
|
||||||
|
return cout << "range [" << r.begin() << ", " << r.end() << "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
Ty _begin, _end;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Ty>
|
||||||
|
range<Ty> make_range(const Ty& begin, const Ty& end) {
|
||||||
|
return range<Ty>(begin,end);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Ty>
|
||||||
|
struct extended_range : public range<Ty>
|
||||||
|
{
|
||||||
|
__forceinline extended_range () {}
|
||||||
|
|
||||||
|
__forceinline extended_range (const Ty& begin)
|
||||||
|
: range<Ty>(begin), _ext_end(begin+1) {}
|
||||||
|
|
||||||
|
__forceinline extended_range (const Ty& begin, const Ty& end)
|
||||||
|
: range<Ty>(begin,end), _ext_end(end) {}
|
||||||
|
|
||||||
|
__forceinline extended_range (const Ty& begin, const Ty& end, const Ty& ext_end)
|
||||||
|
: range<Ty>(begin,end), _ext_end(ext_end) {}
|
||||||
|
|
||||||
|
__forceinline Ty ext_end() const {
|
||||||
|
return _ext_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Ty ext_size() const {
|
||||||
|
return _ext_end - range<Ty>::_begin;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Ty ext_range_size() const {
|
||||||
|
return _ext_end - range<Ty>::_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline bool has_ext_range() const {
|
||||||
|
assert(_ext_end >= range<Ty>::_end);
|
||||||
|
return (_ext_end - range<Ty>::_end) > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void set_ext_range(const size_t ext_end){
|
||||||
|
assert(ext_end >= range<Ty>::_end);
|
||||||
|
_ext_end = ext_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void move_right(const size_t plus){
|
||||||
|
range<Ty>::_begin += plus;
|
||||||
|
range<Ty>::_end += plus;
|
||||||
|
_ext_end += plus;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend embree_ostream operator<<(embree_ostream cout, const extended_range& r) {
|
||||||
|
return cout << "extended_range [" << r.begin() << ", " << r.end() << " (" << r.ext_end() << ")]";
|
||||||
|
}
|
||||||
|
|
||||||
|
Ty _ext_end;
|
||||||
|
};
|
||||||
|
}
|
||||||
525
Framework/external/embree/common/math/transcendental.h
vendored
Normal file
525
Framework/external/embree/common/math/transcendental.h
vendored
Normal file
|
|
@ -0,0 +1,525 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
// Transcendental functions from "ispc": https://github.com/ispc/ispc/
|
||||||
|
// Most of the transcendental implementations in ispc code come from
|
||||||
|
// Solomon Boulos's "syrah": https://github.com/boulos/syrah/
|
||||||
|
|
||||||
|
#include "../simd/simd.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace fastapprox
|
||||||
|
{
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T sin(const T &v)
|
||||||
|
{
|
||||||
|
static const float piOverTwoVec = 1.57079637050628662109375;
|
||||||
|
static const float twoOverPiVec = 0.636619746685028076171875;
|
||||||
|
auto scaled = v * twoOverPiVec;
|
||||||
|
auto kReal = floor(scaled);
|
||||||
|
auto k = toInt(kReal);
|
||||||
|
|
||||||
|
// Reduced range version of x
|
||||||
|
auto x = v - kReal * piOverTwoVec;
|
||||||
|
auto kMod4 = k & 3;
|
||||||
|
auto sinUseCos = (kMod4 == 1) | (kMod4 == 3);
|
||||||
|
auto flipSign = (kMod4 > 1);
|
||||||
|
|
||||||
|
// These coefficients are from sollya with fpminimax(sin(x)/x, [|0, 2,
|
||||||
|
// 4, 6, 8, 10|], [|single...|], [0;Pi/2]);
|
||||||
|
static const float sinC2 = -0.16666667163372039794921875;
|
||||||
|
static const float sinC4 = +8.333347737789154052734375e-3;
|
||||||
|
static const float sinC6 = -1.9842604524455964565277099609375e-4;
|
||||||
|
static const float sinC8 = +2.760012648650445044040679931640625e-6;
|
||||||
|
static const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
|
||||||
|
|
||||||
|
static const float cosC2 = -0.5;
|
||||||
|
static const float cosC4 = +4.166664183139801025390625e-2;
|
||||||
|
static const float cosC6 = -1.388833043165504932403564453125e-3;
|
||||||
|
static const float cosC8 = +2.47562347794882953166961669921875e-5;
|
||||||
|
static const float cosC10 = -2.59630184018533327616751194000244140625e-7;
|
||||||
|
|
||||||
|
auto outside = select(sinUseCos, 1., x);
|
||||||
|
auto c2 = select(sinUseCos, T(cosC2), T(sinC2));
|
||||||
|
auto c4 = select(sinUseCos, T(cosC4), T(sinC4));
|
||||||
|
auto c6 = select(sinUseCos, T(cosC6), T(sinC6));
|
||||||
|
auto c8 = select(sinUseCos, T(cosC8), T(sinC8));
|
||||||
|
auto c10 = select(sinUseCos, T(cosC10), T(sinC10));
|
||||||
|
|
||||||
|
auto x2 = x * x;
|
||||||
|
auto formula = x2 * c10 + c8;
|
||||||
|
formula = x2 * formula + c6;
|
||||||
|
formula = x2 * formula + c4;
|
||||||
|
formula = x2 * formula + c2;
|
||||||
|
formula = x2 * formula + 1.;
|
||||||
|
formula *= outside;
|
||||||
|
|
||||||
|
formula = select(flipSign, -formula, formula);
|
||||||
|
return formula;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T cos(const T &v)
|
||||||
|
{
|
||||||
|
static const float piOverTwoVec = 1.57079637050628662109375;
|
||||||
|
static const float twoOverPiVec = 0.636619746685028076171875;
|
||||||
|
auto scaled = v * twoOverPiVec;
|
||||||
|
auto kReal = floor(scaled);
|
||||||
|
auto k = toInt(kReal);
|
||||||
|
|
||||||
|
// Reduced range version of x
|
||||||
|
auto x = v - kReal * piOverTwoVec;
|
||||||
|
|
||||||
|
auto kMod4 = k & 3;
|
||||||
|
auto cosUseCos = (kMod4 == 0) | (kMod4 == 2);
|
||||||
|
auto flipSign = (kMod4 == 1) | (kMod4 == 2);
|
||||||
|
|
||||||
|
const float sinC2 = -0.16666667163372039794921875;
|
||||||
|
const float sinC4 = +8.333347737789154052734375e-3;
|
||||||
|
const float sinC6 = -1.9842604524455964565277099609375e-4;
|
||||||
|
const float sinC8 = +2.760012648650445044040679931640625e-6;
|
||||||
|
const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
|
||||||
|
|
||||||
|
const float cosC2 = -0.5;
|
||||||
|
const float cosC4 = +4.166664183139801025390625e-2;
|
||||||
|
const float cosC6 = -1.388833043165504932403564453125e-3;
|
||||||
|
const float cosC8 = +2.47562347794882953166961669921875e-5;
|
||||||
|
const float cosC10 = -2.59630184018533327616751194000244140625e-7;
|
||||||
|
|
||||||
|
auto outside = select(cosUseCos, 1., x);
|
||||||
|
auto c2 = select(cosUseCos, T(cosC2), T(sinC2));
|
||||||
|
auto c4 = select(cosUseCos, T(cosC4), T(sinC4));
|
||||||
|
auto c6 = select(cosUseCos, T(cosC6), T(sinC6));
|
||||||
|
auto c8 = select(cosUseCos, T(cosC8), T(sinC8));
|
||||||
|
auto c10 = select(cosUseCos, T(cosC10), T(sinC10));
|
||||||
|
|
||||||
|
auto x2 = x * x;
|
||||||
|
auto formula = x2 * c10 + c8;
|
||||||
|
formula = x2 * formula + c6;
|
||||||
|
formula = x2 * formula + c4;
|
||||||
|
formula = x2 * formula + c2;
|
||||||
|
formula = x2 * formula + 1.;
|
||||||
|
formula *= outside;
|
||||||
|
|
||||||
|
formula = select(flipSign, -formula, formula);
|
||||||
|
return formula;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline void sincos(const T &v, T &sinResult, T &cosResult)
|
||||||
|
{
|
||||||
|
const float piOverTwoVec = 1.57079637050628662109375;
|
||||||
|
const float twoOverPiVec = 0.636619746685028076171875;
|
||||||
|
auto scaled = v * twoOverPiVec;
|
||||||
|
auto kReal = floor(scaled);
|
||||||
|
auto k = toInt(kReal);
|
||||||
|
|
||||||
|
// Reduced range version of x
|
||||||
|
auto x = v - kReal * piOverTwoVec;
|
||||||
|
auto kMod4 = k & 3;
|
||||||
|
auto cosUseCos = ((kMod4 == 0) | (kMod4 == 2));
|
||||||
|
auto sinUseCos = ((kMod4 == 1) | (kMod4 == 3));
|
||||||
|
auto sinFlipSign = (kMod4 > 1);
|
||||||
|
auto cosFlipSign = ((kMod4 == 1) | (kMod4 == 2));
|
||||||
|
|
||||||
|
const float oneVec = +1.;
|
||||||
|
const float sinC2 = -0.16666667163372039794921875;
|
||||||
|
const float sinC4 = +8.333347737789154052734375e-3;
|
||||||
|
const float sinC6 = -1.9842604524455964565277099609375e-4;
|
||||||
|
const float sinC8 = +2.760012648650445044040679931640625e-6;
|
||||||
|
const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
|
||||||
|
|
||||||
|
const float cosC2 = -0.5;
|
||||||
|
const float cosC4 = +4.166664183139801025390625e-2;
|
||||||
|
const float cosC6 = -1.388833043165504932403564453125e-3;
|
||||||
|
const float cosC8 = +2.47562347794882953166961669921875e-5;
|
||||||
|
const float cosC10 = -2.59630184018533327616751194000244140625e-7;
|
||||||
|
|
||||||
|
auto x2 = x * x;
|
||||||
|
|
||||||
|
auto sinFormula = x2 * sinC10 + sinC8;
|
||||||
|
auto cosFormula = x2 * cosC10 + cosC8;
|
||||||
|
sinFormula = x2 * sinFormula + sinC6;
|
||||||
|
cosFormula = x2 * cosFormula + cosC6;
|
||||||
|
|
||||||
|
sinFormula = x2 * sinFormula + sinC4;
|
||||||
|
cosFormula = x2 * cosFormula + cosC4;
|
||||||
|
|
||||||
|
sinFormula = x2 * sinFormula + sinC2;
|
||||||
|
cosFormula = x2 * cosFormula + cosC2;
|
||||||
|
|
||||||
|
sinFormula = x2 * sinFormula + oneVec;
|
||||||
|
cosFormula = x2 * cosFormula + oneVec;
|
||||||
|
|
||||||
|
sinFormula *= x;
|
||||||
|
|
||||||
|
sinResult = select(sinUseCos, cosFormula, sinFormula);
|
||||||
|
cosResult = select(cosUseCos, cosFormula, sinFormula);
|
||||||
|
|
||||||
|
sinResult = select(sinFlipSign, -sinResult, sinResult);
|
||||||
|
cosResult = select(cosFlipSign, -cosResult, cosResult);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T tan(const T &v)
|
||||||
|
{
|
||||||
|
const float piOverFourVec = 0.785398185253143310546875;
|
||||||
|
const float fourOverPiVec = 1.27323949337005615234375;
|
||||||
|
|
||||||
|
auto xLt0 = v < 0.;
|
||||||
|
auto y = select(xLt0, -v, v);
|
||||||
|
auto scaled = y * fourOverPiVec;
|
||||||
|
|
||||||
|
auto kReal = floor(scaled);
|
||||||
|
auto k = toInt(kReal);
|
||||||
|
|
||||||
|
auto x = y - kReal * piOverFourVec;
|
||||||
|
|
||||||
|
// If k & 1, x -= Pi/4
|
||||||
|
auto needOffset = (k & 1) != 0;
|
||||||
|
x = select(needOffset, x - piOverFourVec, x);
|
||||||
|
|
||||||
|
// If k & 3 == (0 or 3) let z = tan_In...(y) otherwise z = -cot_In0To...
|
||||||
|
auto kMod4 = k & 3;
|
||||||
|
auto useCotan = (kMod4 == 1) | (kMod4 == 2);
|
||||||
|
|
||||||
|
const float oneVec = 1.0;
|
||||||
|
|
||||||
|
const float tanC2 = +0.33333075046539306640625;
|
||||||
|
const float tanC4 = +0.13339905440807342529296875;
|
||||||
|
const float tanC6 = +5.3348250687122344970703125e-2;
|
||||||
|
const float tanC8 = +2.46033705770969390869140625e-2;
|
||||||
|
const float tanC10 = +2.892402000725269317626953125e-3;
|
||||||
|
const float tanC12 = +9.500005282461643218994140625e-3;
|
||||||
|
|
||||||
|
const float cotC2 = -0.3333333432674407958984375;
|
||||||
|
const float cotC4 = -2.222204394638538360595703125e-2;
|
||||||
|
const float cotC6 = -2.11752182804048061370849609375e-3;
|
||||||
|
const float cotC8 = -2.0846328698098659515380859375e-4;
|
||||||
|
const float cotC10 = -2.548247357481159269809722900390625e-5;
|
||||||
|
const float cotC12 = -3.5257363606433500535786151885986328125e-7;
|
||||||
|
|
||||||
|
auto x2 = x * x;
|
||||||
|
T z;
|
||||||
|
if (any(useCotan))
|
||||||
|
{
|
||||||
|
auto cotVal = x2 * cotC12 + cotC10;
|
||||||
|
cotVal = x2 * cotVal + cotC8;
|
||||||
|
cotVal = x2 * cotVal + cotC6;
|
||||||
|
cotVal = x2 * cotVal + cotC4;
|
||||||
|
cotVal = x2 * cotVal + cotC2;
|
||||||
|
cotVal = x2 * cotVal + oneVec;
|
||||||
|
// The equation is for x * cot(x) but we need -x * cot(x) for the tan part.
|
||||||
|
cotVal /= -x;
|
||||||
|
z = cotVal;
|
||||||
|
}
|
||||||
|
auto useTan = !useCotan;
|
||||||
|
if (any(useTan))
|
||||||
|
{
|
||||||
|
auto tanVal = x2 * tanC12 + tanC10;
|
||||||
|
tanVal = x2 * tanVal + tanC8;
|
||||||
|
tanVal = x2 * tanVal + tanC6;
|
||||||
|
tanVal = x2 * tanVal + tanC4;
|
||||||
|
tanVal = x2 * tanVal + tanC2;
|
||||||
|
tanVal = x2 * tanVal + oneVec;
|
||||||
|
// Equation was for tan(x)/x
|
||||||
|
tanVal *= x;
|
||||||
|
z = select(useTan, tanVal, z);
|
||||||
|
}
|
||||||
|
return select(xLt0, -z, z);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T asin(const T &x0)
|
||||||
|
{
|
||||||
|
auto isneg = (x0 < 0.f);
|
||||||
|
auto x = abs(x0);
|
||||||
|
auto isnan = (x > 1.f);
|
||||||
|
|
||||||
|
// sollya
|
||||||
|
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
|
||||||
|
// [1e-20;.9999999999999999]);
|
||||||
|
// avg error: 1.1105439e-06, max error 1.3187528e-06
|
||||||
|
auto v = 1.57079517841339111328125f +
|
||||||
|
x * (-0.21450997889041900634765625f +
|
||||||
|
x * (8.78556668758392333984375e-2f +
|
||||||
|
x * (-4.489909112453460693359375e-2f +
|
||||||
|
x * (1.928029954433441162109375e-2f +
|
||||||
|
x * (-4.3095736764371395111083984375e-3f)))));
|
||||||
|
|
||||||
|
v *= -sqrt(1.f - x);
|
||||||
|
v = v + 1.57079637050628662109375f;
|
||||||
|
|
||||||
|
v = select(v < 0.f, T(0.f), v);
|
||||||
|
v = select(isneg, -v, v);
|
||||||
|
v = select(isnan, T(cast_i2f(0x7fc00000)), v);
|
||||||
|
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T acos(const T &v)
|
||||||
|
{
|
||||||
|
return 1.57079637050628662109375f - asin(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T atan(const T &v)
|
||||||
|
{
|
||||||
|
const float piOverTwoVec = 1.57079637050628662109375;
|
||||||
|
// atan(-x) = -atan(x) (so flip from negative to positive first)
|
||||||
|
// If x > 1 -> atan(x) = Pi/2 - atan(1/x)
|
||||||
|
auto xNeg = v < 0.f;
|
||||||
|
auto xFlipped = select(xNeg, -v, v);
|
||||||
|
|
||||||
|
auto xGt1 = xFlipped > 1.;
|
||||||
|
auto x = select(xGt1, rcpSafe(xFlipped), xFlipped);
|
||||||
|
|
||||||
|
// These coefficients approximate atan(x)/x
|
||||||
|
const float atanC0 = +0.99999988079071044921875;
|
||||||
|
const float atanC2 = -0.3333191573619842529296875;
|
||||||
|
const float atanC4 = +0.199689209461212158203125;
|
||||||
|
const float atanC6 = -0.14015688002109527587890625;
|
||||||
|
const float atanC8 = +9.905083477497100830078125e-2;
|
||||||
|
const float atanC10 = -5.93664981424808502197265625e-2;
|
||||||
|
const float atanC12 = +2.417283318936824798583984375e-2;
|
||||||
|
const float atanC14 = -4.6721356920897960662841796875e-3;
|
||||||
|
|
||||||
|
auto x2 = x * x;
|
||||||
|
auto result = x2 * atanC14 + atanC12;
|
||||||
|
result = x2 * result + atanC10;
|
||||||
|
result = x2 * result + atanC8;
|
||||||
|
result = x2 * result + atanC6;
|
||||||
|
result = x2 * result + atanC4;
|
||||||
|
result = x2 * result + atanC2;
|
||||||
|
result = x2 * result + atanC0;
|
||||||
|
result *= x;
|
||||||
|
|
||||||
|
result = select(xGt1, piOverTwoVec - result, result);
|
||||||
|
result = select(xNeg, -result, result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T atan2(const T &y, const T &x)
|
||||||
|
{
|
||||||
|
const float piVec = 3.1415926536;
|
||||||
|
// atan2(y, x) =
|
||||||
|
//
|
||||||
|
// atan2(y > 0, x = +-0) -> Pi/2
|
||||||
|
// atan2(y < 0, x = +-0) -> -Pi/2
|
||||||
|
// atan2(y = +-0, x < +0) -> +-Pi
|
||||||
|
// atan2(y = +-0, x >= +0) -> +-0
|
||||||
|
//
|
||||||
|
// atan2(y >= 0, x < 0) -> Pi + atan(y/x)
|
||||||
|
// atan2(y < 0, x < 0) -> -Pi + atan(y/x)
|
||||||
|
// atan2(y, x > 0) -> atan(y/x)
|
||||||
|
//
|
||||||
|
// and then a bunch of code for dealing with infinities.
|
||||||
|
auto yOverX = y * rcpSafe(x);
|
||||||
|
auto atanArg = atan(yOverX);
|
||||||
|
auto xLt0 = x < 0.f;
|
||||||
|
auto yLt0 = y < 0.f;
|
||||||
|
auto offset = select(xLt0,
|
||||||
|
select(yLt0, T(-piVec), T(piVec)), 0.f);
|
||||||
|
return offset + atanArg;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T exp(const T &v)
|
||||||
|
{
|
||||||
|
const float ln2Part1 = 0.6931457519;
|
||||||
|
const float ln2Part2 = 1.4286067653e-6;
|
||||||
|
const float oneOverLn2 = 1.44269502162933349609375;
|
||||||
|
|
||||||
|
auto scaled = v * oneOverLn2;
|
||||||
|
auto kReal = floor(scaled);
|
||||||
|
auto k = toInt(kReal);
|
||||||
|
|
||||||
|
// Reduced range version of x
|
||||||
|
auto x = v - kReal * ln2Part1;
|
||||||
|
x -= kReal * ln2Part2;
|
||||||
|
|
||||||
|
// These coefficients are for e^x in [0, ln(2)]
|
||||||
|
const float one = 1.;
|
||||||
|
const float c2 = 0.4999999105930328369140625;
|
||||||
|
const float c3 = 0.166668415069580078125;
|
||||||
|
const float c4 = 4.16539050638675689697265625e-2;
|
||||||
|
const float c5 = 8.378830738365650177001953125e-3;
|
||||||
|
const float c6 = 1.304379315115511417388916015625e-3;
|
||||||
|
const float c7 = 2.7555381529964506626129150390625e-4;
|
||||||
|
|
||||||
|
auto result = x * c7 + c6;
|
||||||
|
result = x * result + c5;
|
||||||
|
result = x * result + c4;
|
||||||
|
result = x * result + c3;
|
||||||
|
result = x * result + c2;
|
||||||
|
result = x * result + one;
|
||||||
|
result = x * result + one;
|
||||||
|
|
||||||
|
// Compute 2^k (should differ for float and double, but I'll avoid
|
||||||
|
// it for now and just do floats)
|
||||||
|
const int fpbias = 127;
|
||||||
|
auto biasedN = k + fpbias;
|
||||||
|
auto overflow = kReal > fpbias;
|
||||||
|
// Minimum exponent is -126, so if k is <= -127 (k + 127 <= 0)
|
||||||
|
// we've got underflow. -127 * ln(2) -> -88.02. So the most
|
||||||
|
// negative float input that doesn't result in zero is like -88.
|
||||||
|
auto underflow = kReal <= -fpbias;
|
||||||
|
const int infBits = 0x7f800000;
|
||||||
|
biasedN <<= 23;
|
||||||
|
// Reinterpret this thing as float
|
||||||
|
auto twoToTheN = asFloat(biasedN);
|
||||||
|
// Handle both doubles and floats (hopefully eliding the copy for float)
|
||||||
|
auto elemtype2n = twoToTheN;
|
||||||
|
result *= elemtype2n;
|
||||||
|
result = select(overflow, cast_i2f(infBits), result);
|
||||||
|
result = select(underflow, 0., result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Range reduction for logarithms takes log(x) -> log(2^n * y) -> n
|
||||||
|
// * log(2) + log(y) where y is the reduced range (usually in [1/2, 1)).
|
||||||
|
template <typename T, typename R>
|
||||||
|
__forceinline void __rangeReduceLog(const T &input,
|
||||||
|
T &reduced,
|
||||||
|
R &exponent)
|
||||||
|
{
|
||||||
|
auto intVersion = asInt(input);
|
||||||
|
// single precision = SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM
|
||||||
|
// exponent mask = 0111 1111 1000 0000 0000 0000 0000 0000
|
||||||
|
// 0x7 0xF 0x8 0x0 0x0 0x0 0x0 0x0
|
||||||
|
// non-exponent = 1000 0000 0111 1111 1111 1111 1111 1111
|
||||||
|
// = 0x8 0x0 0x7 0xF 0xF 0xF 0xF 0xF
|
||||||
|
|
||||||
|
//const int exponentMask(0x7F800000)
|
||||||
|
static const int nonexponentMask = 0x807FFFFF;
|
||||||
|
|
||||||
|
// We want the reduced version to have an exponent of -1 which is
|
||||||
|
// -1 + 127 after biasing or 126
|
||||||
|
static const int exponentNeg1 = (126l << 23);
|
||||||
|
// NOTE(boulos): We don't need to mask anything out since we know
|
||||||
|
// the sign bit has to be 0. If it's 1, we need to return infinity/nan
|
||||||
|
// anyway (log(x), x = +-0 -> infinity, x < 0 -> NaN).
|
||||||
|
auto biasedExponent = intVersion >> 23; // This number is [0, 255] but it means [-127, 128]
|
||||||
|
|
||||||
|
auto offsetExponent = biasedExponent + 1; // Treat the number as if it were 2^{e+1} * (1.m)/2
|
||||||
|
exponent = offsetExponent - 127; // get the real value
|
||||||
|
|
||||||
|
// Blend the offset_exponent with the original input (do this in
|
||||||
|
// int for now, until I decide if float can have & and ¬)
|
||||||
|
auto blended = (intVersion & nonexponentMask) | (exponentNeg1);
|
||||||
|
reduced = asFloat(blended);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> struct ExponentType { };
|
||||||
|
template <int N> struct ExponentType<vfloat_impl<N>> { typedef vint<N> Ty; };
|
||||||
|
template <> struct ExponentType<float> { typedef int Ty; };
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T log(const T &v)
|
||||||
|
{
|
||||||
|
T reduced;
|
||||||
|
typename ExponentType<T>::Ty exponent;
|
||||||
|
|
||||||
|
const int nanBits = 0x7fc00000;
|
||||||
|
const int negInfBits = 0xFF800000;
|
||||||
|
const float nan = cast_i2f(nanBits);
|
||||||
|
const float negInf = cast_i2f(negInfBits);
|
||||||
|
auto useNan = v < 0.;
|
||||||
|
auto useInf = v == 0.;
|
||||||
|
auto exceptional = useNan | useInf;
|
||||||
|
const float one = 1.0;
|
||||||
|
|
||||||
|
auto patched = select(exceptional, one, v);
|
||||||
|
__rangeReduceLog(patched, reduced, exponent);
|
||||||
|
|
||||||
|
const float ln2 = 0.693147182464599609375;
|
||||||
|
|
||||||
|
auto x1 = one - reduced;
|
||||||
|
const float c1 = +0.50000095367431640625;
|
||||||
|
const float c2 = +0.33326041698455810546875;
|
||||||
|
const float c3 = +0.2519190013408660888671875;
|
||||||
|
const float c4 = +0.17541764676570892333984375;
|
||||||
|
const float c5 = +0.3424419462680816650390625;
|
||||||
|
const float c6 = -0.599632322788238525390625;
|
||||||
|
const float c7 = +1.98442304134368896484375;
|
||||||
|
const float c8 = -2.4899270534515380859375;
|
||||||
|
const float c9 = +1.7491014003753662109375;
|
||||||
|
|
||||||
|
auto result = x1 * c9 + c8;
|
||||||
|
result = x1 * result + c7;
|
||||||
|
result = x1 * result + c6;
|
||||||
|
result = x1 * result + c5;
|
||||||
|
result = x1 * result + c4;
|
||||||
|
result = x1 * result + c3;
|
||||||
|
result = x1 * result + c2;
|
||||||
|
result = x1 * result + c1;
|
||||||
|
result = x1 * result + one;
|
||||||
|
|
||||||
|
// Equation was for -(ln(red)/(1-red))
|
||||||
|
result *= -x1;
|
||||||
|
result += toFloat(exponent) * ln2;
|
||||||
|
|
||||||
|
return select(exceptional,
|
||||||
|
select(useNan, T(nan), T(negInf)),
|
||||||
|
result);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T pow(const T &x, const T &y)
|
||||||
|
{
|
||||||
|
auto x1 = abs(x);
|
||||||
|
auto z = exp(y * log(x1));
|
||||||
|
|
||||||
|
// Handle special cases
|
||||||
|
const float twoOver23 = 8388608.0f;
|
||||||
|
auto yInt = y == round(y);
|
||||||
|
auto yOddInt = select(yInt, asInt(abs(y) + twoOver23) << 31, 0); // set sign bit
|
||||||
|
|
||||||
|
// x == 0
|
||||||
|
z = select(x == 0.0f,
|
||||||
|
select(y < 0.0f, T(inf) | signmsk(x),
|
||||||
|
select(y == 0.0f, T(1.0f), asFloat(yOddInt) & x)), z);
|
||||||
|
|
||||||
|
// x < 0
|
||||||
|
auto xNegative = x < 0.0f;
|
||||||
|
if (any(xNegative))
|
||||||
|
{
|
||||||
|
auto z1 = z | asFloat(yOddInt);
|
||||||
|
z1 = select(yInt, z1, std::numeric_limits<float>::quiet_NaN());
|
||||||
|
z = select(xNegative, z1, z);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto xFinite = isfinite(x);
|
||||||
|
auto yFinite = isfinite(y);
|
||||||
|
if (all(xFinite & yFinite))
|
||||||
|
return z;
|
||||||
|
|
||||||
|
// x finite and y infinite
|
||||||
|
z = select(andn(xFinite, yFinite),
|
||||||
|
select(x1 == 1.0f, 1.0f,
|
||||||
|
select((x1 > 1.0f) ^ (y < 0.0f), inf, T(0.0f))), z);
|
||||||
|
|
||||||
|
// x infinite
|
||||||
|
z = select(xFinite, z,
|
||||||
|
select(y == 0.0f, 1.0f,
|
||||||
|
select(y < 0.0f, T(0.0f), inf) | (asFloat(yOddInt) & x)));
|
||||||
|
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__forceinline T pow(const T &x, float y)
|
||||||
|
{
|
||||||
|
return pow(x, T(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace fastapprox
|
||||||
|
|
||||||
|
} // namespace embree
|
||||||
87
Framework/external/embree/common/math/vec.h
vendored
Normal file
87
Framework/external/embree/common/math/vec.h
vendored
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "vec2.h"
|
||||||
|
#include "vec3.h"
|
||||||
|
#include "vec4.h"
|
||||||
|
|
||||||
|
namespace embree {
|
||||||
|
|
||||||
|
/*! Negation as a named function. */
__forceinline Vec3f neg(const Vec3f& a)
{
  return -a;
}

/*! Negation as a named function. */
__forceinline Vec3fa neg(const Vec3fa& a)
{
  return -a;
}

/*! Equality as a named function. */
__forceinline bool eq(const Vec3fa& a, const Vec3fa& b)
{
  return a == b;
}

/*! Inequality as a named function. */
__forceinline bool ne(const Vec3fa& a, const Vec3fa& b)
{
  return a != b;
}
|
||||||
|
|
||||||
|
// FIXME: change order of lerp arguments, then remove this function
|
||||||
|
template<typename V>
|
||||||
|
__forceinline V lerpr(float t, const V& v0, const V& v1) {
|
||||||
|
return (1.0f-t)*v0 + t*v1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------
|
||||||
|
// sRGB conversion functions
|
||||||
|
// -------------------------------------------------------
|
||||||
|
#define APPROXIMATE_SRGB
|
||||||
|
|
||||||
|
inline float linear_to_srgb(const float f)
|
||||||
|
{
|
||||||
|
const float c = max(f, 0.f);
|
||||||
|
#ifdef APPROXIMATE_SRGB
|
||||||
|
return pow(c, 1.f/2.2f);
|
||||||
|
#else
|
||||||
|
return c <= 0.0031308f ? 12.92f*c : pow(c, 1.f/2.4f)*1.055f - 0.055f;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts a linear RGBA color to sRGB. The color channels go through
// linear_to_srgb; alpha is never gamma-corrected and is only clamped to be
// non-negative.
inline Vec4f linear_to_srgba(const Vec4f c)
{
  const float r = linear_to_srgb(c.x);
  const float g = linear_to_srgb(c.y);
  const float b = linear_to_srgb(c.z);
  const float a = max(c.w, 0.f);
  return Vec4f(r, g, b, a);
}
|
||||||
|
|
||||||
|
inline uint32_t linear_to_srgba8(const Vec4f c)
|
||||||
|
{
|
||||||
|
#if 1
|
||||||
|
Vec4f l = 255.f * min(linear_to_srgba(c), Vec4f(1.f));
|
||||||
|
return
|
||||||
|
((uint32_t)l.x << 0) |
|
||||||
|
((uint32_t)l.y << 8) |
|
||||||
|
((uint32_t)l.z << 16) |
|
||||||
|
((uint32_t)l.w << 24);
|
||||||
|
#else
|
||||||
|
// TODO use ISPC's float_to_srgb8 once it is fixed (issue #1198)
|
||||||
|
return
|
||||||
|
(float_to_srgb8(c.x) << 0) |
|
||||||
|
(float_to_srgb8(c.y) << 8) |
|
||||||
|
(float_to_srgb8(c.z) << 16) |
|
||||||
|
((uint32_t)clamp(c.w, 0.f, 1.f) << 24); // alpha is never gamma-corrected
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float srgb_to_linear(const float f)
|
||||||
|
{
|
||||||
|
const float c = max(f, 0.f);
|
||||||
|
#ifdef APPROXIMATE_SRGB
|
||||||
|
return pow(c, 2.2f);
|
||||||
|
#else
|
||||||
|
return c <= 0.04045f ? c/12.92f : pow((c + 0.055f)/1.055f, 2.4f);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts an sRGB RGBA color to linear. The color channels go through
// srgb_to_linear; alpha is never gamma-corrected and is only clamped to be
// non-negative.
inline Vec4f srgba_to_linear(const Vec4f c)
{
  const float r = srgb_to_linear(c.x);
  const float g = srgb_to_linear(c.y);
  const float b = srgb_to_linear(c.z);
  const float a = max(c.w, 0.f);
  return Vec4f(r, g, b, a);
}
|
||||||
|
|
||||||
|
// TODO implement srgba8_to_linear with a 256 entry LUT
|
||||||
|
|
||||||
|
#undef APPROXIMATE_SRGB
|
||||||
|
|
||||||
|
} // namespace embree
|
||||||
236
Framework/external/embree/common/math/vec2.h
vendored
Normal file
236
Framework/external/embree/common/math/vec2.h
vendored
Normal file
|
|
@ -0,0 +1,236 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "emath.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
struct Vec2fa;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Generic 2D vector Class
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> struct Vec2
|
||||||
|
{
|
||||||
|
enum { N = 2 };
|
||||||
|
union {
|
||||||
|
struct { T x, y; };
|
||||||
|
#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
|
||||||
|
T components[N];
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef T Scalar;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec2( ) {}
|
||||||
|
__forceinline explicit Vec2( const T& a ) : x(a), y(a) {}
|
||||||
|
__forceinline Vec2( const T& x, const T& y ) : x(x), y(y) {}
|
||||||
|
|
||||||
|
__forceinline Vec2( const Vec2& other ) { x = other.x; y = other.y; }
|
||||||
|
Vec2( const Vec2fa& other );
|
||||||
|
|
||||||
|
template<typename T1> __forceinline Vec2( const Vec2<T1>& a ) : x(T(a.x)), y(T(a.y)) {}
|
||||||
|
template<typename T1> __forceinline Vec2& operator =( const Vec2<T1>& other ) { x = other.x; y = other.y; return *this; }
|
||||||
|
|
||||||
|
__forceinline Vec2& operator =( const Vec2& other ) { x = other.x; y = other.y; return *this; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec2( ZeroTy ) : x(zero), y(zero) {}
|
||||||
|
__forceinline Vec2( OneTy ) : x(one), y(one) {}
|
||||||
|
__forceinline Vec2( PosInfTy ) : x(pos_inf), y(pos_inf) {}
|
||||||
|
__forceinline Vec2( NegInfTy ) : x(neg_inf), y(neg_inf) {}
|
||||||
|
|
||||||
|
#if defined(__WIN32__) && _MSC_VER == 1800 // workaround for older VS 2013 compiler
|
||||||
|
__forceinline const T& operator [](const size_t axis) const { assert(axis < 2); return (&x)[axis]; }
|
||||||
|
__forceinline T& operator [](const size_t axis) { assert(axis < 2); return (&x)[axis]; }
|
||||||
|
#else
|
||||||
|
__forceinline const T& operator [](const size_t axis) const { assert(axis < 2); return components[axis]; }
|
||||||
|
__forceinline T& operator [](const size_t axis ) { assert(axis < 2); return components[axis]; }
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Component-wise unary plus. */
template<typename T>
__forceinline Vec2<T> operator +( const Vec2<T>& v ) {
  return Vec2<T>(+v.x, +v.y);
}

/*! Component-wise negation. */
template<typename T>
__forceinline Vec2<T> operator -( const Vec2<T>& v ) {
  return Vec2<T>(-v.x, -v.y);
}

/*! Component-wise abs. */
template<typename T>
__forceinline Vec2<T> abs( const Vec2<T>& v ) {
  return Vec2<T>(abs(v.x), abs(v.y));
}

/*! Component-wise rcp. */
template<typename T>
__forceinline Vec2<T> rcp( const Vec2<T>& v ) {
  return Vec2<T>(rcp(v.x), rcp(v.y));
}

/*! Component-wise rsqrt. */
template<typename T>
__forceinline Vec2<T> rsqrt( const Vec2<T>& v ) {
  return Vec2<T>(rsqrt(v.x), rsqrt(v.y));
}

/*! Component-wise sqrt. */
template<typename T>
__forceinline Vec2<T> sqrt( const Vec2<T>& v ) {
  return Vec2<T>(sqrt(v.x), sqrt(v.y));
}

/*! Component-wise frac. */
template<typename T>
__forceinline Vec2<T> frac( const Vec2<T>& v ) {
  return Vec2<T>(frac(v.x), frac(v.y));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Addition: vector + vector, vector + scalar, scalar + vector. */
template<typename T>
__forceinline Vec2<T> operator +( const Vec2<T>& lhs, const Vec2<T>& rhs ) {
  return Vec2<T>(lhs.x + rhs.x, lhs.y + rhs.y);
}
template<typename T>
__forceinline Vec2<T> operator +( const Vec2<T>& lhs, const T& s ) {
  return Vec2<T>(lhs.x + s, lhs.y + s);
}
template<typename T>
__forceinline Vec2<T> operator +( const T& s, const Vec2<T>& rhs ) {
  return Vec2<T>(s + rhs.x, s + rhs.y);
}

/*! Subtraction: vector - vector, vector - scalar, scalar - vector. */
template<typename T>
__forceinline Vec2<T> operator -( const Vec2<T>& lhs, const Vec2<T>& rhs ) {
  return Vec2<T>(lhs.x - rhs.x, lhs.y - rhs.y);
}
template<typename T>
__forceinline Vec2<T> operator -( const Vec2<T>& lhs, const T& s ) {
  return Vec2<T>(lhs.x - s, lhs.y - s);
}
template<typename T>
__forceinline Vec2<T> operator -( const T& s, const Vec2<T>& rhs ) {
  return Vec2<T>(s - rhs.x, s - rhs.y);
}

/*! Multiplication: vector * vector, scalar * vector, vector * scalar. */
template<typename T>
__forceinline Vec2<T> operator *( const Vec2<T>& lhs, const Vec2<T>& rhs ) {
  return Vec2<T>(lhs.x * rhs.x, lhs.y * rhs.y);
}
template<typename T>
__forceinline Vec2<T> operator *( const T& s, const Vec2<T>& rhs ) {
  return Vec2<T>(s * rhs.x, s * rhs.y);
}
template<typename T>
__forceinline Vec2<T> operator *( const Vec2<T>& lhs, const T& s ) {
  return Vec2<T>(lhs.x * s, lhs.y * s);
}

/*! Division: vector / vector, vector / scalar, scalar / vector. */
template<typename T>
__forceinline Vec2<T> operator /( const Vec2<T>& lhs, const Vec2<T>& rhs ) {
  return Vec2<T>(lhs.x / rhs.x, lhs.y / rhs.y);
}
template<typename T>
__forceinline Vec2<T> operator /( const Vec2<T>& lhs, const T& s ) {
  return Vec2<T>(lhs.x / s, lhs.y / s);
}
template<typename T>
__forceinline Vec2<T> operator /( const T& s, const Vec2<T>& rhs ) {
  return Vec2<T>(s / rhs.x, s / rhs.y);
}

/*! Component-wise minimum. */
template<typename T>
__forceinline Vec2<T> min( const Vec2<T>& lhs, const Vec2<T>& rhs ) {
  return Vec2<T>(min(lhs.x, rhs.x), min(lhs.y, rhs.y));
}

/*! Component-wise maximum. */
template<typename T>
__forceinline Vec2<T> max( const Vec2<T>& lhs, const Vec2<T>& rhs ) {
  return Vec2<T>(max(lhs.x, rhs.x), max(lhs.y, rhs.y));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Ternary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Component-wise madd/msub/nmadd/nmsub with three vector operands; each
 *  forwards to the scalar function of the same name. */
template<typename T>
__forceinline Vec2<T> madd( const Vec2<T>& u, const Vec2<T>& v, const Vec2<T>& w ) {
  return Vec2<T>(madd(u.x,v.x,w.x), madd(u.y,v.y,w.y));
}
template<typename T>
__forceinline Vec2<T> msub( const Vec2<T>& u, const Vec2<T>& v, const Vec2<T>& w ) {
  return Vec2<T>(msub(u.x,v.x,w.x), msub(u.y,v.y,w.y));
}
template<typename T>
__forceinline Vec2<T> nmadd( const Vec2<T>& u, const Vec2<T>& v, const Vec2<T>& w ) {
  return Vec2<T>(nmadd(u.x,v.x,w.x), nmadd(u.y,v.y,w.y));
}
template<typename T>
__forceinline Vec2<T> nmsub( const Vec2<T>& u, const Vec2<T>& v, const Vec2<T>& w ) {
  return Vec2<T>(nmsub(u.x,v.x,w.x), nmsub(u.y,v.y,w.y));
}

/*! Same operations with a scalar first operand shared by both components. */
template<typename T>
__forceinline Vec2<T> madd( const T& s, const Vec2<T>& v, const Vec2<T>& w ) {
  return Vec2<T>(madd(s,v.x,w.x), madd(s,v.y,w.y));
}
template<typename T>
__forceinline Vec2<T> msub( const T& s, const Vec2<T>& v, const Vec2<T>& w ) {
  return Vec2<T>(msub(s,v.x,w.x), msub(s,v.y,w.y));
}
template<typename T>
__forceinline Vec2<T> nmadd( const T& s, const Vec2<T>& v, const Vec2<T>& w ) {
  return Vec2<T>(nmadd(s,v.x,w.x), nmadd(s,v.y,w.y));
}
template<typename T>
__forceinline Vec2<T> nmsub( const T& s, const Vec2<T>& v, const Vec2<T>& w ) {
  return Vec2<T>(nmsub(s,v.x,w.x), nmsub(s,v.y,w.y));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! In-place component-wise addition of a vector. */
template<typename T>
__forceinline Vec2<T>& operator +=( Vec2<T>& dst, const Vec2<T>& rhs ) {
  dst.x += rhs.x;
  dst.y += rhs.y;
  return dst;
}

/*! In-place component-wise subtraction of a vector. */
template<typename T>
__forceinline Vec2<T>& operator -=( Vec2<T>& dst, const Vec2<T>& rhs ) {
  dst.x -= rhs.x;
  dst.y -= rhs.y;
  return dst;
}

/*! In-place multiplication of both components by a scalar. */
template<typename T>
__forceinline Vec2<T>& operator *=( Vec2<T>& dst, const T& s ) {
  dst.x *= s;
  dst.y *= s;
  return dst;
}

/*! In-place division of both components by a scalar. */
template<typename T>
__forceinline Vec2<T>& operator /=( Vec2<T>& dst, const T& s ) {
  dst.x /= s;
  dst.y /= s;
  return dst;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reduction Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Sum of the two components. */
template<typename T>
__forceinline T reduce_add( const Vec2<T>& v ) {
  return v.x + v.y;
}

/*! Product of the two components. */
template<typename T>
__forceinline T reduce_mul( const Vec2<T>& v ) {
  return v.x * v.y;
}

/*! Smaller of the two components. */
template<typename T>
__forceinline T reduce_min( const Vec2<T>& v ) {
  return min(v.x, v.y);
}

/*! Larger of the two components. */
template<typename T>
__forceinline T reduce_max( const Vec2<T>& v ) {
  return max(v.x, v.y);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline bool operator ==( const Vec2<T>& a, const Vec2<T>& b ) { return a.x == b.x && a.y == b.y; }
|
||||||
|
template<typename T> __forceinline bool operator !=( const Vec2<T>& a, const Vec2<T>& b ) { return a.x != b.x || a.y != b.y; }
|
||||||
|
/// Lexicographic "less than": x is compared first, y only breaks a tie on x.
/// Vectors whose components all compare equal are not less than each other.
template<typename T> __forceinline bool operator < ( const Vec2<T>& a, const Vec2<T>& b )
{
  // x decides whenever the two x components differ
  if (a.x != b.x) {
    return a.x < b.x;
  }
  // tie on x: y decides whenever the two y components differ
  if (a.y != b.y) {
    return a.y < b.y;
  }
  // no component differs
  return false;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Shift Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/// Applies the component overload of shift_right_1 to x and y independently.
template<typename T> __forceinline Vec2<T> shift_right_1( const Vec2<T>& a )
{
  return Vec2<T>(shift_right_1(a.x),
                 shift_right_1(a.y));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Euclidean Space Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline T dot ( const Vec2<T>& a, const Vec2<T>& b ) { return madd(a.x,b.x,a.y*b.y); }
|
||||||
|
template<typename T> __forceinline Vec2<T> cross ( const Vec2<T>& a ) { return Vec2<T>(-a.y,a.x); }
|
||||||
|
template<typename T> __forceinline T length ( const Vec2<T>& a ) { return sqrt(dot(a,a)); }
|
||||||
|
template<typename T> __forceinline Vec2<T> normalize( const Vec2<T>& a ) { return a*rsqrt(dot(a,a)); }
|
||||||
|
template<typename T> __forceinline T distance ( const Vec2<T>& a, const Vec2<T>& b ) { return length(a-b); }
|
||||||
|
template<typename T> __forceinline T det ( const Vec2<T>& a, const Vec2<T>& b ) { return a.x*b.y - a.y*b.x; }
|
||||||
|
|
||||||
|
/// Normalizes a, but returns a unchanged when its squared length compares equal to zero.
/// Both candidates are computed and handed to select(); there is no early exit.
template<typename T> __forceinline Vec2<T> normalize_safe( const Vec2<T>& a )
{
  const T sqr_len = dot(a,a);
  return select(sqr_len == T( zero ), a, a*rsqrt(sqr_len));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/// Scalar-condition select: forwards the same flag s to the component-level select for x and y.
template<typename T> __forceinline Vec2<T> select ( bool s, const Vec2<T>& t, const Vec2<T>& f )
{
  return Vec2<T>(select(s, t.x, f.x),
                 select(s, t.y, f.y));
}
|
||||||
|
|
||||||
|
template<typename T> __forceinline Vec2<T> select ( const Vec2<bool>& s, const Vec2<T>& t, const Vec2<T>& f ) {
|
||||||
|
return Vec2<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> __forceinline Vec2<T> select ( const typename T::Bool& s, const Vec2<T>& t, const Vec2<T>& f ) {
|
||||||
|
return Vec2<T>(select(s,t.x,f.x),select(s,t.y,f.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline Vec2<T> lerp(const Vec2<T>& v0, const Vec2<T>& v1, const T& t) {
|
||||||
|
return madd(Vec2<T>(T(1.0f)-t),v0,t*v1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Index of the component with the larger absolute value: 0 for x, 1 for y.
/// Returns 1 when x is not strictly greater (including ties).
template<typename T> __forceinline int maxDim ( const Vec2<T>& a )
{
  const Vec2<T> mag = abs(a);
  return (mag.x > mag.y) ? 0 : 1;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/// Streams the vector as "(x, y)".
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec2<T>& a)
{
  return cout << "("
              << a.x
              << ", "
              << a.y
              << ")";
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Default template instantiations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
typedef Vec2<bool > Vec2b;
|
||||||
|
typedef Vec2<int > Vec2i;
|
||||||
|
typedef Vec2<float> Vec2f;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "vec2fa.h"
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
#include "../simd/avx.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX512F__)
|
||||||
|
#include "../simd/avx512.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<> __forceinline Vec2<float>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||||
|
template<> __forceinline Vec2<vfloat4>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
template<> __forceinline Vec2<vfloat8>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX512F__)
|
||||||
|
template<> __forceinline Vec2<vfloat16>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
325
Framework/external/embree/common/math/vec2fa.h
vendored
Normal file
325
Framework/external/embree/common/math/vec2fa.h
vendored
Normal file
|
|
@ -0,0 +1,325 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/alloc.h"
|
||||||
|
#include "emath.h"
|
||||||
|
|
||||||
|
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
# include "vec2fa_sycl.h"
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE Vec2fa Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/// 2D float vector held in a single 128-bit SSE register.
/// Lanes 0 and 1 are the x and y components; lanes 2 and 3 (az, aw) are padding.
struct __aligned(16) Vec2fa
{
  ALIGNED_STRUCT_(16);

  typedef float Scalar;
  enum { N = 2 };
  union {
    __m128 m128;                  // whole-register view
    struct { float x,y,az,aw; };  // per-lane view; az and aw are the padding lanes
  };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /// Default constructor: leaves all lanes uninitialized.
  __forceinline Vec2fa( ) {}

  /// Wraps an existing register; all four lanes are taken as-is.
  __forceinline Vec2fa( const __m128 a ) : m128(a) {}

  /// Converts from Vec2<float>. The padding lanes are filled with y, exactly like the
  /// (x, y) constructor below, so that whole-register arithmetic never reads
  /// uninitialized lanes.
  __forceinline Vec2fa ( const Vec2<float>& other ) : m128(_mm_set_ps(other.y, other.y, other.y, other.x)) {}

  /// Assigns x and y from a Vec2<float>; the padding lanes keep their previous contents.
  __forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }

  /// Copies the whole register, padding lanes included.
  __forceinline Vec2fa ( const Vec2fa& other ) { m128 = other.m128; }
  __forceinline Vec2fa& operator =( const Vec2fa& other ) { m128 = other.m128; return *this; }

  /// Broadcasts a into all four lanes.
  __forceinline explicit Vec2fa( const float a ) : m128(_mm_set1_ps(a)) {}

  /// Sets lane 0 to x and lanes 1..3 to y.
  __forceinline Vec2fa( const float x, const float y) : m128(_mm_set_ps(y, y, y, x)) {}

  /// Converts four 32-bit integers to float, lane by lane.
  __forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}

  /// Implicit access to the underlying register.
  __forceinline operator const __m128&() const { return m128; }
  __forceinline operator __m128&() { return m128; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /// Aligned load. _mm_load_ps reads four floats (16 bytes) starting at a, so a must
  /// be 16-byte aligned and 16 bytes must be readable. Lanes 2 and 3 are then
  /// cleared to zero by the mask.
  static __forceinline Vec2fa load( const void* const a ) {
    const __m128 keep_xy = _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1));
    return Vec2fa(_mm_and_ps(_mm_load_ps(static_cast<const float*>(a)), keep_xy));
  }

  /// Unaligned load. Still reads four floats (16 bytes) starting at a; lanes 2 and 3
  /// are cleared to zero by the mask.
  static __forceinline Vec2fa loadu( const void* const a ) {
    const __m128 keep_xy = _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1));
    return Vec2fa(_mm_and_ps(_mm_loadu_ps(static_cast<const float*>(a)), keep_xy));
  }

  /// Unaligned store of the whole register: writes four floats (16 bytes) to ptr,
  /// i.e. the padding lanes are written as well.
  static __forceinline void storeu ( void* ptr, const Vec2fa& v ) {
    _mm_storeu_ps((float*)ptr,v);
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec2fa( ZeroTy ) : m128(_mm_setzero_ps()) {}
  __forceinline Vec2fa( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
  __forceinline Vec2fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
  __forceinline Vec2fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  /// Component access by index (0 = x, 1 = y); the index is only checked by assert.
  __forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
  __forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; }
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
|
||||||
|
/// Unary minus: flips the sign bit of every lane with an XOR.
__forceinline Vec2fa operator -( const Vec2fa& a )
{
  const __m128 sign_bits = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
  const __m128 negated = _mm_xor_ps(a.m128, sign_bits);
  return negated;
}
|
||||||
|
/// Absolute value: clears the sign bit of every lane with an AND.
__forceinline Vec2fa abs ( const Vec2fa& a )
{
  const __m128 magnitude_bits = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
  const __m128 cleared = _mm_and_ps(a.m128, magnitude_bits);
  return cleared;
}
|
||||||
|
__forceinline Vec2fa sign ( const Vec2fa& a ) {
|
||||||
|
return blendv_ps(Vec2fa(one), -Vec2fa(one), _mm_cmplt_ps (a,Vec2fa(zero)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec2fa rcp ( const Vec2fa& a )
|
||||||
|
{
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__m128 reciprocal = _mm_rcp_ps(a.m128);
|
||||||
|
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
|
||||||
|
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
|
||||||
|
return (const Vec2fa)reciprocal;
|
||||||
|
#else
|
||||||
|
#if defined(__AVX512VL__)
|
||||||
|
const Vec2fa r = _mm_rcp14_ps(a.m128);
|
||||||
|
#else
|
||||||
|
const Vec2fa r = _mm_rcp_ps(a.m128);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
const Vec2fa h_n = _mm_fnmadd_ps(a, r, vfloat4(1.0)); // First, compute 1 - a * r (which will be very close to 0)
|
||||||
|
const Vec2fa res = _mm_fmadd_ps(r, h_n, r); // Then compute r + r * h_n
|
||||||
|
#else
|
||||||
|
const Vec2fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a, r)); // First, compute 1 - a * r (which will be very close to 0)
|
||||||
|
const Vec2fa res = _mm_add_ps(r,_mm_mul_ps(r, h_n)); // Then compute r + r * h_n
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return res;
|
||||||
|
#endif //defined(__aarch64__)
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps(a.m128); }
|
||||||
|
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return _mm_mul_ps(a,a); }
|
||||||
|
|
||||||
|
__forceinline Vec2fa rsqrt( const Vec2fa& a )
|
||||||
|
{
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__m128 r = _mm_rsqrt_ps(a.m128);
|
||||||
|
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
|
||||||
|
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
|
||||||
|
return r;
|
||||||
|
#else
|
||||||
|
|
||||||
|
#if defined(__AVX512VL__)
|
||||||
|
__m128 r = _mm_rsqrt14_ps(a.m128);
|
||||||
|
#else
|
||||||
|
__m128 r = _mm_rsqrt_ps(a.m128);
|
||||||
|
#endif
|
||||||
|
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds a per-lane mask where |a| < min_rcp_input and blends a with a broadcast of
/// min_rcp_input under that mask.
/// NOTE(review): presumably blendv_ps picks its second argument where the mask is set,
/// i.e. tiny lanes are replaced by +min_rcp_input — confirm against blendv_ps.
__forceinline Vec2fa zero_fix(const Vec2fa& a)
{
  const __m128 threshold = _mm_set1_ps(min_rcp_input);
  const __m128 too_small = _mm_cmplt_ps(abs(a).m128, threshold);
  return blendv_ps(a, threshold, too_small);
}
|
||||||
|
__forceinline Vec2fa rcp_safe(const Vec2fa& a) {
|
||||||
|
return rcp(zero_fix(a));
|
||||||
|
}
|
||||||
|
__forceinline Vec2fa log ( const Vec2fa& a ) {
|
||||||
|
return Vec2fa(logf(a.x),logf(a.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec2fa exp ( const Vec2fa& a ) {
|
||||||
|
return Vec2fa(expf(a.x),expf(a.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return _mm_add_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
|
||||||
|
__forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
|
||||||
|
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return _mm_div_ps(a.m128,b.m128); }
|
||||||
|
__forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
|
||||||
|
__forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
|
||||||
|
|
||||||
|
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps(a.m128,b.m128); }
|
||||||
|
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps(a.m128,b.m128); }
|
||||||
|
|
||||||
|
#if defined(__aarch64__) || defined(__SSE4_1__)
|
||||||
|
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a);
|
||||||
|
const vint4 bi = _mm_castps_si128(b);
|
||||||
|
const vint4 ci = _mm_min_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__aarch64__) || defined(__SSE4_1__)
|
||||||
|
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a);
|
||||||
|
const vint4 bi = _mm_castps_si128(b);
|
||||||
|
const vint4 ci = _mm_max_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
|
||||||
|
return Vec2fa(powf(a.x,b),powf(a.y,b));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Ternary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmadd_ps(a,b,c); }
|
||||||
|
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmsub_ps(a,b,c); }
|
||||||
|
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmadd_ps(a,b,c); }
|
||||||
|
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmsub_ps(a,b,c); }
|
||||||
|
#else
|
||||||
|
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b+c; }
|
||||||
|
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b-c; }
|
||||||
|
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b+c;}
|
||||||
|
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b-c; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); }
|
||||||
|
__forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); }
|
||||||
|
__forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); }
|
||||||
|
__forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b ) { return a = a + b; }
|
||||||
|
__forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b ) { return a = a - b; }
|
||||||
|
__forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b ) { return a = a * b; }
|
||||||
|
__forceinline Vec2fa& operator *=( Vec2fa& a, const float b ) { return a = a * b; }
|
||||||
|
__forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b ) { return a = a / b; }
|
||||||
|
__forceinline Vec2fa& operator /=( Vec2fa& a, const float b ) { return a = a / b; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline float reduce_add(const Vec2fa& v) { return v.x+v.y; }
|
||||||
|
__forceinline float reduce_mul(const Vec2fa& v) { return v.x*v.y; }
|
||||||
|
__forceinline float reduce_min(const Vec2fa& v) { return min(v.x,v.y); }
|
||||||
|
__forceinline float reduce_max(const Vec2fa& v) { return max(v.x,v.y); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/// Equality on x and y only: the comparison bits of lanes 0 and 1 must both be set;
/// the padding lanes are masked out.
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b )
{
  const int eq_bits = _mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128));
  return (eq_bits & 3) == 3;
}
|
||||||
|
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 3) != 0; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Euclidean Space Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
|
||||||
|
return _mm_cvtss_f32(_mm_dp_ps(a,b,0x3F));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
|
||||||
|
return reduce_add(a*b);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Returns the vector (-a.y, a.x).
__forceinline Vec2fa cross ( const Vec2fa& a )
{
  const float rx = -a.y;
  const float ry = a.x;
  return Vec2fa(rx, ry);
}
|
||||||
|
|
||||||
|
__forceinline float sqr_length ( const Vec2fa& a ) { return dot(a,a); }
|
||||||
|
__forceinline float rcp_length ( const Vec2fa& a ) { return rsqrt(dot(a,a)); }
|
||||||
|
__forceinline float rcp_length2( const Vec2fa& a ) { return rcp(dot(a,a)); }
|
||||||
|
__forceinline float length ( const Vec2fa& a ) { return sqrt(dot(a,a)); }
|
||||||
|
__forceinline Vec2fa normalize( const Vec2fa& a ) { return a*rsqrt(dot(a,a)); }
|
||||||
|
__forceinline float distance ( const Vec2fa& a, const Vec2fa& b ) { return length(a-b); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/// Scalar-condition select: expands s into an all-ones or all-zeros lane mask and
/// blends f and t with it through blendv_ps.
__forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f )
{
  __m128 lane_mask;
  if (s) {
    // comparing zero with zero yields all bits set in every lane
    lane_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
  } else {
    lane_mask = _mm_setzero_ps();
  }
  return blendv_ps(f, t, lane_mask);
}
|
||||||
|
|
||||||
|
/// Linear interpolation between v0 and v1, evaluated as madd(1-t, v0, t*v1).
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t)
{
  const float w0 = 1.0f-t;  // weight of v0
  return madd(w0, v0, t*v1);
}
|
||||||
|
|
||||||
|
/// Index of the component with the larger absolute value: 0 for x, 1 for y.
/// Returns 1 when x is not strictly greater (including ties).
__forceinline int maxDim ( const Vec2fa& a )
{
  const Vec2fa mag = abs(a);
  return (mag.x > mag.y) ? 0 : 1;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Rounding Functions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
//__forceinline Vec2fa trunc(const Vec2fa& a) { return vrndq_f32(a); }
|
||||||
|
__forceinline Vec2fa floor(const Vec2fa& a) { return vrndmq_f32(a); }
|
||||||
|
__forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32(a); }
|
||||||
|
#elif defined (__SSE4_1__)
|
||||||
|
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_ZERO); }
|
||||||
|
__forceinline Vec2fa floor( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); }
|
||||||
|
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); }
|
||||||
|
#else
|
||||||
|
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(truncf(a.x),truncf(a.y)); }
|
||||||
|
__forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(floorf(a.x),floorf(a.y)); }
|
||||||
|
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(ceilf (a.x),ceilf (a.y)); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a) {
|
||||||
|
return cout << "(" << a.x << ", " << a.y << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef Vec2fa Vec2fa_t;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
270
Framework/external/embree/common/math/vec2fa_sycl.h
vendored
Normal file
270
Framework/external/embree/common/math/vec2fa_sycl.h
vendored
Normal file
|
|
@ -0,0 +1,270 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/alloc.h"
|
||||||
|
#include "emath.h"
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
struct Vec3fa;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SYCL Vec2fa Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/// 2D float vector for the SYCL device path: two plain floats, no SSE register.
struct __aligned(16) Vec2fa
{
  //ALIGNED_STRUCT_(16);

  typedef float Scalar;
  enum { N = 2 };
  struct { float x,y; };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /// Default constructor: leaves x and y uninitialized.
  __forceinline Vec2fa( ) {}
  //__forceinline Vec2fa( const __m128 a ) : m128(a) {}

  /// Conversion from Vec3fa; only declared here, defined elsewhere.
  explicit Vec2fa(const Vec3fa& a);

  /// Takes elements 0 and 1 of a 4-wide float vector.
  __forceinline explicit Vec2fa( const vfloat<4>& a ) : x(a[0]), y(a[1]) {}

  /// Conversion and assignment from Vec2<float>.
  __forceinline Vec2fa ( const Vec2<float>& other ) : x(other.x), y(other.y) {}
  __forceinline Vec2fa& operator =( const Vec2<float>& other ) {
    x = other.x;
    y = other.y;
    return *this;
  }

  /// Copy construction and copy assignment.
  __forceinline Vec2fa ( const Vec2fa& other ) : x(other.x), y(other.y) {}
  __forceinline Vec2fa& operator =( const Vec2fa& other ) {
    x = other.x;
    y = other.y;
    return *this;
  }

  /// Broadcasts a into both components.
  __forceinline explicit Vec2fa( const float a ) : x(a), y(a) {}

  /// Component-wise construction.
  __forceinline Vec2fa( const float x, const float y) : x(x), y(y) {}

  //__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}

  //__forceinline operator const __m128&() const { return m128; }
  //__forceinline operator __m128&() { return m128; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /// Reads two consecutive floats starting at a.
  static __forceinline Vec2fa load( const void* const a ) {
    const float* src = static_cast<const float*>(a);
    return Vec2fa(src[0],src[1]);
  }

  /// Same as load(): reads two consecutive floats starting at a.
  static __forceinline Vec2fa loadu( const void* const a ) {
    const float* src = static_cast<const float*>(a);
    return Vec2fa(src[0],src[1]);
  }

  /// Writes x and y as two consecutive floats starting at a.
  static __forceinline void storeu ( void* a, const Vec2fa& v ) {
    float* dst = static_cast<float*>(a);
    dst[0] = v.x;
    dst[1] = v.y;
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec2fa( ZeroTy ) : x(0.0f), y(0.0f) {}
  __forceinline Vec2fa( OneTy ) : x(1.0f), y(1.0f) {}
  __forceinline Vec2fa( PosInfTy ) : x(+INFINITY), y(+INFINITY) {}
  __forceinline Vec2fa( NegInfTy ) : x(-INFINITY), y(-INFINITY) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  //__forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
  //__forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; }
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
|
||||||
|
__forceinline Vec2fa operator -( const Vec2fa& a ) { return Vec2fa(-a.x,-a.y); }
|
||||||
|
__forceinline Vec2fa abs ( const Vec2fa& a ) { return Vec2fa(sycl::fabs(a.x),sycl::fabs(a.y)); }
|
||||||
|
/// Component-wise sign, delegated to sycl::sign.
/// NOTE(review): the SSE variant of sign() in vec2fa.h yields +1 for every lane that is
/// not less than zero, whereas sycl::sign presumably yields 0 for zero and NaN inputs —
/// confirm whether host and device results are expected to match.
__forceinline Vec2fa sign ( const Vec2fa& a ) { return Vec2fa(sycl::sign(a.x),sycl::sign(a.y)); }
|
||||||
|
|
||||||
|
//__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::recip(a.x),sycl::recip(a.y)); }
|
||||||
|
/// Component-wise reciprocal using the device's native (reduced-precision) reciprocal.
/// Uses the public sycl::native::recip entry point rather than the implementation-internal
/// __sycl_std::__invoke_native_recip, which is not part of the SYCL API.
__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); }
|
||||||
|
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return Vec2fa(sycl::sqrt(a.x),sycl::sqrt(a.y)); }
|
||||||
|
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return Vec2fa(a.x*a.x,a.y*a.y); }
|
||||||
|
|
||||||
|
__forceinline Vec2fa rsqrt( const Vec2fa& a ) { return Vec2fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y)); }
|
||||||
|
|
||||||
|
/// Replaces every component whose magnitude is below min_rcp_input by min_rcp_input;
/// other components are passed through unchanged.
__forceinline Vec2fa zero_fix(const Vec2fa& a)
{
  float fx = a.x;
  float fy = a.y;
  if (sycl::fabs(fx) < min_rcp_input) fx = min_rcp_input;
  if (sycl::fabs(fy) < min_rcp_input) fy = min_rcp_input;
  return Vec2fa(fx,fy);
}
|
||||||
|
__forceinline Vec2fa rcp_safe(const Vec2fa& a) {
|
||||||
|
return rcp(zero_fix(a));
|
||||||
|
}
|
||||||
|
__forceinline Vec2fa log ( const Vec2fa& a ) {
|
||||||
|
return Vec2fa(sycl::log(a.x),sycl::log(a.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec2fa exp ( const Vec2fa& a ) {
|
||||||
|
return Vec2fa(sycl::exp(a.x),sycl::exp(a.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x+b.x, a.y+b.y); }
|
||||||
|
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x-b.x, a.y-b.y); }
|
||||||
|
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x*b.x, a.y*b.y); }
|
||||||
|
__forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
|
||||||
|
__forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
|
||||||
|
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x/b.x, a.y/b.y); }
|
||||||
|
__forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return Vec2fa(a.x/b, a.y/b); }
|
||||||
|
__forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return Vec2fa(a/b.x, a/b.y); }
|
||||||
|
|
||||||
|
/// Component-wise minimum, computed with sycl::fmin.
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b )
{
  const float lo_x = sycl::fmin(a.x,b.x);
  const float lo_y = sycl::fmin(a.y,b.y);
  return Vec2fa(lo_x, lo_y);
}
|
||||||
|
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) {
|
||||||
|
return Vec2fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a);
|
||||||
|
const vint4 bi = _mm_castps_si128(b);
|
||||||
|
const vint4 ci = _mm_min_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a);
|
||||||
|
const vint4 bi = _mm_castps_si128(b);
|
||||||
|
const vint4 ci = _mm_max_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
|
||||||
|
return Vec2fa(powf(a.x,b),powf(a.y,b));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Ternary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y)); }
|
||||||
|
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y)); }
|
||||||
|
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y)); }
|
||||||
|
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y)); }
|
||||||
|
|
||||||
|
__forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); }
|
||||||
|
__forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); }
|
||||||
|
__forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); }
|
||||||
|
__forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Compound assignment for Vec2fa. Each operator evaluates the matching
// binary operator, stores the result in the left operand and returns that
// operand by reference.
__forceinline Vec2fa& operator +=(Vec2fa& a, const Vec2fa& b)
{
  a = a + b;
  return a;
}

__forceinline Vec2fa& operator -=(Vec2fa& a, const Vec2fa& b)
{
  a = a - b;
  return a;
}

__forceinline Vec2fa& operator *=(Vec2fa& a, const Vec2fa& b)
{
  a = a * b;
  return a;
}

__forceinline Vec2fa& operator *=(Vec2fa& a, const float b)
{
  a = a * b;
  return a;
}

__forceinline Vec2fa& operator /=(Vec2fa& a, const Vec2fa& b)
{
  a = a / b;
  return a;
}

__forceinline Vec2fa& operator /=(Vec2fa& a, const float b)
{
  a = a / b;
  return a;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Horizontal reductions over the x and y components of a Vec2fa.

// x + y
__forceinline float reduce_add(const Vec2fa& v)
{
  return v.x + v.y;
}

// x * y
__forceinline float reduce_mul(const Vec2fa& v)
{
  return v.x * v.y;
}

// sycl::fmin of x and y
__forceinline float reduce_min(const Vec2fa& v)
{
  return sycl::fmin(v.x, v.y);
}

// sycl::fmax of x and y
__forceinline float reduce_max(const Vec2fa& v)
{
  return sycl::fmax(v.x, v.y);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Equality: true only when both the x and the y components compare equal.
__forceinline bool operator ==(const Vec2fa& a, const Vec2fa& b)
{
  if (!(a.x == b.x)) return false;
  return a.y == b.y;
}

// Inequality: true as soon as either component compares unequal.
__forceinline bool operator !=(const Vec2fa& a, const Vec2fa& b)
{
  if (a.x != b.x) return true;
  return a.y != b.y;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Euclidean Space Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Dot product: component-wise product of a and b, summed with reduce_add.
__forceinline float dot(const Vec2fa& a, const Vec2fa& b)
{
  const Vec2fa products = a * b;
  return reduce_add(products);
}
|
||||||
|
|
||||||
|
// Single-argument 2D "cross": returns the vector (-a.y, a.x).
__forceinline Vec2fa cross(const Vec2fa& a)
{
  const Vec2fa rotated(-a.y, a.x);
  return rotated;
}
|
||||||
|
|
||||||
|
// Length-related helpers, all derived from dot(a,a).

// dot(a,a)
__forceinline float sqr_length(const Vec2fa& a)
{
  return dot(a, a);
}

// rsqrt applied to dot(a,a)
__forceinline float rcp_length(const Vec2fa& a)
{
  const float len2 = dot(a, a);
  return rsqrt(len2);
}

// rcp applied to dot(a,a)
__forceinline float rcp_length2(const Vec2fa& a)
{
  const float len2 = dot(a, a);
  return rcp(len2);
}

// sqrt applied to dot(a,a)
__forceinline float length(const Vec2fa& a)
{
  const float len2 = dot(a, a);
  return sqrt(len2);
}

// a scaled by rsqrt(dot(a,a)); no special handling for a zero vector
__forceinline Vec2fa normalize(const Vec2fa& a)
{
  const float len2 = dot(a, a);
  return a * rsqrt(len2);
}

// length of the difference a-b
__forceinline float distance(const Vec2fa& a, const Vec2fa& b)
{
  const Vec2fa delta = a - b;
  return length(delta);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Returns a Vec2fa built from the x/y components of t when s is true,
// otherwise from the x/y components of f.
__forceinline Vec2fa select(bool s, const Vec2fa& t, const Vec2fa& f)
{
  if (s)
    return Vec2fa(t.x, t.y);
  return Vec2fa(f.x, f.y);
}
|
||||||
|
|
||||||
|
// Linear interpolation between v0 and v1, evaluated as
// madd(1-t, v0, t*v1).
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t)
{
  const float w0 = 1.0f - t;
  const Vec2fa weighted1 = t * v1;
  return madd(w0, v0, weighted1);
}
|
||||||
|
|
||||||
|
// Index of the component with the larger absolute value:
// 0 when |x| > |y|, otherwise 1 (so ties resolve to 1).
__forceinline int maxDim(const Vec2fa& a)
{
  const Vec2fa mag = abs(a);
  return (mag.x > mag.y) ? 0 : 1;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Rounding Functions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Component-wise rounding, each forwarding to the sycl:: function of the
// same name for x and for y.
__forceinline Vec2fa trunc(const Vec2fa& a)
{
  return Vec2fa(sycl::trunc(a.x),
                sycl::trunc(a.y));
}

__forceinline Vec2fa floor(const Vec2fa& a)
{
  return Vec2fa(sycl::floor(a.x),
                sycl::floor(a.y));
}

__forceinline Vec2fa ceil(const Vec2fa& a)
{
  return Vec2fa(sycl::ceil(a.x),
                sycl::ceil(a.y));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Streams a Vec2fa as "(x, y)". The insertions are kept as one chained
// expression and its result is what gets returned.
inline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a)
{
  return cout << "("
              << a.x << ", " << a.y
              << ")";
}
|
||||||
|
|
||||||
|
/*template<>
|
||||||
|
__forceinline vfloat_impl<4>::vfloat_impl(const Vec2fa& a)
|
||||||
|
{
|
||||||
|
v = 0;
|
||||||
|
const unsigned int lid = get_sub_group_local_id();
|
||||||
|
if (lid == 0) v = a.x;
|
||||||
|
if (lid == 1) v = a.y;
|
||||||
|
}*/
|
||||||
|
|
||||||
|
// Vec2fa_t is an alias for Vec2fa.
using Vec2fa_t = Vec2fa;
|
||||||
|
}
|
||||||
357
Framework/external/embree/common/math/vec3.h
vendored
Normal file
357
Framework/external/embree/common/math/vec3.h
vendored
Normal file
|
|
@ -0,0 +1,357 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "emath.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
struct Vec3fa;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Generic 3D vector Class
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> struct Vec3
|
||||||
|
{
|
||||||
|
enum { N = 3 };
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
T x, y, z;
|
||||||
|
};
|
||||||
|
#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
|
||||||
|
T components[N];
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef T Scalar;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3( ) {}
|
||||||
|
__forceinline explicit Vec3( const T& a ) : x(a), y(a), z(a) {}
|
||||||
|
__forceinline Vec3( const T& x, const T& y, const T& z ) : x(x), y(y), z(z) {}
|
||||||
|
|
||||||
|
__forceinline Vec3( const Vec3& other ) { x = other.x; y = other.y; z = other.z; }
|
||||||
|
__forceinline Vec3( const Vec3fa& other );
|
||||||
|
|
||||||
|
template<typename T1> __forceinline Vec3( const Vec3<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)) {}
|
||||||
|
template<typename T1> __forceinline Vec3& operator =(const Vec3<T1>& other) { x = other.x; y = other.y; z = other.z; return *this; }
|
||||||
|
|
||||||
|
__forceinline Vec3& operator =(const Vec3& other) { x = other.x; y = other.y; z = other.z; return *this; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3( ZeroTy ) : x(zero), y(zero), z(zero) {}
|
||||||
|
__forceinline Vec3( OneTy ) : x(one), y(one), z(one) {}
|
||||||
|
__forceinline Vec3( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf) {}
|
||||||
|
__forceinline Vec3( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf) {}
|
||||||
|
|
||||||
|
#if defined(__WIN32__) && (_MSC_VER == 1800) // workaround for older VS 2013 compiler
|
||||||
|
__forceinline const T& operator []( const size_t axis ) const { assert(axis < 3); return (&x)[axis]; }
|
||||||
|
__forceinline T& operator []( const size_t axis ) { assert(axis < 3); return (&x)[axis]; }
|
||||||
|
#else
|
||||||
|
__forceinline const T& operator [](const size_t axis) const { assert(axis < 3); return components[axis]; }
|
||||||
|
__forceinline T& operator [](const size_t axis) { assert(axis < 3); return components[axis]; }
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Unary operators on Vec3<T>: each applies the element-level operation of
// the same name to x, y and z and returns the results as a new Vec3<T>.
template<typename T>
__forceinline Vec3<T> operator +(const Vec3<T>& a)
{
  return Vec3<T>(+a.x, +a.y, +a.z);
}

template<typename T>
__forceinline Vec3<T> operator -(const Vec3<T>& a)
{
  return Vec3<T>(-a.x, -a.y, -a.z);
}

template<typename T>
__forceinline Vec3<T> abs(const Vec3<T>& a)
{
  return Vec3<T>(abs(a.x),
                 abs(a.y),
                 abs(a.z));
}

template<typename T>
__forceinline Vec3<T> rcp(const Vec3<T>& a)
{
  return Vec3<T>(rcp(a.x),
                 rcp(a.y),
                 rcp(a.z));
}

template<typename T>
__forceinline Vec3<T> rsqrt(const Vec3<T>& a)
{
  return Vec3<T>(rsqrt(a.x),
                 rsqrt(a.y),
                 rsqrt(a.z));
}

template<typename T>
__forceinline Vec3<T> sqrt(const Vec3<T>& a)
{
  return Vec3<T>(sqrt(a.x),
                 sqrt(a.y),
                 sqrt(a.z));
}
|
||||||
|
|
||||||
|
// Replaces every component whose absolute value is below min_rcp_input by
// T(min_rcp_input); other components pass through unchanged.
template<typename T>
__forceinline Vec3<T> zero_fix(const Vec3<T>& a)
{
  const T floor_value = T(min_rcp_input);
  return Vec3<T>(select(abs(a.x) < min_rcp_input, floor_value, a.x),
                 select(abs(a.y) < min_rcp_input, floor_value, a.y),
                 select(abs(a.z) < min_rcp_input, floor_value, a.z));
}
|
||||||
|
// rcp applied to the zero_fix'ed input.
template<typename T>
__forceinline Vec3<T> rcp_safe(const Vec3<T>& a)
{
  const Vec3<T> fixed = zero_fix(a);
  return rcp(fixed);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Binary arithmetic on Vec3<T>. Vector/vector forms combine matching
// components; vector/scalar and scalar/vector forms combine every
// component with the same scalar.
template<typename T>
__forceinline Vec3<T> operator +(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<T>(a.x + b.x,
                 a.y + b.y,
                 a.z + b.z);
}

template<typename T>
__forceinline Vec3<T> operator -(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<T>(a.x - b.x,
                 a.y - b.y,
                 a.z - b.z);
}

template<typename T>
__forceinline Vec3<T> operator *(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<T>(a.x * b.x,
                 a.y * b.y,
                 a.z * b.z);
}

template<typename T>
__forceinline Vec3<T> operator *(const T& a, const Vec3<T>& b)
{
  return Vec3<T>(a * b.x,
                 a * b.y,
                 a * b.z);
}

template<typename T>
__forceinline Vec3<T> operator *(const Vec3<T>& a, const T& b)
{
  return Vec3<T>(a.x * b,
                 a.y * b,
                 a.z * b);
}

template<typename T>
__forceinline Vec3<T> operator /(const Vec3<T>& a, const T& b)
{
  return Vec3<T>(a.x / b,
                 a.y / b,
                 a.z / b);
}

template<typename T>
__forceinline Vec3<T> operator /(const T& a, const Vec3<T>& b)
{
  return Vec3<T>(a / b.x,
                 a / b.y,
                 a / b.z);
}

template<typename T>
__forceinline Vec3<T> operator /(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<T>(a.x / b.x,
                 a.y / b.y,
                 a.z / b.z);
}
|
||||||
|
|
||||||
|
// Component-wise minimum of a and b.
template<typename T>
__forceinline Vec3<T> min(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<T>(min(a.x, b.x),
                 min(a.y, b.y),
                 min(a.z, b.z));
}

// Component-wise maximum of a and b.
template<typename T>
__forceinline Vec3<T> max(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<T>(max(a.x, b.x),
                 max(a.y, b.y),
                 max(a.z, b.z));
}
|
||||||
|
|
||||||
|
// Shifts every component right by b.
template<typename T>
__forceinline Vec3<T> operator >>(const Vec3<T>& a, const int b)
{
  return Vec3<T>(a.x >> b,
                 a.y >> b,
                 a.z >> b);
}

// Shifts every component left by b.
template<typename T>
__forceinline Vec3<T> operator <<(const Vec3<T>& a, const int b)
{
  return Vec3<T>(a.x << b,
                 a.y << b,
                 a.z << b);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Ternary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Ternary operators on Vec3<T>. Each forwards x, y and z separately to the
// element-level operation of the same name.

// All three operands are vectors.
template<typename T>
__forceinline Vec3<T> madd(const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c)
{
  return Vec3<T>(madd(a.x, b.x, c.x),
                 madd(a.y, b.y, c.y),
                 madd(a.z, b.z, c.z));
}

template<typename T>
__forceinline Vec3<T> msub(const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c)
{
  return Vec3<T>(msub(a.x, b.x, c.x),
                 msub(a.y, b.y, c.y),
                 msub(a.z, b.z, c.z));
}

template<typename T>
__forceinline Vec3<T> nmadd(const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c)
{
  return Vec3<T>(nmadd(a.x, b.x, c.x),
                 nmadd(a.y, b.y, c.y),
                 nmadd(a.z, b.z, c.z));
}

template<typename T>
__forceinline Vec3<T> nmsub(const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c)
{
  return Vec3<T>(nmsub(a.x, b.x, c.x),
                 nmsub(a.y, b.y, c.y),
                 nmsub(a.z, b.z, c.z));
}

// The first operand is a single element shared by all three components.
template<typename T>
__forceinline Vec3<T> madd(const T& a, const Vec3<T>& b, const Vec3<T>& c)
{
  return Vec3<T>(madd(a, b.x, c.x),
                 madd(a, b.y, c.y),
                 madd(a, b.z, c.z));
}

template<typename T>
__forceinline Vec3<T> msub(const T& a, const Vec3<T>& b, const Vec3<T>& c)
{
  return Vec3<T>(msub(a, b.x, c.x),
                 msub(a, b.y, c.y),
                 msub(a, b.z, c.z));
}

template<typename T>
__forceinline Vec3<T> nmadd(const T& a, const Vec3<T>& b, const Vec3<T>& c)
{
  return Vec3<T>(nmadd(a, b.x, c.x),
                 nmadd(a, b.y, c.y),
                 nmadd(a, b.z, c.z));
}

template<typename T>
__forceinline Vec3<T> nmsub(const T& a, const Vec3<T>& b, const Vec3<T>& c)
{
  return Vec3<T>(nmsub(a, b.x, c.x),
                 nmsub(a, b.y, c.y),
                 nmsub(a, b.z, c.z));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Compound assignment on Vec3<T>: updates the components of `a` in place
// and returns `a` by reference.

// Adds the scalar b to every component.
template<typename T>
__forceinline Vec3<T>& operator +=(Vec3<T>& a, const T b)
{
  a.x += b;
  a.y += b;
  a.z += b;
  return a;
}

// Adds b component-wise.
template<typename T>
__forceinline Vec3<T>& operator +=(Vec3<T>& a, const Vec3<T>& b)
{
  a.x += b.x;
  a.y += b.y;
  a.z += b.z;
  return a;
}

// Subtracts b component-wise.
template<typename T>
__forceinline Vec3<T>& operator -=(Vec3<T>& a, const Vec3<T>& b)
{
  a.x -= b.x;
  a.y -= b.y;
  a.z -= b.z;
  return a;
}

// Multiplies every component by the scalar b.
template<typename T>
__forceinline Vec3<T>& operator *=(Vec3<T>& a, const T& b)
{
  a.x *= b;
  a.y *= b;
  a.z *= b;
  return a;
}

// Divides every component by the scalar b.
template<typename T>
__forceinline Vec3<T>& operator /=(Vec3<T>& a, const T& b)
{
  a.x /= b;
  a.y /= b;
  a.z /= b;
  return a;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reduction Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Horizontal reductions over the three components of a Vec3<T>.

// x + y + z
template<typename T>
__forceinline T reduce_add(const Vec3<T>& a)
{
  return a.x + a.y + a.z;
}

// x * y * z
template<typename T>
__forceinline T reduce_mul(const Vec3<T>& a)
{
  return a.x * a.y * a.z;
}

// three-argument min over x, y, z
template<typename T>
__forceinline T reduce_min(const Vec3<T>& a)
{
  return min(a.x, a.y, a.z);
}

// three-argument max over x, y, z
template<typename T>
__forceinline T reduce_max(const Vec3<T>& a)
{
  return max(a.x, a.y, a.z);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Equality: all three components must compare equal.
template<typename T>
__forceinline bool operator ==(const Vec3<T>& a, const Vec3<T>& b)
{
  return (a.x == b.x)
      && (a.y == b.y)
      && (a.z == b.z);
}

// Inequality: any component comparing unequal is enough.
template<typename T>
__forceinline bool operator !=(const Vec3<T>& a, const Vec3<T>& b)
{
  return (a.x != b.x)
      || (a.y != b.y)
      || (a.z != b.z);
}

// Lexicographic ordering on (x, y, z): the first component that differs
// decides; vectors with no differing component are not less than each other.
template<typename T>
__forceinline bool operator <(const Vec3<T>& a, const Vec3<T>& b)
{
  if (a.x != b.x) { return a.x < b.x; }
  if (a.y != b.y) { return a.y < b.y; }
  if (a.z != b.z) { return a.z < b.z; }
  return false;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Shift Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Applies the element-level shift_right_1 to x, y and z.
template<typename T>
__forceinline Vec3<T> shift_right_1(const Vec3<T>& a)
{
  return Vec3<T>(shift_right_1(a.x),
                 shift_right_1(a.y),
                 shift_right_1(a.z));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Select with one bool condition shared by all components: each component
// is chosen by the element-level select(s, t.c, f.c).
template<typename T>
__forceinline Vec3<T> select(bool s, const Vec3<T>& t, const Vec3<T>& f)
{
  return Vec3<T>(select(s, t.x, f.x),
                 select(s, t.y, f.y),
                 select(s, t.z, f.z));
}
|
||||||
|
|
||||||
|
// Select with a per-component bool condition: s.x picks the x component,
// s.y the y component and s.z the z component.
template<typename T>
__forceinline Vec3<T> select(const Vec3<bool>& s, const Vec3<T>& t, const Vec3<T>& f)
{
  return Vec3<T>(select(s.x, t.x, f.x),
                 select(s.y, t.y, f.y),
                 select(s.z, t.z, f.z));
}
|
||||||
|
|
||||||
|
// Select with a condition of the element type's own T::Bool type; the same
// condition value is passed to the element-level select for x, y and z.
template<typename T>
__forceinline Vec3<T> select(const typename T::Bool& s, const Vec3<T>& t, const Vec3<T>& f)
{
  return Vec3<T>(select(s, t.x, f.x),
                 select(s, t.y, f.y),
                 select(s, t.z, f.z));
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline Vec3<T> lerp(const Vec3<T>& v0, const Vec3<T>& v1, const T& t) {
|
||||||
|
return madd(Vec3<T>(T(1.0f)-t),v0,t*v1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Index (0, 1 or 2) of a component with the largest absolute value.
// Only strict '>' comparisons are used, so ties resolve towards the
// higher index.
template<typename T>
__forceinline int maxDim(const Vec3<T>& a)
{
  const Vec3<T> mag = abs(a);
  if (mag.x > mag.y)
    return (mag.x > mag.z) ? 0 : 2;
  return (mag.y > mag.z) ? 1 : 2;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Component-wise Comparison Masks
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Component-wise comparisons: each returns a Vec3<bool> holding the result
// of the comparison for x, y and z individually.
template<typename T>
__forceinline Vec3<bool> eq_mask(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<bool>(a.x == b.x,
                    a.y == b.y,
                    a.z == b.z);
}

template<typename T>
__forceinline Vec3<bool> neq_mask(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<bool>(a.x != b.x,
                    a.y != b.y,
                    a.z != b.z);
}

template<typename T>
__forceinline Vec3<bool> lt_mask(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<bool>(a.x < b.x,
                    a.y < b.y,
                    a.z < b.z);
}

template<typename T>
__forceinline Vec3<bool> le_mask(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<bool>(a.x <= b.x,
                    a.y <= b.y,
                    a.z <= b.z);
}

template<typename T>
__forceinline Vec3<bool> gt_mask(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<bool>(a.x > b.x,
                    a.y > b.y,
                    a.z > b.z);
}

template<typename T>
__forceinline Vec3<bool> ge_mask(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<bool>(a.x >= b.x,
                    a.y >= b.y,
                    a.z >= b.z);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Euclidean Space Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// dot(a,a)
template<typename T>
__forceinline T sqr(const Vec3<T>& a)
{
  return dot(a, a);
}

// Dot product, evaluated as madd(a.x, b.x, madd(a.y, b.y, a.z*b.z)).
template<typename T>
__forceinline T dot(const Vec3<T>& a, const Vec3<T>& b)
{
  return madd(a.x, b.x,
              madd(a.y, b.y, a.z * b.z));
}

// sqrt applied to sqr(a)
template<typename T>
__forceinline T length(const Vec3<T>& a)
{
  return sqrt(sqr(a));
}

// rsqrt applied to sqr(a)
template<typename T>
__forceinline T rcp_length(const Vec3<T>& a)
{
  return rsqrt(sqr(a));
}

// a scaled by rsqrt(sqr(a)); no special handling for a zero vector
template<typename T>
__forceinline Vec3<T> normalize(const Vec3<T>& a)
{
  return a * rsqrt(sqr(a));
}

// length of the difference a-b
template<typename T>
__forceinline T distance(const Vec3<T>& a, const Vec3<T>& b)
{
  return length(a - b);
}

// Cross product; each component is an msub of one product pair against
// the other product of that component.
template<typename T>
__forceinline Vec3<T> cross(const Vec3<T>& a, const Vec3<T>& b)
{
  return Vec3<T>(msub(a.y, b.z, a.z * b.y),
                 msub(a.z, b.x, a.x * b.z),
                 msub(a.x, b.y, a.y * b.x));
}
|
||||||
|
// Computes cross(a,b) and cross(b,c) and then, per component, returns the
// value from the cross product whose subtracted product term has the
// smaller absolute value (cross(a,b) when |ab_*| < |bc_*|, else cross(b,c)).
template<typename T>
__forceinline Vec3<T> stable_triangle_normal(const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c)
{
  // second (subtracted) product of each cross-product component
  const T ab_x = a.z*b.y;
  const T ab_y = a.x*b.z;
  const T ab_z = a.y*b.x;

  const T bc_x = b.z*c.y;
  const T bc_y = b.x*c.z;
  const T bc_z = b.y*c.x;

  const Vec3<T> cross_ab(msub(a.y, b.z, ab_x),
                         msub(a.z, b.x, ab_y),
                         msub(a.x, b.y, ab_z));

  const Vec3<T> cross_bc(msub(b.y, c.z, bc_x),
                         msub(b.z, c.x, bc_y),
                         msub(b.x, c.y, bc_z));

  // per-component choice between the two candidates
  const auto sx = abs(ab_x) < abs(bc_x);
  const auto sy = abs(ab_y) < abs(bc_y);
  const auto sz = abs(ab_z) < abs(bc_z);

  return Vec3<T>(select(sx, cross_ab.x, cross_bc.x),
                 select(sy, cross_ab.y, cross_bc.y),
                 select(sz, cross_ab.z, cross_bc.z));
}
|
||||||
|
|
||||||
|
// x + y + z
template<typename T>
__forceinline T sum(const Vec3<T>& a)
{
  return a.x+a.y+a.z;
}

// d.x*(d.y+d.z) + d.y*d.z, evaluated through madd
template<typename T>
__forceinline T halfArea(const Vec3<T>& d)
{
  return madd(d.x, (d.y + d.z), d.y * d.z);
}

// twice halfArea(d)
template<typename T>
__forceinline T area(const Vec3<T>& d)
{
  return 2.0f * halfArea(d);
}
|
||||||
|
|
||||||
|
// Like normalize, but where dot(a,a) equals T(zero) the input is returned
// unchanged instead of the scaled vector.
template<typename T>
__forceinline Vec3<T> normalize_safe(const Vec3<T>& a)
{
  const T d = dot(a, a);
  return select(d == T(zero), a, a * rsqrt(d));
}
|
||||||
|
|
||||||
|
// Squared distance measure between point P and the line through Q0 and Q1:
// |cross(P-Q0, Q1-Q0)|^2 multiplied by rcp(|Q1-Q0|^2).
template<typename T>
__forceinline T sqr_point_to_line_distance(const Vec3<T>& P, const Vec3<T>& Q0, const Vec3<T>& Q1)
{
  const Vec3<T> D = Q1 - Q0;
  const Vec3<T> N = cross(P - Q0, D);
  return dot(N, N) * rcp(dot(D, D));
}
|
||||||
|
|
||||||
|
// Same computation as the three-argument overload, for callers that already
// hold the differences PmQ0 (P - Q0) and Q1mQ0 (Q1 - Q0).
template<typename T>
__forceinline T sqr_point_to_line_distance(const Vec3<T>& PmQ0, const Vec3<T>& Q1mQ0)
{
  const Vec3<T> N = cross(PmQ0, Q1mQ0);
  return dot(N, N) * rcp(dot(Q1mQ0, Q1mQ0));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Streams a Vec3<T> as "(x, y, z)". The insertions stay one chained
// expression and its result is returned.
template<typename T>
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3<T>& a)
{
  return cout << "("
              << a.x << ", " << a.y << ", " << a.z
              << ")";
}
|
||||||
|
|
||||||
|
// Short names for the commonly used element types.
using Vec3b = Vec3<bool>;
using Vec3i = Vec3<int>;
using Vec3f = Vec3<float>;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "vec3ba.h"
|
||||||
|
#include "vec3ia.h"
|
||||||
|
#include "vec3fa.h"
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE / AVX / AVX-512 specializations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
#include "../simd/avx.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX512F__)
|
||||||
|
#include "../simd/avx512.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<typename Out, typename In>
|
||||||
|
__forceinline Vec3<Out> broadcast(const Vec3<In>& a, const size_t k) {
|
||||||
|
return Vec3<Out>(Out(a.x[k]), Out(a.y[k]), Out(a.z[k]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vec3<float> from Vec3fa: copies the x, y and z components.
template<>
__forceinline Vec3<float>::Vec3(const Vec3fa& a)
{
  x = a.x;
  y = a.y;
  z = a.z;
}
|
||||||
|
|
||||||
|
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
// Vec3<vfloat4> from Vec3fa (AVX build): each vfloat4 member is assigned
// from the matching component of a.
template<>
__forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a)
{
  x = a.x;
  y = a.y;
  z = a.z;
}
|
||||||
|
#elif defined(__SSE__) || defined(__ARM_NEON)
|
||||||
|
template<>
|
||||||
|
__forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
|
||||||
|
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||||
|
template<>
|
||||||
|
__forceinline Vec3<vfloat4> broadcast<vfloat4,vfloat4>(const Vec3<vfloat4>& a, const size_t k) {
|
||||||
|
return Vec3<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k]));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int i0, int i1, int i2, int i3>
|
||||||
|
__forceinline Vec3<vfloat4> shuffle(const Vec3<vfloat4>& b) {
|
||||||
|
return Vec3<vfloat4>(shuffle<i0,i1,i2,i3>(b.x), shuffle<i0,i1,i2,i3>(b.y), shuffle<i0,i1,i2,i3>(b.z));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
template<>
|
||||||
|
__forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
|
||||||
|
x = a.x; y = a.y; z = a.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
__forceinline Vec3<vfloat8> broadcast<vfloat8,vfloat4>(const Vec3<vfloat4>& a, const size_t k) {
|
||||||
|
return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]));
|
||||||
|
}
|
||||||
|
template<>
|
||||||
|
__forceinline Vec3<vfloat8> broadcast<vfloat8,vfloat8>(const Vec3<vfloat8>& a, const size_t k) {
|
||||||
|
return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int i0, int i1, int i2, int i3>
|
||||||
|
__forceinline Vec3<vfloat8> shuffle(const Vec3<vfloat8>& b) {
|
||||||
|
return Vec3<vfloat8>(shuffle<i0,i1,i2,i3>(b.x), shuffle<i0,i1,i2,i3>(b.y), shuffle<i0,i1,i2,i3>(b.z));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX512F__)
|
||||||
|
// Vec3<vfloat16> from Vec3fa: members are initialized (not assigned) from
// the matching components of a.
template<>
__forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a)
  : x(a.x), y(a.y), z(a.z)
{
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#if defined(__SSE__)
|
||||||
|
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
|
||||||
|
x = a.x; y = a.y; z = a.z;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(__AVX__)
|
||||||
|
template<> __forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
|
||||||
|
x = a.x; y = a.y; z = a.z;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(__AVX512F__)
|
||||||
|
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) {
|
||||||
|
x = a.x; y = a.y; z = a.z;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
127
Framework/external/embree/common/math/vec3ba.h
vendored
Normal file
127
Framework/external/embree/common/math/vec3ba.h
vendored
Normal file
|
|
@ -0,0 +1,127 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/alloc.h"
|
||||||
|
#include "emath.h"
|
||||||
|
|
||||||
|
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
# include "vec3ba_sycl.h"
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE Vec3ba Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Three-component boolean vector stored in one 16-byte aligned __m128.
// The same storage is also visible as three ints x, y, z.
struct __aligned(16) Vec3ba
{
  ALIGNED_STRUCT_(16);

  union {
    __m128 m128;
    struct { int x,y,z; };
  };

  typedef int Scalar;
  enum { N = 3 };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  // Default construction: no initialization is performed.
  __forceinline Vec3ba() {}

  // Wraps an existing __m128 value.
  __forceinline Vec3ba(const __m128 input)
    : m128(input) {}

  // Copy construction / assignment copy the whole __m128.
  __forceinline Vec3ba(const Vec3ba& other)
    : m128(other.m128) {}

  __forceinline Vec3ba& operator =(const Vec3ba& other)
  {
    m128 = other.m128;
    return *this;
  }

  // Splat of one bool: the mask is looked up in mm_lookupmask_ps with `a`
  // placed in all four index bits.
  __forceinline explicit Vec3ba(bool a)
    : m128(mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}

  // Three independent bools: a, b, c occupy index bits 0, 1 and 2 of the
  // mm_lookupmask_ps lookup; bit 3 stays clear.
  __forceinline Vec3ba(bool a, bool b, bool c)
    : m128(mm_lookupmask_ps[(size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {}

  // Implicit access to the underlying __m128.
  __forceinline operator const __m128&() const { return m128; }
  __forceinline operator       __m128&()       { return m128; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  // All bits zero.
  __forceinline Vec3ba(FalseTy)
    : m128(_mm_setzero_ps()) {}

  // All bits set, produced by comparing a zero register with itself.
  __forceinline Vec3ba(TrueTy)
    : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  // Indexed access to the int view of the components; asserts index < 3.
  __forceinline const int& operator [](const size_t index) const
  {
    assert(index < 3);
    return (&x)[index];
  }

  __forceinline int& operator [](const size_t index)
  {
    assert(index < 3);
    return (&x)[index];
  }
};
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Logical NOT: XORs the mask with the all-true constant.
__forceinline Vec3ba operator !(const Vec3ba& a)
{
  const Vec3ba all_true(embree::True);
  return _mm_xor_ps(a.m128, all_true.m128);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Bitwise AND / OR / XOR of the two masks.
__forceinline Vec3ba operator &(const Vec3ba& a, const Vec3ba& b)
{
  return _mm_and_ps(a.m128, b.m128);
}

__forceinline Vec3ba operator |(const Vec3ba& a, const Vec3ba& b)
{
  return _mm_or_ps(a.m128, b.m128);
}

__forceinline Vec3ba operator ^(const Vec3ba& a, const Vec3ba& b)
{
  return _mm_xor_ps(a.m128, b.m128);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Compound assignment: evaluates the matching binary operator, stores the
// result in `a` and returns `a` by reference.
__forceinline Vec3ba& operator &=(Vec3ba& a, const Vec3ba& b)
{
  a = a & b;
  return a;
}

__forceinline Vec3ba& operator |=(Vec3ba& a, const Vec3ba& b)
{
  a = a | b;
  return a;
}

__forceinline Vec3ba& operator ^=(Vec3ba& a, const Vec3ba& b)
{
  a = a ^ b;
  return a;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Equality: compares the masks as 32-bit integer lanes and requires the
// three low lanes (movemask bits 0..2) to all match; lane 3 is ignored.
__forceinline bool operator ==(const Vec3ba& a, const Vec3ba& b)
{
  const __m128i lanes_equal = _mm_cmpeq_epi32(_mm_castps_si128(a.m128),
                                              _mm_castps_si128(b.m128));
  const int bits = _mm_movemask_ps(_mm_castsi128_ps(lanes_equal)) & 7;
  return bits == 7;
}

// Inequality: true when at least one of the three low lanes differs.
__forceinline bool operator !=(const Vec3ba& a, const Vec3ba& b)
{
  return !(a == b);
}

// Lexicographic ordering on the int view (x, y, z): the first component
// that differs decides.
__forceinline bool operator <(const Vec3ba& a, const Vec3ba& b)
{
  if (a.x != b.x) { return a.x < b.x; }
  if (a.y != b.y) { return a.y < b.y; }
  if (a.z != b.z) { return a.z < b.z; }
  return false;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reduction Operations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! True when the sign bits of the x, y and z lanes are all set. */
__forceinline bool reduce_and( const Vec3ba& a )
{
  const int xyzBits = _mm_movemask_ps(a) & 0x7;
  return xyzBits == 0x7;
}
|
||||||
|
/*! True when the sign bit of at least one of the x, y, z lanes is set. */
__forceinline bool reduce_or ( const Vec3ba& a )
{
  const int xyzBits = _mm_movemask_ps(a) & 0x7;
  return xyzBits != 0x0;
}
|
||||||
|
|
||||||
|
/*! True when x, y and z are all set (fourth lane ignored). */
__forceinline bool all ( const Vec3ba& b )
{
  const int xyzBits = _mm_movemask_ps(b) & 0x7;
  return xyzBits == 0x7;
}
|
||||||
|
/*! True when at least one of x, y, z is set (fourth lane ignored). */
__forceinline bool any ( const Vec3ba& b )
{
  const int xyzBits = _mm_movemask_ps(b) & 0x7;
  return xyzBits != 0x0;
}
|
||||||
|
/*! True when none of x, y, z is set (fourth lane ignored). */
__forceinline bool none ( const Vec3ba& b )
{
  const int xyzBits = _mm_movemask_ps(b) & 0x7;
  return xyzBits == 0x0;
}
|
||||||
|
|
||||||
|
/*! Packs the sign bits of x, y, z into bits 0..2 of the result; bit 3 is cleared. */
__forceinline size_t movemask(const Vec3ba& a)
{
  const int xyzBits = _mm_movemask_ps(a) & 0x7;
  return xyzBits;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Writes the mask as "(x, y, z)", printing each component as "1" when it is
 *  non-zero and "0" otherwise. */
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a)
{
  const char* const sx = a.x ? "1" : "0";
  const char* const sy = a.y ? "1" : "0";
  const char* const sz = a.z ? "1" : "0";
  return cout << "(" << sx << ", " << sy << ", " << sz << ")";
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
115
Framework/external/embree/common/math/vec3ba_sycl.h
vendored
Normal file
115
Framework/external/embree/common/math/vec3ba_sycl.h
vendored
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/alloc.h"
|
||||||
|
#include "emath.h"
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE Vec3ba Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Three-component boolean vector stored as plain bools (no __m128 member and no
 *  operator[] in this variant). The struct is declared with 16-byte alignment. */
struct __aligned(16) Vec3ba
{
  /*! the three components */
  struct { bool x,y,z; };

  using Scalar = bool;
  enum { N = 3 };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! default construction leaves x, y, z uninitialized */
  __forceinline Vec3ba( ) {}

  /*! component-wise copy */
  __forceinline Vec3ba( const Vec3ba& other )
    : x(other.x), y(other.y), z(other.z) {}

  /*! component-wise assignment */
  __forceinline Vec3ba& operator =(const Vec3ba& other)
  {
    x = other.x;
    y = other.y;
    z = other.z;
    return *this;
  }

  /*! sets all three components to the same value */
  __forceinline explicit Vec3ba( bool a )
    : x(a), y(a), z(a) {}

  /*! sets the components individually */
  __forceinline Vec3ba( bool a, bool b, bool c)
    : x(a), y(b), z(c) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  /*! all components false */
  __forceinline Vec3ba( FalseTy )
    : x(false), y(false), z(false) {}

  /*! all components true */
  __forceinline Vec3ba( TrueTy )
    : x(true), y(true), z(true) {}
};
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Component-wise logical negation. */
__forceinline Vec3ba operator !( const Vec3ba& a )
{
  const bool nx = !a.x;
  const bool ny = !a.y;
  const bool nz = !a.z;
  return Vec3ba(nx, ny, nz);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Component-wise AND of two boolean vectors. */
__forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b )
{
  const bool rx = a.x && b.x;
  const bool ry = a.y && b.y;
  const bool rz = a.z && b.z;
  return Vec3ba(rx, ry, rz);
}
|
||||||
|
/*! Component-wise OR of two boolean vectors. */
__forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b )
{
  const bool rx = a.x || b.x;
  const bool ry = a.y || b.y;
  const bool rz = a.z || b.z;
  return Vec3ba(rx, ry, rz);
}
|
||||||
|
/*! Component-wise XOR: a component is true where the two inputs differ. */
__forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b )
{
  const bool rx = (a.x != b.x);
  const bool ry = (a.y != b.y);
  const bool rz = (a.z != b.z);
  return Vec3ba(rx, ry, rz);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; }
|
||||||
|
__forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; }
|
||||||
|
__forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators + Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! True when all three components match. */
__forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b )
{
  if (a.x != b.x) return false;
  if (a.y != b.y) return false;
  return a.z == b.z;
}
|
||||||
|
/*! True when at least one component differs. */
__forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b )
{
  if (a.x != b.x) return true;
  if (a.y != b.y) return true;
  return a.z != b.z;
}
|
||||||
|
/*
|
||||||
|
__forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) {
|
||||||
|
if (a.x != b.x) return a.x < b.x;
|
||||||
|
if (a.y != b.y) return a.y < b.y;
|
||||||
|
if (a.z != b.z) return a.z < b.z;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reduction Operations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! True when x, y and z are all true. */
__forceinline bool reduce_and( const Vec3ba& a )
{
  const bool xy = a.x && a.y;
  return xy && a.z;
}
|
||||||
|
/*! True when at least one of x, y, z is true. */
__forceinline bool reduce_or ( const Vec3ba& a )
{
  const bool xy = a.x || a.y;
  return xy || a.z;
}
|
||||||
|
|
||||||
|
__forceinline bool all ( const Vec3ba& b ) { return reduce_and(b); }
|
||||||
|
__forceinline bool any ( const Vec3ba& b ) { return reduce_or(b); }
|
||||||
|
__forceinline bool none ( const Vec3ba& b ) { return !reduce_or(b); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Stream operator stub: returns the stream unchanged and writes nothing for `a`.
 *  NOTE(review): the non-SYCL Vec3ba stream operator prints "(x, y, z)" as 0/1
 *  digits; presumably output is omitted here on purpose for this variant --
 *  confirm before relying on printed masks. */
inline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) {
|
||||||
|
return cout;
|
||||||
|
}
|
||||||
|
}
|
||||||
791
Framework/external/embree/common/math/vec3fa.h
vendored
Normal file
791
Framework/external/embree/common/math/vec3fa.h
vendored
Normal file
|
|
@ -0,0 +1,791 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/alloc.h"
|
||||||
|
#include "emath.h"
|
||||||
|
|
||||||
|
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
# include "vec3fa_sycl.h"
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE Vec3fa Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Three-component float vector backed by one 16-byte aligned __m128 register.
 *  x, y, z alias the first three lanes of m128; the fourth lane has no named
 *  member in this type. */
struct __aligned(16) Vec3fa
{
  ALIGNED_STRUCT_(16);

  using Scalar = float;
  enum { N = 3 };

  union {
    __m128 m128;              //!< raw SIMD register
    struct { float x,y,z; };  //!< named view of the first three lanes
  };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! default construction leaves the register uninitialized */
  __forceinline Vec3fa( ) {}

  /*! wraps an existing register (all four lanes are taken as-is) */
  __forceinline Vec3fa( const __m128 a ) : m128(a) {}

  /*! converts from the generic 3-float vector; the fourth lane is set to 0 */
  __forceinline Vec3fa ( const Vec3<float>& other )
    : m128(_mm_set_ps(0, other.z, other.y, other.x)) {}

  /*! copies all four lanes */
  __forceinline Vec3fa ( const Vec3fa& other ) : m128(other.m128) {}

  /*! assigns all four lanes */
  __forceinline Vec3fa& operator =( const Vec3fa& other )
  {
    m128 = other.m128;
    return *this;
  }

  /*! broadcasts a scalar into every lane */
  __forceinline explicit Vec3fa( const float a ) : m128(_mm_set1_ps(a)) {}

  /*! builds from components; the fourth lane is set to 0 */
  __forceinline Vec3fa( const float x, const float y, const float z)
    : m128(_mm_set_ps(0, z, y, x)) {}

  /*! converts four packed 32-bit integers to floats */
  __forceinline explicit Vec3fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}

  /*! reinterprets the register as a vfloat4 */
  __forceinline explicit operator const vfloat4() const
  {
    return vfloat4(m128);
  }

  /*! converts the lanes to 32-bit integers via _mm_cvtps_epi32 */
  __forceinline explicit operator const vint4() const
  {
    return vint4(_mm_cvtps_epi32(m128));
  }

  /*! hands the register to Vec2fa */
  __forceinline explicit operator const Vec2fa() const
  {
    return Vec2fa(m128);
  }

  /*! converts the lanes to 32-bit integers via _mm_cvtps_epi32 */
  __forceinline explicit operator const Vec3ia() const
  {
    return Vec3ia(_mm_cvtps_epi32(m128));
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /*! aligned load (_mm_load_ps) of four floats; the fourth lane is cleared to 0 */
  static __forceinline Vec3fa load( const void* const a )
  {
#if defined(__aarch64__)
    __m128 t = _mm_load_ps((float*)a);
    t[3] = 0.0f;
    return Vec3fa(t);
#else
    // all-ones in lanes 0..2, zero in lane 3
    const __m128 keepXYZ = _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1));
    const __m128 loaded = _mm_load_ps((float*)a);
    return Vec3fa(_mm_and_ps(loaded, keepXYZ));
#endif
  }

  /*! unaligned load of four floats; all four lanes are kept as loaded */
  static __forceinline Vec3fa loadu( const void* const a )
  {
    const __m128 loaded = _mm_loadu_ps((float*)a);
    return Vec3fa(loaded);
  }

  /*! unaligned store of all four lanes */
  static __forceinline void storeu ( void* ptr, const Vec3fa& v )
  {
    float* const dst = (float*)ptr;
    _mm_storeu_ps(dst, v.m128);
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fa( ZeroTy )   : m128(_mm_setzero_ps()) {}
  __forceinline Vec3fa( OneTy )    : m128(_mm_set1_ps(1.0f)) {}
  __forceinline Vec3fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
  __forceinline Vec3fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  /*! read access to component `index`; asserts index < 3 */
  __forceinline const float& operator []( const size_t index ) const
  {
    assert(index < 3);
    const float* const base = &x;
    return base[index];
  }

  /*! write access to component `index`; asserts index < 3 */
  __forceinline float& operator []( const size_t index )
  {
    assert(index < 3);
    float* const base = &x;
    return base[index];
  }
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
|
||||||
|
__forceinline Vec3fa operator -( const Vec3fa& a ) {
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
return vnegq_f32(a.m128);
|
||||||
|
#else
|
||||||
|
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
|
||||||
|
return _mm_xor_ps(a.m128, mask);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
__forceinline Vec3fa abs ( const Vec3fa& a ) {
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
return _mm_abs_ps(a.m128);
|
||||||
|
#else
|
||||||
|
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
|
||||||
|
return _mm_and_ps(a.m128, mask);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
/*! Per-lane sign: blends between +1 and -1 using the mask (a < 0), passing -1 as
 *  the value associated with a set mask lane. */
__forceinline Vec3fa sign ( const Vec3fa& a )
{
  const __m128 posOne = Vec3fa(one).m128;
  const __m128 negOne = (-Vec3fa(one)).m128;
  const __m128 isNegative = _mm_cmplt_ps(a.m128, Vec3fa(zero).m128);
  return blendv_ps(posOne, negOne, isNegative);
}
|
||||||
|
|
||||||
|
__forceinline Vec3fa rcp ( const Vec3fa& a )
|
||||||
|
{
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
return vdivq_f32(vdupq_n_f32(1.0f),a.m128);
|
||||||
|
#else
|
||||||
|
|
||||||
|
#if defined(__AVX512VL__)
|
||||||
|
const Vec3fa r = _mm_rcp14_ps(a.m128);
|
||||||
|
#else
|
||||||
|
const Vec3fa r = _mm_rcp_ps(a.m128);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
const Vec3fa h_n = _mm_fnmadd_ps(a.m128, r.m128, vfloat4(1.0)); // First, compute 1 - a * r (which will be very close to 0)
|
||||||
|
const Vec3fa res = _mm_fmadd_ps(r.m128, h_n.m128, r.m128); // Then compute r + r * h_n
|
||||||
|
#else
|
||||||
|
const Vec3fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a.m128, r.m128)); // First, compute 1 - a * r (which will be very close to 0)
|
||||||
|
const Vec3fa res = _mm_add_ps(r.m128,_mm_mul_ps(r.m128, h_n.m128)); // Then compute r + r * h_n
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return res;
|
||||||
|
#endif //defined(__aarch64__)
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec3fa sqrt ( const Vec3fa& a ) { return _mm_sqrt_ps(a.m128); }
|
||||||
|
__forceinline Vec3fa sqr ( const Vec3fa& a ) { return _mm_mul_ps(a.m128,a.m128); }
|
||||||
|
|
||||||
|
__forceinline Vec3fa rsqrt( const Vec3fa& a )
|
||||||
|
{
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__m128 r = _mm_rsqrt_ps(a.m128);
|
||||||
|
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
|
||||||
|
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
|
||||||
|
return r;
|
||||||
|
#else
|
||||||
|
|
||||||
|
#if defined(__AVX512VL__)
|
||||||
|
__m128 r = _mm_rsqrt14_ps(a.m128);
|
||||||
|
#else
|
||||||
|
__m128 r = _mm_rsqrt_ps(a.m128);
|
||||||
|
#endif
|
||||||
|
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Blends `a` with min_rcp_input using the mask |a| < min_rcp_input, so lanes
 *  whose magnitude is below that threshold take the min_rcp_input value. */
__forceinline Vec3fa zero_fix(const Vec3fa& a)
{
  const __m128 threshold = _mm_set1_ps(min_rcp_input);
  const __m128 tooSmall = _mm_cmplt_ps(abs(a).m128, threshold);
  return blendv_ps(a.m128, threshold, tooSmall);
}
|
||||||
|
__forceinline Vec3fa rcp_safe(const Vec3fa& a) {
|
||||||
|
return rcp(zero_fix(a));
|
||||||
|
}
|
||||||
|
/*! Component-wise natural logarithm (scalar logf on x, y and z). */
__forceinline Vec3fa log ( const Vec3fa& a )
{
  const float lx = logf(a.x);
  const float ly = logf(a.y);
  const float lz = logf(a.z);
  return Vec3fa(lx, ly, lz);
}
|
||||||
|
|
||||||
|
/*! Component-wise exponential (scalar expf on x, y and z). */
__forceinline Vec3fa exp ( const Vec3fa& a )
{
  const float ex = expf(a.x);
  const float ey = expf(a.y);
  const float ez = expf(a.z);
  return Vec3fa(ex, ey, ez);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return _mm_add_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
|
||||||
|
__forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
|
||||||
|
__forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return _mm_div_ps(a.m128,b.m128); }
|
||||||
|
__forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
|
||||||
|
__forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
|
||||||
|
|
||||||
|
__forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) { return _mm_min_ps(a.m128,b.m128); }
|
||||||
|
__forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) { return _mm_max_ps(a.m128,b.m128); }
|
||||||
|
|
||||||
|
#if defined(__aarch64__) || defined(__SSE4_1__)
|
||||||
|
__forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a.m128);
|
||||||
|
const vint4 bi = _mm_castps_si128(b.m128);
|
||||||
|
const vint4 ci = _mm_min_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__aarch64__) || defined(__SSE4_1__)
|
||||||
|
__forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a.m128);
|
||||||
|
const vint4 bi = _mm_castps_si128(b.m128);
|
||||||
|
const vint4 ci = _mm_max_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*! Raises each of x, y, z to the power `b` using scalar powf. */
__forceinline Vec3fa pow ( const Vec3fa& a, const float& b )
{
  const float px = powf(a.x, b);
  const float py = powf(a.y, b);
  const float pz = powf(a.z, b);
  return Vec3fa(px, py, pz);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Ternary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__AVX2__) || defined(__ARM_NEON)
|
||||||
|
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
|
||||||
|
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
|
||||||
|
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
|
||||||
|
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
|
||||||
|
#else
|
||||||
|
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b+c; }
|
||||||
|
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b+c;}
|
||||||
|
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b-c; }
|
||||||
|
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
|
||||||
|
__forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
|
||||||
|
__forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
|
||||||
|
__forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
|
||||||
|
__forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
|
||||||
|
__forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
|
||||||
|
__forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__forceinline float reduce_add(const Vec3fa& v) {
|
||||||
|
float32x4_t t = v.m128;
|
||||||
|
t[3] = 0.0f;
|
||||||
|
return vaddvq_f32(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
|
||||||
|
__forceinline float reduce_min(const Vec3fa& v) {
|
||||||
|
float32x4_t t = v.m128;
|
||||||
|
t[3] = t[2];
|
||||||
|
return vminvq_f32(t);
|
||||||
|
}
|
||||||
|
__forceinline float reduce_max(const Vec3fa& v) {
|
||||||
|
float32x4_t t = v.m128;
|
||||||
|
t[3] = t[2];
|
||||||
|
return vmaxvq_f32(t);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/*! Horizontal sum: adds the register to its shuffle<1> and shuffle<2> variants
 *  and returns the lowest lane of the result. */
__forceinline float reduce_add(const Vec3fa& v)
{
  const vfloat4 lanes(v.m128);
  const vfloat4 sum = lanes + shuffle<1>(lanes) + shuffle<2>(lanes);
  return _mm_cvtss_f32(sum);
}
|
||||||
|
|
||||||
|
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
|
||||||
|
__forceinline float reduce_min(const Vec3fa& v) { return min(v.x,v.y,v.z); }
|
||||||
|
__forceinline float reduce_max(const Vec3fa& v) { return max(v.x,v.y,v.z); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
|
||||||
|
__forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
|
||||||
|
|
||||||
|
__forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmple_ps (a.m128, b.m128); }
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpgt_ps (a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpge_ps (a.m128, b.m128); }
|
||||||
|
#else
|
||||||
|
__forceinline Vec3ba gt_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnle_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba ge_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnlt_ps(a.m128, b.m128); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*! True when x, y and z all lie strictly inside (-FLT_LARGE, +FLT_LARGE). */
__forceinline bool isvalid ( const Vec3fa& v )
{
  const Vec3ba aboveLower = gt_mask(v, Vec3fa(-FLT_LARGE));
  const Vec3ba belowUpper = lt_mask(v, Vec3fa(+FLT_LARGE));
  return all(aboveLower & belowUpper);
}
|
||||||
|
|
||||||
|
__forceinline bool is_finite ( const Vec3fa& a ) {
|
||||||
|
return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline bool isvalid4 ( const Vec3fa& v ) {
|
||||||
|
return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline bool is_finite4 ( const Vec3fa& a ) {
|
||||||
|
return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Euclidean Space Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
|
||||||
|
return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
|
||||||
|
return reduce_add(a*b);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*! Cross product, evaluated as one fused multiply-subtract on lane-rotated
 *  copies of the inputs followed by a final lane rotation. */
__forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b )
{
  const vfloat4 av(a.m128);
  const vfloat4 bv(b.m128);
  const vfloat4 aRot = shuffle<1,2,0,3>(av);
  const vfloat4 bRot = shuffle<1,2,0,3>(bv);
  const vfloat4 unrotated = msub(av, bRot, aRot*bv);
  return Vec3fa(shuffle<1,2,0,3>(unrotated));
}
|
||||||
|
|
||||||
|
__forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
|
||||||
|
__forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); }
|
||||||
|
__forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
|
||||||
|
__forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); }
|
||||||
|
__forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); }
|
||||||
|
__forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
|
||||||
|
__forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
|
||||||
|
__forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); }
|
||||||
|
|
||||||
|
/*! Normalizes `a`, returning it unchanged when its squared length is exactly 0. */
__forceinline Vec3fa normalize_safe( const Vec3fa& a )
{
  const float lenSq = dot(a,a);
  if (unlikely(lenSq == 0.0f)) {
    return a;
  }
  return a*rsqrt(lenSq);
}
|
||||||
|
|
||||||
|
/*! differentiated normalization */
|
||||||
|
__forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
|
||||||
|
{
|
||||||
|
const float pp = dot(p,p);
|
||||||
|
const float pdp = dot(p,dp);
|
||||||
|
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Scalar-condition select: builds an all-ones mask when `s` is true and an
 *  all-zero mask otherwise, then blends `f` and `t` with it. */
__forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f )
{
  __m128 mask;
  if (s) {
    // comparing zero with zero sets every bit of every lane
    mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
  } else {
    mask = _mm_setzero_ps();
  }
  return blendv_ps(f.m128, t.m128, mask);
}
|
||||||
|
|
||||||
|
__forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
|
||||||
|
return blendv_ps(f.m128, t.m128, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Linear interpolation: (1-t)*v0 + t*v1, evaluated with a single madd. */
__forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t)
{
  const float w0 = 1.0f-t;
  const Vec3fa scaled1 = t*v1;
  return madd(w0, v0, scaled1);
}
|
||||||
|
|
||||||
|
/*! Index (0, 1 or 2) of the component chosen by comparing absolute values with
 *  strict '>' tests; a tie between the current candidate and z resolves to 2. */
__forceinline int maxDim ( const Vec3fa& a )
{
  const Vec3fa mag = abs(a);
  if (mag.x > mag.y) {
    return (mag.x > mag.z) ? 0 : 2;
  }
  return (mag.y > mag.z) ? 1 : 2;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Rounding Functions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__forceinline Vec3fa floor(const Vec3fa& a) { return vrndmq_f32(a.m128); }
|
||||||
|
__forceinline Vec3fa ceil (const Vec3fa& a) { return vrndpq_f32(a.m128); }
|
||||||
|
__forceinline Vec3fa trunc(const Vec3fa& a) { return vrndq_f32(a.m128); }
|
||||||
|
#elif defined (__SSE4_1__)
|
||||||
|
/*! Rounds every lane toward zero. Uses _MM_FROUND_TO_ZERO so that this SSE4.1
 *  path agrees with the scalar fallback (truncf) and the aarch64 path
 *  (vrndq_f32); the previous _MM_FROUND_TO_NEAREST_INT mode rounded to the
 *  nearest integer instead of truncating (e.g. 1.7 became 2 rather than 1). */
__forceinline Vec3fa trunc( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_ZERO); }
|
||||||
|
__forceinline Vec3fa floor( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); }
|
||||||
|
__forceinline Vec3fa ceil ( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); }
|
||||||
|
#else
|
||||||
|
__forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
|
||||||
|
__forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(floorf(a.x),floorf(a.y),floorf(a.z)); }
|
||||||
|
__forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Writes the vector as "(x, y, z)". */
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a)
{
  return cout << "("
              << a.x << ", "
              << a.y << ", "
              << a.z << ")";
}
|
||||||
|
|
||||||
|
typedef Vec3fa Vec3fa_t;
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE Vec3fx Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct __aligned(16) Vec3fx
|
||||||
|
{
|
||||||
|
ALIGNED_STRUCT_(16);
|
||||||
|
|
||||||
|
typedef float Scalar;
|
||||||
|
enum { N = 3 };
|
||||||
|
union {
|
||||||
|
__m128 m128;
|
||||||
|
struct { float x,y,z; union { int a; unsigned u; float w; }; };
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constructors, Assignment & Cast Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx( ) {}
|
||||||
|
__forceinline Vec3fx( const __m128 a ) : m128(a) {}
|
||||||
|
|
||||||
|
__forceinline explicit Vec3fx(const Vec3fa& v) : m128(v.m128) {}
|
||||||
|
__forceinline operator Vec3fa () const { return Vec3fa(m128); }
|
||||||
|
|
||||||
|
__forceinline explicit Vec3fx ( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); }
|
||||||
|
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
|
||||||
|
|
||||||
|
__forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; }
|
||||||
|
__forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; }
|
||||||
|
|
||||||
|
__forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {}
|
||||||
|
__forceinline Vec3fx( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {}
|
||||||
|
|
||||||
|
__forceinline Vec3fx( const Vec3fa& other, const int a1) { m128 = other.m128; a = a1; }
|
||||||
|
__forceinline Vec3fx( const Vec3fa& other, const unsigned a1) { m128 = other.m128; u = a1; }
|
||||||
|
__forceinline Vec3fx( const Vec3fa& other, const float w1) {
|
||||||
|
#if defined (__aarch64__)
|
||||||
|
m128 = other.m128; m128[3] = w1;
|
||||||
|
#elif defined (__SSE4_1__)
|
||||||
|
m128 = _mm_insert_ps(other.m128, _mm_set_ss(w1),3 << 4);
|
||||||
|
#else
|
||||||
|
const vint4 mask(-1,-1,-1,0);
|
||||||
|
m128 = select(vboolf4(_mm_castsi128_ps(mask)),vfloat4(other.m128),vfloat4(w1));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
//__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
|
||||||
|
//__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
|
||||||
|
__forceinline Vec3fx( const float x, const float y, const float z, const float w) : m128(_mm_set_ps(w, z, y, x)) {}
|
||||||
|
|
||||||
|
//__forceinline explicit Vec3fx( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
|
||||||
|
|
||||||
|
__forceinline explicit operator const vfloat4() const { return vfloat4(m128); }
|
||||||
|
__forceinline explicit operator const vint4() const { return vint4(_mm_cvtps_epi32(m128)); }
|
||||||
|
__forceinline explicit operator const Vec2fa() const { return Vec2fa(m128); }
|
||||||
|
__forceinline explicit operator const Vec3ia() const { return Vec3ia(_mm_cvtps_epi32(m128)); }
|
||||||
|
|
||||||
|
//__forceinline operator const __m128&() const { return m128; }
|
||||||
|
//__forceinline operator __m128&() { return m128; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Loads and Stores
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Aligned load of four floats from `a`; the fourth lane is cleared
 *  by AND-ing with a mask whose top lane is zero. */
static __forceinline Vec3fx load( const void* const a )
{
  const __m128 xyz_mask = _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1));
  const __m128 raw      = _mm_load_ps(static_cast<const float*>(a));
  return Vec3fx(_mm_and_ps(raw, xyz_mask));
}
|
||||||
|
|
||||||
|
/*! Unaligned load of four floats from `a`; all four lanes are kept as read. */
static __forceinline Vec3fx loadu( const void* const a )
{
  const float* const src = static_cast<const float*>(a);
  return Vec3fx(_mm_loadu_ps(src));
}
|
||||||
|
|
||||||
|
/*! Unaligned store of all four lanes of `v` to `ptr` (writes 16 bytes). */
static __forceinline void storeu ( void* ptr, const Vec3fx& v )
{
  float* const dst = static_cast<float*>(ptr);
  _mm_storeu_ps(dst, v.m128);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx( ZeroTy ) : m128(_mm_setzero_ps()) {}
|
||||||
|
__forceinline Vec3fx( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
|
||||||
|
__forceinline Vec3fx( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
|
||||||
|
__forceinline Vec3fx( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Array Access
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Read-only access to component `index` (0 = x, 1 = y, 2 = z); asserts index < 3. */
__forceinline const float& operator []( const size_t index ) const
{
  assert(index < 3);
  const float* const lanes = &x;
  return lanes[index];
}
|
||||||
|
/*! Mutable access to component `index` (0 = x, 1 = y, 2 = z); asserts index < 3. */
__forceinline float& operator []( const size_t index )
{
  assert(index < 3);
  float* const lanes = &x;
  return lanes[index];
}
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
|
||||||
|
/*! Negation: flips the sign bit of every lane (all four) with an XOR. */
__forceinline Vec3fx operator -( const Vec3fx& a )
{
  return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
}
|
||||||
|
/*! Absolute value: clears the sign bit of every lane with an AND. */
__forceinline Vec3fx abs ( const Vec3fx& a )
{
  return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)));
}
|
||||||
|
/*! Per-lane sign: -1 where the lane compares less than zero, +1 otherwise
 *  (so zero, and any lane for which the `<` compare is false, maps to +1). */
__forceinline Vec3fx sign ( const Vec3fx& a )
{
  const __m128 plus_one    = Vec3fx(one).m128;
  const __m128 minus_one   = (-Vec3fx(one)).m128;
  const __m128 is_negative = _mm_cmplt_ps(a.m128, Vec3fx(zero).m128);
  return blendv_ps(plus_one, minus_one, is_negative);
}
|
||||||
|
|
||||||
|
/*! Per-lane reciprocal: a hardware estimate r refined by one
 *  Newton-Raphson step, r * (2 - r*a). */
__forceinline Vec3fx rcp ( const Vec3fx& a )
{
  // initial estimate
#if defined(__AVX512VL__)
  const __m128 estimate = _mm_rcp14_ps(a.m128);
#else
  const __m128 estimate = _mm_rcp_ps(a.m128);
#endif

  // correction factor (2 - r*a), fused when FMA is available
#if defined(__AVX2__)
  const __m128 correction = _mm_fnmadd_ps(estimate, a.m128, vfloat4(2.0f));
#else
  const __m128 correction = _mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(estimate, a.m128));
#endif

  return _mm_mul_ps(estimate, correction);
}
|
||||||
|
|
||||||
|
__forceinline Vec3fx sqrt ( const Vec3fx& a ) { return _mm_sqrt_ps(a.m128); }
|
||||||
|
__forceinline Vec3fx sqr ( const Vec3fx& a ) { return _mm_mul_ps(a.m128,a.m128); }
|
||||||
|
|
||||||
|
/*! Per-lane reciprocal square root: a hardware estimate r refined by one
 *  Newton-Raphson step, 1.5*r + (-0.5*a*r) * (r*r). */
__forceinline Vec3fx rsqrt( const Vec3fx& a )
{
#if defined(__AVX512VL__)
  const __m128 r = _mm_rsqrt14_ps(a.m128);
#else
  const __m128 r = _mm_rsqrt_ps(a.m128);
#endif
  const __m128 three_halves_r = _mm_mul_ps(_mm_set1_ps(1.5f), r);
  const __m128 neg_half_a_r   = _mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r);
  const __m128 r_squared      = _mm_mul_ps(r, r);
  return _mm_add_ps(three_halves_r, _mm_mul_ps(neg_half_a_r, r_squared));
}
|
||||||
|
|
||||||
|
/*! Replaces every lane whose magnitude is below min_rcp_input with
 *  +min_rcp_input; other lanes pass through unchanged. */
__forceinline Vec3fx zero_fix(const Vec3fx& a)
{
  const __m128 floor_value = _mm_set1_ps(min_rcp_input);
  const __m128 magnitude   = abs(a).m128;
  const __m128 too_small   = _mm_cmplt_ps(magnitude, _mm_set1_ps(min_rcp_input));
  return blendv_ps(a.m128, floor_value, too_small);
}
|
||||||
|
__forceinline Vec3fx rcp_safe(const Vec3fx& a) {
|
||||||
|
return rcp(zero_fix(a));
|
||||||
|
}
|
||||||
|
__forceinline Vec3fx log ( const Vec3fx& a ) {
|
||||||
|
return Vec3fx(logf(a.x),logf(a.y),logf(a.z));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec3fx exp ( const Vec3fx& a ) {
|
||||||
|
return Vec3fx(expf(a.x),expf(a.y),expf(a.z));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return _mm_add_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return _mm_sub_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return _mm_mul_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
|
||||||
|
__forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
|
||||||
|
__forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return _mm_div_ps(a.m128,b.m128); }
|
||||||
|
__forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
|
||||||
|
__forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
|
||||||
|
|
||||||
|
__forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) { return _mm_min_ps(a.m128,b.m128); }
|
||||||
|
__forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) { return _mm_max_ps(a.m128,b.m128); }
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__) || defined(__aarch64__)
|
||||||
|
__forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a.m128);
|
||||||
|
const vint4 bi = _mm_castps_si128(b.m128);
|
||||||
|
const vint4 ci = _mm_min_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__) || defined(__aarch64__)
|
||||||
|
__forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a.m128);
|
||||||
|
const vint4 bi = _mm_castps_si128(b.m128);
|
||||||
|
const vint4 ci = _mm_max_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
|
||||||
|
return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Ternary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
|
||||||
|
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
|
||||||
|
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
|
||||||
|
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
|
||||||
|
#else
|
||||||
|
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b+c; }
|
||||||
|
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b-c; }
|
||||||
|
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b+c;}
|
||||||
|
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b-c; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
|
||||||
|
__forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
|
||||||
|
__forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
|
||||||
|
__forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
|
||||||
|
__forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
|
||||||
|
__forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
|
||||||
|
__forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Horizontal sum x + y + z; the fourth lane does not contribute. */
__forceinline float reduce_add(const Vec3fx& v)
{
  const vfloat4 lanes(v.m128);
  const vfloat4 y_in_front = shuffle<1>(lanes);
  const vfloat4 z_in_front = shuffle<2>(lanes);
  const vfloat4 total = lanes + y_in_front + z_in_front;
  return _mm_cvtss_f32(total); // lane 0 holds the sum
}
|
||||||
|
|
||||||
|
__forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
|
||||||
|
__forceinline float reduce_min(const Vec3fx& v) { return min(v.x,v.y,v.z); }
|
||||||
|
__forceinline float reduce_max(const Vec3fx& v) { return max(v.x,v.y,v.z); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
|
||||||
|
__forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
|
||||||
|
|
||||||
|
__forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmple_ps (a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnle_ps(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); }
|
||||||
|
|
||||||
|
__forceinline bool isvalid ( const Vec3fx& v ) {
|
||||||
|
return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline bool is_finite ( const Vec3fx& a ) {
|
||||||
|
return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline bool isvalid4 ( const Vec3fx& v ) {
|
||||||
|
return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline bool is_finite4 ( const Vec3fx& a ) {
|
||||||
|
return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Euclidean Space Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
|
||||||
|
return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
|
||||||
|
return reduce_add(a*b);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*! Cross product a x b. Computes a*b.yzx - a.yzx*b, which holds the
 *  result in (z,x,y) lane order, then rotates it back to (x,y,z). */
__forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b )
{
  const vfloat4 lhs(a.m128);
  const vfloat4 rhs(b.m128);
  const vfloat4 lhs_yzx = shuffle<1,2,0,3>(lhs);
  const vfloat4 rhs_yzx = shuffle<1,2,0,3>(rhs);
  const vfloat4 rotated = msub(lhs, rhs_yzx, lhs_yzx*rhs);
  return Vec3fx(shuffle<1,2,0,3>(rotated));
}
|
||||||
|
|
||||||
|
__forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
|
||||||
|
__forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
|
||||||
|
__forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
|
||||||
|
__forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
|
||||||
|
__forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
|
||||||
|
__forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
|
||||||
|
__forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
|
||||||
|
__forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
|
||||||
|
|
||||||
|
/*! Normalizes `a`, except that a vector whose squared length is exactly
 *  zero is returned unchanged. */
__forceinline Vec3fx normalize_safe( const Vec3fx& a )
{
  const float len_sq = dot(a,a);
  if (unlikely(len_sq == 0.0f))
    return a;
  return a*rsqrt(len_sq);
}
|
||||||
|
|
||||||
|
/*! differentiated normalization */
|
||||||
|
// Evaluates ((p.p)*dp - (p.dp)*p) * rcp(p.p) * rsqrt(p.p).
__forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
{
  const float p_dot_p  = dot(p,p);
  const float p_dot_dp = dot(p,dp);
  const Vec3fx numerator = p_dot_p*dp - p_dot_dp*p;
  return numerator*rcp(p_dot_p)*rsqrt(p_dot_p);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Returns `t` when `s` is true and `f` otherwise, by blending with an
 *  all-ones or all-zeros lane mask. */
__forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f )
{
  __m128 lane_mask = _mm_setzero_ps();
  if (s) {
    // comparing zero with itself sets every bit of every lane
    lane_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
  }
  return blendv_ps(f.m128, t.m128, lane_mask);
}
|
||||||
|
|
||||||
|
__forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
|
||||||
|
return blendv_ps(f.m128, t.m128, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
|
||||||
|
return madd(1.0f-t,v0,t*v1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Index (0, 1 or 2) of the component with the largest absolute value.
 *  Comparisons are strict: x == y falls through to y, and z wins a tie
 *  against either x or y. */
__forceinline int maxDim ( const Vec3fx& a )
{
  const Vec3fx mag = abs(a);
  const bool x_beats_y = mag.x > mag.y;
  const int   candidate = x_beats_y ? 0 : 1;
  const float best      = x_beats_y ? mag.x : mag.y;
  return (best > mag.z) ? candidate : 2;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Rounding Functions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__forceinline Vec3fx trunc(const Vec3fx& a) { return vrndq_f32(a.m128); }
|
||||||
|
__forceinline Vec3fx floor(const Vec3fx& a) { return vrndmq_f32(a.m128); }
|
||||||
|
__forceinline Vec3fx ceil (const Vec3fx& a) { return vrndpq_f32(a.m128); }
|
||||||
|
#elif defined (__SSE4_1__)
|
||||||
|
// Truncates every lane toward zero. The rounding mode must be
// _MM_FROUND_TO_ZERO: _MM_FROUND_TO_NEAREST_INT rounds to the nearest
// integer (e.g. 1.7 -> 2), which disagrees with the aarch64 (vrndq_f32)
// and scalar (truncf) variants of this function.
__forceinline Vec3fx trunc( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_ZERO); }
|
||||||
|
__forceinline Vec3fx floor( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); }
|
||||||
|
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); }
|
||||||
|
#else
|
||||||
|
__forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(truncf(a.x),truncf(a.y),truncf(a.z)); }
|
||||||
|
__forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(floorf(a.x),floorf(a.y),floorf(a.z)); }
|
||||||
|
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! Streams the vector as "(x, y, z)"; the fourth lane is not printed. */
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a)
{
  return cout << "("
              << a.x << ", "
              << a.y << ", "
              << a.z << ")";
}
|
||||||
|
|
||||||
|
|
||||||
|
typedef Vec3fx Vec3ff;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
617
Framework/external/embree/common/math/vec3fa_sycl.h
vendored
Normal file
617
Framework/external/embree/common/math/vec3fa_sycl.h
vendored
Normal file
|
|
@ -0,0 +1,617 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/alloc.h"
|
||||||
|
#include "emath.h"
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SYCL Vec3fa Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! 16-byte aligned three-component float vector stored as plain scalar
 *  members. Only x, y and z are read or written by the members below;
 *  the fourth float is never touched. */
struct __aligned(16) Vec3fa
{
  using Scalar = float;
  enum { N = 3 };
  struct { float x,y,z, do_not_use; };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! leaves all members uninitialized */
  __forceinline Vec3fa( ) {}

  __forceinline Vec3fa ( const Vec3<float>& other )
    : x(other.x), y(other.y), z(other.z) {}

  /*! copy and assignment transfer x,y,z only */
  __forceinline Vec3fa ( const Vec3fa& other )
    : x(other.x), y(other.y), z(other.z) {}

  __forceinline Vec3fa& operator =( const Vec3fa& other )
  {
    x = other.x;
    y = other.y;
    z = other.z;
    return *this;
  }

  /*! broadcasts a to x,y,z */
  __forceinline explicit Vec3fa( const float a ) : x(a), y(a), z(a) {}

  __forceinline Vec3fa( const float x, const float y, const float z) : x(x), y(y), z(z) {}

  /*! converts each integer component to float */
  __forceinline explicit Vec3fa( const Vec3ia& a )
    : x((float)a.x), y((float)a.y), z((float)a.z) {}

  /*! widens to four lanes with the last lane set to 0 */
  __forceinline operator vfloat4() const { return vfloat4(x,y,z,0.0f); } // FIXME: we should not need this!!

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /*! reads three consecutive floats starting at a */
  static __forceinline Vec3fa load( const void* const a )
  {
    const float* const src = static_cast<const float*>(a);
    return Vec3fa(src[0],src[1],src[2]);
  }

  /*! same implementation as load(): reads three consecutive floats */
  static __forceinline Vec3fa loadu( const void* const a )
  {
    const float* const src = static_cast<const float*>(a);
    return Vec3fa(src[0],src[1],src[2]);
  }

  /*! writes x,y,z to three consecutive floats starting at a */
  static __forceinline void storeu ( void* a, const Vec3fa& v )
  {
    float* const dst = static_cast<float*>(a);
    dst[0] = v.x;
    dst[1] = v.y;
    dst[2] = v.z;
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fa( ZeroTy   ) : x(0.0f), y(0.0f), z(0.0f) {}
  __forceinline Vec3fa( OneTy    ) : x(1.0f), y(1.0f), z(1.0f) {}
  __forceinline Vec3fa( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY) {}
  __forceinline Vec3fa( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  /*! component access by index (0 = x, 1 = y, 2 = z); asserts index < 3 */
  __forceinline const float& operator []( const size_t index ) const
  {
    assert(index < 3);
    const float* const lanes = &x;
    return lanes[index];
  }

  __forceinline float& operator []( const size_t index )
  {
    assert(index < 3);
    float* const lanes = &x;
    return lanes[index];
  }
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
|
||||||
|
__forceinline Vec3fa operator -( const Vec3fa& a ) { return Vec3fa(-a.x,-a.y,-a.z); }
|
||||||
|
__forceinline Vec3fa abs ( const Vec3fa& a ) { return Vec3fa(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z)); }
|
||||||
|
__forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); }
|
||||||
|
|
||||||
|
//__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
|
||||||
|
// Per-component native (reduced-precision) reciprocal. Uses the public
// sycl::native::recip entry point rather than the implementation-internal
// __sycl_std::__invoke_native_recip, which is not part of the SYCL API.
__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::native::recip(a.x),sycl::native::recip(a.y),sycl::native::recip(a.z)); }
|
||||||
|
__forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); }
|
||||||
|
__forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); }
|
||||||
|
|
||||||
|
__forceinline Vec3fa rsqrt( const Vec3fa& a ) { return Vec3fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z)); }
|
||||||
|
|
||||||
|
/*! Replaces every component whose magnitude is below min_rcp_input with
 *  +min_rcp_input; other components pass through unchanged. */
__forceinline Vec3fa zero_fix(const Vec3fa& a)
{
  Vec3fa fixed(a);
  if (sycl::fabs(a.x) < min_rcp_input) fixed.x = min_rcp_input;
  if (sycl::fabs(a.y) < min_rcp_input) fixed.y = min_rcp_input;
  if (sycl::fabs(a.z) < min_rcp_input) fixed.z = min_rcp_input;
  return fixed;
}
|
||||||
|
__forceinline Vec3fa rcp_safe(const Vec3fa& a) {
|
||||||
|
return rcp(zero_fix(a));
|
||||||
|
}
|
||||||
|
__forceinline Vec3fa log ( const Vec3fa& a ) {
|
||||||
|
return Vec3fa(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec3fa exp ( const Vec3fa& a ) {
|
||||||
|
return Vec3fa(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x+b.x, a.y+b.y, a.z+b.z); }
|
||||||
|
__forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x-b.x, a.y-b.y, a.z-b.z); }
|
||||||
|
__forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x*b.x, a.y*b.y, a.z*b.z); }
|
||||||
|
__forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
|
||||||
|
__forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
|
||||||
|
__forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x/b.x, a.y/b.y, a.z/b.z); }
|
||||||
|
__forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return Vec3fa(a.x/b, a.y/b, a.z/b); }
|
||||||
|
__forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return Vec3fa(a/b.x, a/b.y, a/b.z); }
|
||||||
|
|
||||||
|
__forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) {
|
||||||
|
return Vec3fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z));
|
||||||
|
}
|
||||||
|
__forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) {
|
||||||
|
return Vec3fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a);
|
||||||
|
const vint4 bi = _mm_castps_si128(b);
|
||||||
|
const vint4 ci = _mm_min_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a);
|
||||||
|
const vint4 bi = _mm_castps_si128(b);
|
||||||
|
const vint4 ci = _mm_max_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
/*! Raises each of x, y and z to the power `b`. Uses the sycl::pow
 *  built-in so this function matches the sycl:: math calls used by the
 *  neighbouring log/exp/sqrt instead of the C library powf. */
__forceinline Vec3fa pow ( const Vec3fa& a, const float& b )
{
  const float exponent = b;
  return Vec3fa(sycl::pow(a.x,exponent),
                sycl::pow(a.y,exponent),
                sycl::pow(a.z,exponent));
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Ternary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); }
|
||||||
|
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); }
|
||||||
|
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z)); }
|
||||||
|
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z)); }
|
||||||
|
|
||||||
|
__forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
|
||||||
|
__forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
|
||||||
|
__forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
|
||||||
|
__forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
|
||||||
|
__forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
|
||||||
|
__forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
|
||||||
|
__forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline float reduce_add(const Vec3fa& v) { return v.x+v.y+v.z; }
|
||||||
|
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
|
||||||
|
__forceinline float reduce_min(const Vec3fa& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
|
||||||
|
__forceinline float reduce_max(const Vec3fa& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
|
||||||
|
__forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
|
||||||
|
|
||||||
|
__forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
|
||||||
|
__forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
|
||||||
|
__forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
|
||||||
|
__forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
|
||||||
|
__forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
|
||||||
|
__forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
|
||||||
|
|
||||||
|
__forceinline bool isvalid ( const Vec3fa& v ) {
|
||||||
|
return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline bool is_finite ( const Vec3fa& a ) {
|
||||||
|
return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Euclidean Space Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! dot product: sum of the component-wise products of a and b */
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b )
{
  const Vec3fa products = a*b;
  return reduce_add(products);
}
|
||||||
|
|
||||||
|
/*! cross product of a and b, each component evaluated with msub */
__forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b )
{
  const float cx = msub(a.y,b.z,a.z*b.y);
  const float cy = msub(a.z,b.x,a.x*b.z);
  const float cz = msub(a.x,b.y,a.y*b.x);
  return Vec3fa(cx,cy,cz);
}
|
||||||
|
|
||||||
|
__forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
|
||||||
|
/*! reciprocal of the vector length, computed as rsqrt(dot(a,a)) */
__forceinline float rcp_length ( const Vec3fa& a )
{
  const float len2 = dot(a,a);
  return rsqrt(len2);
}
|
||||||
|
__forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
|
||||||
|
/*! vector length, computed as sqrt(dot(a,a)) */
__forceinline float length ( const Vec3fa& a )
{
  const float len2 = dot(a,a);
  return sqrt(len2);
}
|
||||||
|
/*! scales a by rsqrt(dot(a,a)); no guard against a zero-length input */
__forceinline Vec3fa normalize( const Vec3fa& a )
{
  const float inv_len = rsqrt(dot(a,a));
  return a*inv_len;
}
|
||||||
|
/*! length of the difference vector a-b */
__forceinline float distance ( const Vec3fa& a, const Vec3fa& b )
{
  const Vec3fa delta = a-b;
  return length(delta);
}
|
||||||
|
/*! evaluates madd(d.x, d.y+d.z, d.y*d.z); presumably x*(y+z)+y*z, i.e. half
 *  the surface area of a box with extents d (assumes madd(a,b,c) = a*b+c) */
__forceinline float halfArea ( const Vec3fa& d )
{
  const float yz_sum  = d.y+d.z;
  const float yz_prod = d.y*d.z;
  return madd(d.x,yz_sum,yz_prod);
}
|
||||||
|
/*! twice halfArea(d) */
__forceinline float area ( const Vec3fa& d )
{
  const float half = halfArea(d);
  return 2.0f*half;
}
|
||||||
|
|
||||||
|
/*! normalizes a, but returns a unchanged when its squared length is exactly zero */
__forceinline Vec3fa normalize_safe( const Vec3fa& a )
{
  const float len2 = dot(a,a);
  if (unlikely(len2 == 0.0f))
    return a;
  return a*rsqrt(len2);
}
|
||||||
|
|
||||||
|
/*! differentiated normalization */
|
||||||
|
__forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
|
||||||
|
{
|
||||||
|
const float pp = dot(p,p);
|
||||||
|
const float pdp = dot(p,dp);
|
||||||
|
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! returns a vector built from the x, y, z of t when s is true, otherwise from those of f */
__forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f )
{
  if (s)
    return Vec3fa(t.x, t.y, t.z);
  else
    return Vec3fa(f.x, f.y, f.z);
}
|
||||||
|
|
||||||
|
/*! per-component select: each of x, y, z is taken from t where the mask is set, else from f */
__forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f )
{
  const float rx = s.x ? t.x : f.x;
  const float ry = s.y ? t.y : f.y;
  const float rz = s.z ? t.z : f.z;
  return Vec3fa(rx, ry, rz);
}
|
||||||
|
|
||||||
|
/*! linear interpolation between v0 (t = 0) and v1 (t = 1), evaluated as madd(1-t, v0, t*v1) */
__forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t)
{
  const float one_minus_t = 1.0f-t;
  return madd(one_minus_t,v0,t*v1);
}
|
||||||
|
|
||||||
|
/*! index (0 = x, 1 = y, 2 = z) of the component with the largest absolute
 *  value; all comparisons are strict, so ties resolve to the higher index */
__forceinline int maxDim ( const Vec3fa& a )
{
  const Vec3fa mag = abs(a);
  if (mag.x > mag.y)
    return (mag.x > mag.z) ? 0 : 2;
  return (mag.y > mag.z) ? 1 : 2;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Rounding Functions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! component-wise sycl::trunc of x, y and z */
__forceinline Vec3fa trunc( const Vec3fa& a )
{
  const float tx = sycl::trunc(a.x);
  const float ty = sycl::trunc(a.y);
  const float tz = sycl::trunc(a.z);
  return Vec3fa(tx,ty,tz);
}
|
||||||
|
/*! component-wise sycl::floor of x, y and z */
__forceinline Vec3fa floor( const Vec3fa& a )
{
  const float fx = sycl::floor(a.x);
  const float fy = sycl::floor(a.y);
  const float fz = sycl::floor(a.z);
  return Vec3fa(fx,fy,fz);
}
|
||||||
|
/*! component-wise sycl::ceil of x, y and z */
__forceinline Vec3fa ceil ( const Vec3fa& a )
{
  const float cx = sycl::ceil(a.x);
  const float cy = sycl::ceil(a.y);
  const float cz = sycl::ceil(a.z);
  return Vec3fa(cx,cy,cz);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
inline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
|
||||||
|
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec2fa::Vec2fa(const Vec3fa& a)
|
||||||
|
: x(a.x), y(a.y) {}
|
||||||
|
|
||||||
|
__forceinline Vec3ia::Vec3ia( const Vec3fa& a )
|
||||||
|
: x((int)a.x), y((int)a.y), z((int)a.z) {}
|
||||||
|
|
||||||
|
typedef Vec3fa Vec3fa_t;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SYCL Vec3fx Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct __aligned(16) Vec3fx
|
||||||
|
{
|
||||||
|
//ALIGNED_STRUCT_(16);
|
||||||
|
|
||||||
|
typedef float Scalar;
|
||||||
|
enum { N = 3 };
|
||||||
|
struct { float x,y,z; union { int a; unsigned u; float w; }; };
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constructors, Assignment & Cast Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx( ) {}
|
||||||
|
//__forceinline Vec3fx( const __m128 a ) : m128(a) {}
|
||||||
|
__forceinline explicit Vec3fx(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
|
||||||
|
|
||||||
|
__forceinline explicit Vec3fx(const Vec3fa& v) : x(v.x), y(v.y), z(v.z), w(0.0f) {}
|
||||||
|
__forceinline operator Vec3fa() const { return Vec3fa(x,y,z); }
|
||||||
|
|
||||||
|
/*! converts a generic Vec3<float>; the fourth component is zeroed (as the
 *  Vec3fa and Vec3ia conversion constructors do) so that the four-lane
 *  operators never read an uninitialized w */
__forceinline explicit Vec3fx ( const Vec3<float>& other ) : x(other.x), y(other.y), z(other.z), w(0.0f) {}
|
||||||
|
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
|
||||||
|
|
||||||
|
//__forceinline Vec3fx ( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; }
|
||||||
|
//__forceinline Vec3fx& operator =( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; return *this; }
|
||||||
|
|
||||||
|
__forceinline explicit Vec3fx( const float a ) : x(a), y(a), z(a), w(a) {}
|
||||||
|
/*! three-component constructor; note that the fourth component w is initialized with the value of z */
__forceinline Vec3fx( const float x, const float y, const float z) : x(x), y(y), z(z), w(z) {}
|
||||||
|
|
||||||
|
__forceinline Vec3fx( const Vec3fa& other, const int a1) : x(other.x), y(other.y), z(other.z), a(a1) {}
|
||||||
|
__forceinline Vec3fx( const Vec3fa& other, const unsigned a1) : x(other.x), y(other.y), z(other.z), u(a1) {}
|
||||||
|
__forceinline Vec3fx( const Vec3fa& other, const float w1) : x(other.x), y(other.y), z(other.z), w(w1) {}
|
||||||
|
|
||||||
|
//__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
|
||||||
|
//__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
|
||||||
|
__forceinline Vec3fx( const float x, const float y, const float z, const float w) : x(x), y(y), z(z), w(w) {}
|
||||||
|
|
||||||
|
__forceinline explicit Vec3fx( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z), w(0.0f) {}
|
||||||
|
|
||||||
|
//__forceinline operator const __m128&() const { return m128; }
|
||||||
|
//__forceinline operator __m128&() { return m128; }
|
||||||
|
__forceinline operator vfloat4() const { return vfloat4(x,y,z,w); }
|
||||||
|
|
||||||
|
//friend __forceinline Vec3fx copy_a( const Vec3fx& a, const Vec3fx& b ) { Vec3fx c = a; c.a = b.a; return c; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Loads and Stores
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! reads four consecutive floats starting at a into x, y, z and w */
static __forceinline Vec3fx load( const void* const a )
{
  const float* const src = (const float*)a;
  return Vec3fx(src[0],src[1],src[2],src[3]);
}
|
||||||
|
|
||||||
|
/*! reads four consecutive floats starting at a into x, y, z and w
 *  (same element-wise implementation as load) */
static __forceinline Vec3fx loadu( const void* const a )
{
  const float* const src = (const float*)a;
  return Vec3fx(src[0],src[1],src[2],src[3]);
}
|
||||||
|
|
||||||
|
/*! writes x, y, z and w of v to four consecutive floats starting at a */
static __forceinline void storeu ( void* a, const Vec3fx& v )
{
  float* const dst = (float*)a;
  dst[0] = v.x;
  dst[1] = v.y;
  dst[2] = v.z;
  dst[3] = v.w;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {}
|
||||||
|
__forceinline Vec3fx( OneTy ) : x(1.0f), y(1.0f), z(1.0f), w(1.0f) {}
|
||||||
|
__forceinline Vec3fx( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY), w(+INFINITY) {}
|
||||||
|
__forceinline Vec3fx( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY), w(-INFINITY) {}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Array Access
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
|
||||||
|
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
|
||||||
|
__forceinline Vec3fx operator -( const Vec3fx& a ) { return Vec3fx(-a.x,-a.y,-a.z,-a.w); }
|
||||||
|
__forceinline Vec3fx abs ( const Vec3fx& a ) { return Vec3fx(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z),sycl::fabs(a.w)); }
|
||||||
|
/*! component-wise sycl::sign of x, y, z and w; the fourth result is derived
 *  from a.w (not a.z), consistent with the other four-lane unary operators */
__forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.w)); }
|
||||||
|
|
||||||
|
//__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
|
||||||
|
__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z),__sycl_std::__invoke_native_recip<float>(a.w)); }
|
||||||
|
__forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); }
|
||||||
|
__forceinline Vec3fx sqr ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); }
|
||||||
|
|
||||||
|
__forceinline Vec3fx rsqrt( const Vec3fx& a ) { return Vec3fx(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z),sycl::rsqrt(a.w)); }
|
||||||
|
|
||||||
|
/*! replaces every x, y, z component whose magnitude is below min_rcp_input
 *  by +min_rcp_input; the result is built with the three-component constructor */
__forceinline Vec3fx zero_fix(const Vec3fx& a)
{
  float fx = a.x;
  float fy = a.y;
  float fz = a.z;
  if (sycl::fabs(a.x) < min_rcp_input) fx = min_rcp_input;
  if (sycl::fabs(a.y) < min_rcp_input) fy = min_rcp_input;
  if (sycl::fabs(a.z) < min_rcp_input) fz = min_rcp_input;
  return Vec3fx(fx,fy,fz);
}
|
||||||
|
/*! reciprocal of a after near-zero components were clamped by zero_fix */
__forceinline Vec3fx rcp_safe(const Vec3fx& a)
{
  const Vec3fx clamped = zero_fix(a);
  return rcp(clamped);
}
|
||||||
|
__forceinline Vec3fx log ( const Vec3fx& a ) {
|
||||||
|
return Vec3fx(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec3fx exp ( const Vec3fx& a ) {
|
||||||
|
return Vec3fx(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); }
|
||||||
|
__forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); }
|
||||||
|
__forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); }
|
||||||
|
__forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
|
||||||
|
__forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
|
||||||
|
__forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); }
|
||||||
|
__forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return Vec3fx(a.x/b, a.y/b, a.z/b, a.w/b); }
|
||||||
|
__forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return Vec3fx(a/b.x, a/b.y, a/b.z, a/b.w); }
|
||||||
|
|
||||||
|
/*! component-wise minimum of all four components (via sycl::fmin) */
__forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b )
{
  const float mx = sycl::fmin(a.x,b.x);
  const float my = sycl::fmin(a.y,b.y);
  const float mz = sycl::fmin(a.z,b.z);
  const float mw = sycl::fmin(a.w,b.w);
  return Vec3fx(mx, my, mz, mw);
}
|
||||||
|
/*! component-wise maximum of all four components (via sycl::fmax) */
__forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b )
{
  const float mx = sycl::fmax(a.x,b.x);
  const float my = sycl::fmax(a.y,b.y);
  const float mz = sycl::fmax(a.z,b.z);
  const float mw = sycl::fmax(a.w,b.w);
  return Vec3fx(mx, my, mz, mw);
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a);
|
||||||
|
const vint4 bi = _mm_castps_si128(b);
|
||||||
|
const vint4 ci = _mm_min_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
__forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
|
||||||
|
const vint4 ai = _mm_castps_si128(a);
|
||||||
|
const vint4 bi = _mm_castps_si128(b);
|
||||||
|
const vint4 ci = _mm_max_epi32(ai,bi);
|
||||||
|
return _mm_castsi128_ps(ci);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
|
||||||
|
return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Ternary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
|
||||||
|
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
|
||||||
|
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z), nmadd(a.w,b.w,c.w)); }
|
||||||
|
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z), nmsub(a.w,b.w,c.w)); }
|
||||||
|
|
||||||
|
__forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
|
||||||
|
__forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
|
||||||
|
__forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
|
||||||
|
__forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
|
||||||
|
__forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
|
||||||
|
__forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
|
||||||
|
__forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline float reduce_add(const Vec3fx& v) { return v.x+v.y+v.z; }
|
||||||
|
__forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
|
||||||
|
/*! smallest of the x, y and z components; w is not considered */
__forceinline float reduce_min(const Vec3fx& v)
{
  const float min_xy = sycl::fmin(v.x,v.y);
  return sycl::fmin(min_xy,v.z);
}
|
||||||
|
/*! largest of the x, y and z components; w is not considered */
__forceinline float reduce_max(const Vec3fx& v)
{
  const float max_xy = sycl::fmax(v.x,v.y);
  return sycl::fmax(max_xy,v.z);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! true only when x, y and z of a and b all compare equal; w is ignored */
__forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b )
{
  if (!(a.x == b.x)) return false;
  if (!(a.y == b.y)) return false;
  return a.z == b.z;
}
|
||||||
|
/*! true when at least one of x, y, z differs between a and b; w is ignored */
__forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b )
{
  if (a.x != b.x) return true;
  if (a.y != b.y) return true;
  return a.z != b.z;
}
|
||||||
|
|
||||||
|
__forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
|
||||||
|
__forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
|
||||||
|
__forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
|
||||||
|
__forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
|
||||||
|
__forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
|
||||||
|
__forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
|
||||||
|
|
||||||
|
__forceinline bool isvalid ( const Vec3fx& v ) {
|
||||||
|
return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline bool is_finite ( const Vec3fx& a ) {
|
||||||
|
return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! true when all four components (x, y, z, w) lie within [-FLT_LARGE, +FLT_LARGE];
 *  the tests are combined with bitwise & so no short-circuit branches are introduced */
__forceinline bool isvalid4 ( const Vec3fx& v )
{
  const bool x_ok = (v.x >= -FLT_LARGE) & (v.x <= +FLT_LARGE);
  const bool y_ok = (v.y >= -FLT_LARGE) & (v.y <= +FLT_LARGE);
  const bool z_ok = (v.z >= -FLT_LARGE) & (v.z <= +FLT_LARGE);
  const bool w_ok = (v.w >= -FLT_LARGE) & (v.w <= +FLT_LARGE);
  return x_ok & y_ok & z_ok & w_ok;
}
|
||||||
|
|
||||||
|
/*! true when all four components (x, y, z, w) lie within [-FLT_MAX, +FLT_MAX];
 *  the tests are combined with bitwise & so no short-circuit branches are introduced */
__forceinline bool is_finite4 ( const Vec3fx& v )
{
  const bool x_ok = (v.x >= -FLT_MAX) & (v.x <= +FLT_MAX);
  const bool y_ok = (v.y >= -FLT_MAX) & (v.y <= +FLT_MAX);
  const bool z_ok = (v.z >= -FLT_MAX) & (v.z <= +FLT_MAX);
  const bool w_ok = (v.w >= -FLT_MAX) & (v.w <= +FLT_MAX);
  return x_ok & y_ok & z_ok & w_ok;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Euclidean Space Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! dot product over x, y and z (reduce_add ignores the w lane of the product) */
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b )
{
  const Vec3fx products = a*b;
  return reduce_add(products);
}
|
||||||
|
|
||||||
|
/*! cross product of the x, y, z parts of a and b, each component evaluated with msub */
__forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b )
{
  const float cx = msub(a.y,b.z,a.z*b.y);
  const float cy = msub(a.z,b.x,a.x*b.z);
  const float cz = msub(a.x,b.y,a.y*b.x);
  return Vec3fx(cx,cy,cz);
}
|
||||||
|
|
||||||
|
__forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
|
||||||
|
__forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
|
||||||
|
__forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
|
||||||
|
__forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
|
||||||
|
__forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
|
||||||
|
__forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
|
||||||
|
__forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
|
||||||
|
__forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
|
||||||
|
|
||||||
|
/*! normalizes a, but returns a unchanged when its squared length is exactly zero */
__forceinline Vec3fx normalize_safe( const Vec3fx& a )
{
  const float len2 = dot(a,a);
  if (unlikely(len2 == 0.0f))
    return a;
  return a*rsqrt(len2);
}
|
||||||
|
|
||||||
|
/*! differentiated normalization */
|
||||||
|
__forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
|
||||||
|
{
|
||||||
|
const float pp = dot(p,p);
|
||||||
|
const float pdp = dot(p,dp);
|
||||||
|
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*! returns a vector built from all four components of t when s is true, otherwise of f */
__forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f )
{
  if (s)
    return Vec3fx(t.x, t.y, t.z, t.w);
  else
    return Vec3fx(f.x, f.y, f.z, f.w);
}
|
||||||
|
|
||||||
|
__forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
|
||||||
|
return Vec3fx(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
|
||||||
|
return madd(1.0f-t,v0,t*v1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! index (0 = x, 1 = y, 2 = z) of the component with the largest absolute
 *  value; all comparisons are strict, so ties resolve to the higher index */
__forceinline int maxDim ( const Vec3fx& a )
{
  const Vec3fx mag = abs(a);
  if (mag.x > mag.y)
    return (mag.x > mag.z) ? 0 : 2;
  return (mag.y > mag.z) ? 1 : 2;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Rounding Functions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z),sycl::trunc(a.w)); }
|
||||||
|
__forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z),sycl::floor(a.w)); }
|
||||||
|
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z),sycl::ceil (a.w)); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
inline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
|
||||||
|
return cout << "(" << a.x << ", " << a.y << ", " << a.z << "," << a.w << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef Vec3fx Vec3ff;
|
||||||
|
|
||||||
|
//__forceinline Vec2fa::Vec2fa(const Vec3fx& a)
|
||||||
|
// : x(a.x), y(a.y) {}
|
||||||
|
|
||||||
|
//__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
|
||||||
|
// : x((int)a.x), y((int)a.y), z((int)a.z) {}
|
||||||
|
}
|
||||||
203
Framework/external/embree/common/math/vec3ia.h
vendored
Normal file
203
Framework/external/embree/common/math/vec3ia.h
vendored
Normal file
|
|
@ -0,0 +1,203 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/alloc.h"
|
||||||
|
#include "emath.h"
|
||||||
|
|
||||||
|
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
# include "vec3ia_sycl.h"
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE Vec3ia Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct __aligned(16) Vec3ia
|
||||||
|
{
|
||||||
|
ALIGNED_STRUCT_(16);
|
||||||
|
|
||||||
|
union {
|
||||||
|
__m128i m128;
|
||||||
|
struct { int x,y,z; };
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef int Scalar;
|
||||||
|
enum { N = 3 };
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constructors, Assignment & Cast Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia( ) {}
|
||||||
|
__forceinline Vec3ia( const __m128i a ) : m128(a) {}
|
||||||
|
__forceinline Vec3ia( const Vec3ia& other ) : m128(other.m128) {}
|
||||||
|
__forceinline Vec3ia& operator =(const Vec3ia& other) { m128 = other.m128; return *this; }
|
||||||
|
|
||||||
|
__forceinline explicit Vec3ia( const int a ) : m128(_mm_set1_epi32(a)) {}
|
||||||
|
/*! three-component constructor; _mm_set_epi32 takes its arguments from the highest lane
 *  down to the lowest, so lanes 0..2 hold x, y, z and the unused fourth lane repeats z */
__forceinline Vec3ia( const int x, const int y, const int z) : m128(_mm_set_epi32(z, z, y, x)) {}
|
||||||
|
__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}
|
||||||
|
|
||||||
|
__forceinline operator const __m128i&() const { return m128; }
|
||||||
|
__forceinline operator __m128i&() { return m128; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia( ZeroTy ) : m128(_mm_setzero_si128()) {}
|
||||||
|
__forceinline Vec3ia( OneTy ) : m128(_mm_set1_epi32(1)) {}
|
||||||
|
__forceinline Vec3ia( PosInfTy ) : m128(_mm_set1_epi32(pos_inf)) {}
|
||||||
|
__forceinline Vec3ia( NegInfTy ) : m128(_mm_set1_epi32(neg_inf)) {}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Array Access
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
|
||||||
|
__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator +( const Vec3ia& a ) { return a; }
|
||||||
|
__forceinline Vec3ia operator -( const Vec3ia& a ) { return _mm_sub_epi32(_mm_setzero_si128(), a.m128); }
|
||||||
|
#if (defined(__aarch64__))
|
||||||
|
__forceinline Vec3ia abs ( const Vec3ia& a ) { return vabsq_s32(a.m128); }
|
||||||
|
#elif defined(__SSSE3__)
|
||||||
|
__forceinline Vec3ia abs ( const Vec3ia& a ) { return _mm_abs_epi32(a.m128); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return _mm_add_epi32(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return _mm_sub_epi32(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; }
|
||||||
|
|
||||||
|
#if defined(__aarch64__) || defined(__SSE4_1__)
|
||||||
|
__forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return _mm_mullo_epi32(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return _mm_and_si128(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return _mm_or_si128(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return _mm_xor_si128(a.m128, b.m128); }
|
||||||
|
__forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return _mm_slli_epi32(a.m128, n); }
|
||||||
|
/*! arithmetic (sign-propagating) right shift of every lane by n bits, same as sra(); use srl() for a logical shift */
__forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return _mm_srai_epi32(a.m128, n); }
|
||||||
|
|
||||||
|
__forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return _mm_slli_epi32(a.m128, b); }
|
||||||
|
__forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return _mm_srai_epi32(a.m128, b); }
|
||||||
|
__forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return _mm_srli_epi32(a.m128, b); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; }
|
||||||
|
__forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
|
||||||
|
__forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; }
|
||||||
|
|
||||||
|
#if defined(__aarch64__) || defined(__SSE4_1__)
|
||||||
|
__forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; }
|
||||||
|
__forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
|
||||||
|
__forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; }
|
||||||
|
|
||||||
|
#if !defined(__ARM_NEON)
|
||||||
|
__forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
|
||||||
|
__forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Lane-wise select: takes the lane of t where the mask m is set and the lane of f otherwise.
__forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
#if defined(__aarch64__) || defined(__SSE4_1__)
  // Blend on the float view of the integer lanes, then reinterpret the result as integers.
  const __m128 blended = _mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m);
  return _mm_castps_si128(blended);
#else
  // No blend instruction available: (mask & t) | (~mask & f).
  const __m128i mask   = _mm_castps_si128(m);
  const __m128i from_t = _mm_and_si128(mask, t);
  const __m128i from_f = _mm_andnot_si128(mask, f);
  return _mm_or_si128(from_t, from_f);
#endif
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
__forceinline int reduce_add(const Vec3ia& v) { return vaddvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0))); }
|
||||||
|
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
|
||||||
|
__forceinline int reduce_min(const Vec3ia& v) { return vminvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0x7FFFFFFF))); }
|
||||||
|
__forceinline int reduce_max(const Vec3ia& v) { return vmaxvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0x80000000))); }
|
||||||
|
#else
|
||||||
|
__forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
|
||||||
|
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
|
||||||
|
__forceinline int reduce_min(const Vec3ia& v) { return min(v.x,v.y,v.z); }
|
||||||
|
__forceinline int reduce_max(const Vec3ia& v) { return max(v.x,v.y,v.z); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) == 7; }
|
||||||
|
__forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) != 7; }
|
||||||
|
// Lexicographic ordering on (x, y, z): the first component that differs decides the result;
// vectors with identical x, y and z are not less than each other.
__forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
  return (a.x != b.x) ? (a.x < b.x)
       : (a.y != b.y) ? (a.y < b.y)
       : (a.z != b.z) ? (a.z < b.z)
       : false;
}
|
||||||
|
|
||||||
|
__forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpeq_epi32 (a.m128, b.m128)); }
|
||||||
|
__forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmplt_epi32 (a.m128, b.m128)); }
|
||||||
|
__forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpgt_epi32 (a.m128, b.m128)); }
|
||||||
|
|
||||||
|
#if defined(__aarch64__) || defined(__SSE4_1__)
|
||||||
|
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return _mm_min_epi32(a.m128,b.m128); }
|
||||||
|
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return _mm_max_epi32(a.m128,b.m128); }
|
||||||
|
#else
|
||||||
|
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return select(lt_mask(a,b),a,b); }
|
||||||
|
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return select(gt_mask(a,b),a,b); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
|
||||||
|
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
178
Framework/external/embree/common/math/vec3ia_sycl.h
vendored
Normal file
178
Framework/external/embree/common/math/vec3ia_sycl.h
vendored
Normal file
|
|
@ -0,0 +1,178 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "../sys/alloc.h"
|
||||||
|
#include "emath.h"
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SYCL Vec3ia Type
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct __aligned(16) Vec3ia
|
||||||
|
{
|
||||||
|
ALIGNED_STRUCT_(16);
|
||||||
|
|
||||||
|
struct { int x,y,z; };
|
||||||
|
|
||||||
|
typedef int Scalar;
|
||||||
|
enum { N = 3 };
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constructors, Assignment & Cast Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia( ) {}
|
||||||
|
//__forceinline Vec3ia( const __m128i a ) : m128(a) {}
|
||||||
|
|
||||||
|
__forceinline Vec3ia( const Vec3ia& other ) : x(other.x), y(other.y), z(other.z) {}
|
||||||
|
__forceinline Vec3ia& operator =(const Vec3ia& other) { x = other.x; y = other.y; z = other.z; return *this; }
|
||||||
|
|
||||||
|
__forceinline explicit Vec3ia( const int a ) : x(a), y(a), z(a) {}
|
||||||
|
__forceinline Vec3ia( const int x, const int y, const int z) : x(x), y(y), z(z) {}
|
||||||
|
//__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}
|
||||||
|
__forceinline explicit Vec3ia(const vint4& a) : x(a[0]), y(a[1]), z(a[2]) {}
|
||||||
|
|
||||||
|
__forceinline explicit Vec3ia( const Vec3fa& a );
|
||||||
|
|
||||||
|
//__forceinline operator const __m128i&() const { return m128; }
|
||||||
|
//__forceinline operator __m128i&() { return m128; }
|
||||||
|
__forceinline operator vint4() const { return vint4(x,y,z,z); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia( ZeroTy ) : x(0), y(0), z(0) {}
|
||||||
|
__forceinline Vec3ia( OneTy ) : x(1), y(1), z(1) {}
|
||||||
|
__forceinline Vec3ia( PosInfTy ) : x(0x7FFFFFFF), y(0x7FFFFFFF), z(0x7FFFFFFF) {}
|
||||||
|
__forceinline Vec3ia( NegInfTy ) : x(0x80000000), y(0x80000000), z(0x80000000) {}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Array Access
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
|
||||||
|
__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator +( const Vec3ia& a ) { return Vec3ia(+a.x,+a.y,+a.z); }
|
||||||
|
__forceinline Vec3ia operator -( const Vec3ia& a ) { return Vec3ia(-a.x,-a.y,-a.z); }
|
||||||
|
__forceinline Vec3ia abs ( const Vec3ia& a ) { return Vec3ia(sycl::abs(a.x),sycl::abs(a.y),sycl::abs(a.z)); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x+b.x, a.y+b.y, a.z+b.z); }
|
||||||
|
__forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x-b.x, a.y-b.y, a.z-b.z); }
|
||||||
|
__forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x*b.x, a.y*b.y, a.z*b.z); }
|
||||||
|
__forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x&b.x, a.y&b.y, a.z&b.z); }
|
||||||
|
__forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x|b.x, a.y|b.y, a.z|b.z); }
|
||||||
|
__forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x^b.x, a.y^b.y, a.z^b.z); }
|
||||||
|
__forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); }
|
||||||
|
__forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return Vec3ia(a.x<<n, a.y<<n, a.z<<n); }
|
||||||
|
__forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return Vec3ia(a.x>>n, a.y>>n, a.z>>n); }
|
||||||
|
|
||||||
|
__forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return Vec3ia(a.x<<b, a.y<<b, a.z<<b); }
|
||||||
|
__forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return Vec3ia(a.x>>b, a.y>>b, a.z>>b); }
|
||||||
|
__forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return Vec3ia(unsigned(a.x)>>b, unsigned(a.y)>>b, unsigned(a.z)>>b); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; }
|
||||||
|
__forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
|
||||||
|
__forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
|
||||||
|
__forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; }
|
||||||
|
__forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
|
||||||
|
__forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; }
|
||||||
|
|
||||||
|
__forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
|
||||||
|
__forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reductions
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
|
||||||
|
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
|
||||||
|
__forceinline int reduce_min(const Vec3ia& v) { return sycl::min(sycl::min(v.x,v.y),v.z); }
|
||||||
|
__forceinline int reduce_max(const Vec3ia& v) { return sycl::max(sycl::max(v.x,v.y),v.z); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Equality: true only when x, y and z all match. The three comparisons are combined with
// the non-short-circuiting '&', so every component is always compared.
__forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return (a.x == b.x) & (a.y == b.y) & (a.z == b.z); }
|
||||||
|
// Inequality: true when at least one of x, y, z differs. The per-component tests must be
// OR-ed together; AND-ing them would report "not equal" only when all three components
// differ. The non-short-circuiting '|' is used so every component is always compared.
__forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return (a.x != b.x) | (a.y != b.y) | (a.z != b.z); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
__forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
|
||||||
|
if (a.x != b.x) return a.x < b.x;
|
||||||
|
if (a.y != b.y) return a.y < b.y;
|
||||||
|
if (a.z != b.z) return a.z < b.z;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
__forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
|
||||||
|
__forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
|
||||||
|
__forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Component-wise select: each component comes from t when the matching mask component
// is set, and from f otherwise.
__forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
  return Vec3ia(m.x ? t.x : f.x,
                m.y ? t.y : f.y,
                m.z ? t.z : f.z);
}
|
||||||
|
|
||||||
|
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::min(a.x,b.x), sycl::min(a.y,b.y), sycl::min(a.z,b.z)); }
|
||||||
|
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::max(a.x,b.x), sycl::max(a.y,b.y), sycl::max(a.z,b.z)); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
inline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
|
||||||
|
return cout;
|
||||||
|
}
|
||||||
|
}
|
||||||
266
Framework/external/embree/common/math/vec4.h
vendored
Normal file
266
Framework/external/embree/common/math/vec4.h
vendored
Normal file
|
|
@ -0,0 +1,266 @@
|
||||||
|
// Copyright 2009-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "emath.h"
|
||||||
|
#include "vec3.h"
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Generic 4D vector Class
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> struct Vec4
|
||||||
|
{
|
||||||
|
enum { N = 4 };
|
||||||
|
union {
|
||||||
|
struct { T x, y, z, w; };
|
||||||
|
#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
|
||||||
|
T components[N];
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef T Scalar;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Construction
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec4( ) {}
|
||||||
|
__forceinline explicit Vec4( const T& a ) : x(a), y(a), z(a), w(a) {}
|
||||||
|
__forceinline Vec4( const T& x, const T& y, const T& z, const T& w ) : x(x), y(y), z(z), w(w) {}
|
||||||
|
__forceinline Vec4( const Vec3<T>& xyz, const T& w ) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {}
|
||||||
|
|
||||||
|
__forceinline Vec4( const Vec4& other ) { x = other.x; y = other.y; z = other.z; w = other.w; }
|
||||||
|
__forceinline Vec4( const Vec3fx& other );
|
||||||
|
|
||||||
|
template<typename T1> __forceinline Vec4( const Vec4<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)), w(T(a.w)) {}
|
||||||
|
template<typename T1> __forceinline Vec4& operator =(const Vec4<T1>& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; }
|
||||||
|
|
||||||
|
__forceinline Vec4& operator =(const Vec4& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; }
|
||||||
|
|
||||||
|
__forceinline operator Vec3<T> () const { return Vec3<T>(x,y,z); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Constants
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec4( ZeroTy ) : x(zero), y(zero), z(zero), w(zero) {}
|
||||||
|
__forceinline Vec4( OneTy ) : x(one), y(one), z(one), w(one) {}
|
||||||
|
__forceinline Vec4( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf), w(pos_inf) {}
|
||||||
|
__forceinline Vec4( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf), w(neg_inf) {}
|
||||||
|
|
||||||
|
#if defined(__WIN32__) && (_MSC_VER == 1800) // workaround for older VS 2013 compiler
|
||||||
|
__forceinline const T& operator [](const size_t axis) const { assert(axis < 4); return (&x)[axis]; }
|
||||||
|
__forceinline T& operator [](const size_t axis) { assert(axis < 4); return (&x)[axis]; }
|
||||||
|
#else
|
||||||
|
__forceinline const T& operator [](const size_t axis ) const { assert(axis < 4); return components[axis]; }
|
||||||
|
__forceinline T& operator [](const size_t axis) { assert(axis < 4); return components[axis]; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Swizzles
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
__forceinline Vec3<T> xyz() const { return Vec3<T>(x, y, z); }
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Unary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a ) { return Vec4<T>(+a.x, +a.y, +a.z, +a.w); }
|
||||||
|
template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a ) { return Vec4<T>(-a.x, -a.y, -a.z, -a.w); }
|
||||||
|
template<typename T> __forceinline Vec4<T> abs ( const Vec4<T>& a ) { return Vec4<T>(abs (a.x), abs (a.y), abs (a.z), abs (a.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> rcp ( const Vec4<T>& a ) { return Vec4<T>(rcp (a.x), rcp (a.y), rcp (a.z), rcp (a.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> rsqrt ( const Vec4<T>& a ) { return Vec4<T>(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z), rsqrt(a.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> sqrt ( const Vec4<T>& a ) { return Vec4<T>(sqrt (a.x), sqrt (a.y), sqrt (a.z), sqrt (a.w)); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Binary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
|
||||||
|
template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
|
||||||
|
template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
|
||||||
|
template<typename T> __forceinline Vec4<T> operator *( const T& a, const Vec4<T>& b ) { return Vec4<T>(a * b.x, a * b.y, a * b.z, a * b.w); }
|
||||||
|
template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x * b , a.y * b , a.z * b , a.w * b ); }
|
||||||
|
template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); }
|
||||||
|
template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x / b , a.y / b , a.z / b , a.w / b ); }
|
||||||
|
template<typename T> __forceinline Vec4<T> operator /( const T& a, const Vec4<T>& b ) { return Vec4<T>(a / b.x, a / b.y, a / b.z, a / b.w); }
|
||||||
|
|
||||||
|
template<typename T> __forceinline Vec4<T> min(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> max(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Ternary Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline Vec4<T> madd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> msub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> nmadd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z),nmadd(a.w,b.w,c.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> nmsub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z),nmsub(a.w,b.w,c.w)); }
|
||||||
|
|
||||||
|
template<typename T> __forceinline Vec4<T> madd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z), madd(a,b.w,c.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> msub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z), msub(a,b.w,c.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> nmadd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z),nmadd(a,b.w,c.w)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> nmsub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z),nmsub(a,b.w,c.w)); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Assignment Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline Vec4<T>& operator +=( Vec4<T>& a, const Vec4<T>& b ) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; }
|
||||||
|
template<typename T> __forceinline Vec4<T>& operator -=( Vec4<T>& a, const Vec4<T>& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; return a; }
|
||||||
|
template<typename T> __forceinline Vec4<T>& operator *=( Vec4<T>& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; a.w *= b ; return a; }
|
||||||
|
template<typename T> __forceinline Vec4<T>& operator /=( Vec4<T>& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; a.w /= b ; return a; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Reduction Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline T reduce_add( const Vec4<T>& a ) { return a.x + a.y + a.z + a.w; }
|
||||||
|
template<typename T> __forceinline T reduce_mul( const Vec4<T>& a ) { return a.x * a.y * a.z * a.w; }
|
||||||
|
template<typename T> __forceinline T reduce_min( const Vec4<T>& a ) { return min(a.x, a.y, a.z, a.w); }
|
||||||
|
template<typename T> __forceinline T reduce_max( const Vec4<T>& a ) { return max(a.x, a.y, a.z, a.w); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Comparison Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline bool operator ==( const Vec4<T>& a, const Vec4<T>& b ) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; }
|
||||||
|
template<typename T> __forceinline bool operator !=( const Vec4<T>& a, const Vec4<T>& b ) { return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; }
|
||||||
|
// Lexicographic ordering on (x, y, z, w): the first component that differs decides the
// result; if no component differs the vectors are not ordered and false is returned.
template<typename T> __forceinline bool operator < ( const Vec4<T>& a, const Vec4<T>& b ) {
  bool is_less = false;
  if      (a.x != b.x) is_less = a.x < b.x;
  else if (a.y != b.y) is_less = a.y < b.y;
  else if (a.z != b.z) is_less = a.z < b.z;
  else if (a.w != b.w) is_less = a.w < b.w;
  return is_less;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Shift Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Applies the scalar shift_right_1 to each of the four components independently.
template<typename T> __forceinline Vec4<T> shift_right_1( const Vec4<T>& a ) {
  const T sx = shift_right_1(a.x);
  const T sy = shift_right_1(a.y);
  const T sz = shift_right_1(a.z);
  const T sw = shift_right_1(a.w);
  return Vec4<T>(sx, sy, sz, sw);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Euclidean Space Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline T dot ( const Vec4<T>& a, const Vec4<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,madd(a.z,b.z,a.w*b.w))); }
|
||||||
|
|
||||||
|
template<typename T> __forceinline T length ( const Vec4<T>& a ) { return sqrt(dot(a,a)); }
|
||||||
|
template<typename T> __forceinline Vec4<T> normalize( const Vec4<T>& a ) { return a*rsqrt(dot(a,a)); }
|
||||||
|
template<typename T> __forceinline T distance ( const Vec4<T>& a, const Vec4<T>& b ) { return length(a-b); }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Select
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Select with a single boolean: the same condition s is forwarded to the scalar select
// for every component, choosing between t and f.
template<typename T> __forceinline Vec4<T> select ( bool s, const Vec4<T>& t, const Vec4<T>& f ) {
  const T rx = select(s, t.x, f.x);
  const T ry = select(s, t.y, f.y);
  const T rz = select(s, t.z, f.z);
  const T rw = select(s, t.w, f.w);
  return Vec4<T>(rx, ry, rz, rw);
}
|
||||||
|
|
||||||
|
// Select with a per-component boolean vector: component i of the result is chosen by
// component i of s, via the scalar select on t and f.
template<typename T> __forceinline Vec4<T> select ( const Vec4<bool>& s, const Vec4<T>& t, const Vec4<T>& f ) {
  const T rx = select(s.x, t.x, f.x);
  const T ry = select(s.y, t.y, f.y);
  const T rz = select(s.z, t.z, f.z);
  const T rw = select(s.w, t.w, f.w);
  return Vec4<T>(rx, ry, rz, rw);
}
|
||||||
|
|
||||||
|
// Select with the mask type associated with T (T::Bool): the same mask s is forwarded to
// the component-type select for each of x, y, z and w.
template<typename T> __forceinline Vec4<T> select ( const typename T::Bool& s, const Vec4<T>& t, const Vec4<T>& f ) {
  const T rx = select(s, t.x, f.x);
  const T ry = select(s, t.y, f.y);
  const T rz = select(s, t.z, f.z);
  const T rw = select(s, t.w, f.w);
  return Vec4<T>(rx, ry, rz, rw);
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
__forceinline Vec4<T> lerp(const Vec4<T>& v0, const Vec4<T>& v1, const T& t) {
|
||||||
|
return madd(Vec4<T>(T(1.0f)-t),v0,t*v1);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Output Operators
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec4<T>& a) {
|
||||||
|
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ", " << a.w << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Default template instantiations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
typedef Vec4<bool > Vec4b;
|
||||||
|
typedef Vec4<unsigned char> Vec4uc;
|
||||||
|
typedef Vec4<int > Vec4i;
|
||||||
|
typedef Vec4<float > Vec4f;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "vec3ba.h"
|
||||||
|
#include "vec3ia.h"
|
||||||
|
#include "vec3fa.h"
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// SSE / AVX / AVX-512 specializations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||||
|
#include "../simd/sse.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined __AVX__
|
||||||
|
#include "../simd/avx.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined __AVX512F__
|
||||||
|
#include "../simd/avx512.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace embree
|
||||||
|
{
|
||||||
|
template<> __forceinline Vec4<float>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; }
|
||||||
|
|
||||||
|
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
|
||||||
|
x = a.x; y = a.y; z = a.z; w = a.w;
|
||||||
|
}
|
||||||
|
#elif defined(__SSE__) || defined(__ARM_NEON)
|
||||||
|
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
|
||||||
|
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
template<> __forceinline Vec4<vfloat8>::Vec4( const Vec3fx& a ) {
|
||||||
|
x = a.x; y = a.y; z = a.z; w = a.w;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX512F__)
|
||||||
|
template<> __forceinline Vec4<vfloat16>::Vec4( const Vec3fx& a ) : x(a.x), y(a.y), z(a.z), w(a.w) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#if defined(__SSE__)
|
||||||
|
template<> __forceinline Vec4<vfloat4>::Vec4(const Vec3fx& a) {
|
||||||
|
x = a.x; y = a.y; z = a.z; w = a.w;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(__AVX__)
|
||||||
|
template<> __forceinline Vec4<vfloat8>::Vec4(const Vec3fx& a) {
|
||||||
|
x = a.x; y = a.y; z = a.z; w = a.w;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(__AVX512F__)
|
||||||
|
template<> __forceinline Vec4<vfloat16>::Vec4(const Vec3fx& a) {
|
||||||
|
x = a.x; y = a.y; z = a.z; w = a.w;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue