Initial commit.

This commit is contained in:
hal8174 2024-04-23 10:14:24 +02:00
commit d3bb49b3f5
1073 changed files with 484757 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
build

View file

@ -0,0 +1,5 @@
# Assignment 1: build the assignment executable and link it against the
# shared CGI framework library.
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
project(Assignment1)

add_executable(Assignment1 assignment1.cpp)
target_link_libraries(Assignment1 PUBLIC CGI-framework)

View file

@ -0,0 +1,5 @@
// Assignment 1 entry point. Stub implementation: does nothing and exits
// with success.
int main(int argc, char** argv) {
    // Parameters are unused in this stub; cast to void to silence
    // -Wunused-parameter style compiler warnings.
    (void)argc;
    (void)argv;
    return 0;
}

View file

@ -0,0 +1,5 @@
# Assignment 2: build the assignment executable and link it against the
# shared CGI framework library.
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
project(Assignment2)

add_executable(Assignment2 assignment2.cpp)
target_link_libraries(Assignment2 PUBLIC CGI-framework)

View file

@ -0,0 +1,4 @@
// Assignment 2 entry point. Stub implementation: does nothing and exits
// with success.
int main(int argc, char** argv) {
    // Parameters are unused in this stub; cast to void to silence
    // -Wunused-parameter style compiler warnings.
    (void)argc;
    (void)argv;
    return 0;
}

View file

@ -0,0 +1,5 @@
# Assignment 3: build the assignment executable and link it against the
# shared CGI framework library.
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
project(Assignment3)

add_executable(Assignment3 assignment3.cpp)
target_link_libraries(Assignment3 PUBLIC CGI-framework)

View file

@ -0,0 +1,4 @@
// Assignment 3 entry point. Stub implementation: does nothing and exits
// with success.
int main(int argc, char** argv) {
    // Parameters are unused in this stub; cast to void to silence
    // -Wunused-parameter style compiler warnings.
    (void)argc;
    (void)argv;
    return 0;
}

View file

@ -0,0 +1,5 @@
# Assignment 4: build the assignment executable and link it against the
# shared CGI framework library.
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
project(Assignment4)

add_executable(Assignment4 assignment4.cpp)
target_link_libraries(Assignment4 PUBLIC CGI-framework)

View file

@ -0,0 +1,5 @@
// Assignment 4 entry point. Stub implementation: does nothing and exits
// with success.
int main(int argc, char** argv) {
    // Parameters are unused in this stub; cast to void to silence
    // -Wunused-parameter style compiler warnings.
    (void)argc;
    (void)argv;
    return 0;
}

13
CMakeLists.txt Normal file
View file

@ -0,0 +1,13 @@
# Top-level build: compiles the shared CGI framework and the assignment
# executables that link against it.
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
project(CGI C CXX)

# Expose the absolute source-tree path to all compiled code.
# NOTE(review): presumably used by the framework to locate data files at
# runtime — confirm against the framework sources. This is a directory-scoped
# definition on purpose: every subproject should see it.
add_compile_definitions(WORKING_DIR="${CMAKE_CURRENT_SOURCE_DIR}")

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/Framework)

### Assignments ###
# NOTE(review): Assignments 1-4 all exist in the tree; widen this to
# `RANGE 1 4` once the later assignments should be built as well.
foreach (num RANGE 1 1)
    # Use CMAKE_CURRENT_SOURCE_DIR (not CMAKE_SOURCE_DIR) so the path stays
    # correct if this project is ever embedded as a subproject.
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/Assignments/Assignment${num})
endforeach ()

55
Framework/CMakeLists.txt Normal file
View file

@ -0,0 +1,55 @@
# CGI-framework: static library shared by all assignments. Bundles the
# vendored Dear ImGui sources, the light implementations, the scenegraph and
# the application layer, and links the third-party dependencies.
cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
project(CGI-framework C CXX)

# Vendored dependencies (glfw, embree, glad). The subdirectory exports
# GLFW_INCLUDE, EMBREE_INCLUDE_DIRS and GLAD_INCLUDE into this scope via
# PARENT_SCOPE.
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/external)
find_package(OpenGL REQUIRED)

# Anchor the ImGui path at this directory so the variable stays valid no
# matter where it is later consumed.
set(IMGUI_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/imgui)

# Public include directories propagated to every consumer of the library.
set(PROJECT_INCLUDE_DIRS
    include
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${EMBREE_INCLUDE_DIRS}
    ${CMAKE_CURRENT_BINARY_DIR}
    ${GLFW_INCLUDE}
    ${GLAD_INCLUDE}
    ${IMGUI_DIR}
    ${IMGUI_DIR}/backends)

# Dear ImGui core plus the GLFW/OpenGL3 backends used by this framework.
set(SOURCES_IMGUI
    ${IMGUI_DIR}/backends/imgui_impl_glfw.cpp
    ${IMGUI_DIR}/backends/imgui_impl_opengl3.cpp
    ${IMGUI_DIR}/imgui.cpp
    ${IMGUI_DIR}/imgui_draw.cpp
    ${IMGUI_DIR}/imgui_demo.cpp
    ${IMGUI_DIR}/imgui_tables.cpp
    ${IMGUI_DIR}/imgui_widgets.cpp)

# Light-source implementations (headers listed so IDE generators show them).
set(SOURCES_LIGHT
    lights/light.cpp
    lights/light.h
    lights/ambient_light.cpp
    lights/ambient_light.h
    lights/directional_light.cpp
    lights/directional_light.h
    lights/point_light.cpp
    lights/point_light.h
    lights/quad_light.cpp
    lights/quad_light.h
    lights/spot_light.cpp
    lights/spot_light.h
)

set(SOURCES
    ${SOURCES_IMGUI}
    ${SOURCES_LIGHT}
    scenegraph/scenegraph.cpp
    scenegraph/obj_loader.cpp
    src/application.cpp
)

add_library(${PROJECT_NAME} ${SOURCES})
target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} PUBLIC glfw OpenGL::GL glad embree)

26
Framework/external/CMakeLists.txt vendored Normal file
View file

@ -0,0 +1,26 @@
# Configures and adds the vendored third-party libraries (glfw, embree, glad)
# and exports their include directories to the parent (Framework) scope.
# NOTE(review): this requires CMake 3.25 while the top-level project only
# requires 3.16 — the effective minimum for the whole build is 3.25; confirm
# and align the version requirements.
cmake_minimum_required(VERSION 3.25)
project(libraries)
# glfw
# Export glfw's include path for the parent scope; force a static build and
# skip docs/tests/examples of the vendored copy.
set(GLFW_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/glfw/include PARENT_SCOPE)
set(GLFW_LIBRARY_TYPE STATIC CACHE STRING "" FORCE)
set(GLFW_BUILD_DOCS OFF CACHE BOOL "" FORCE)
set(GLFW_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(GLFW_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
add_subdirectory(glfw)
# Group the target under an "external" folder in IDE generators.
set_property(TARGET glfw PROPERTY FOLDER "external")
# embree
# NOTE(review): unlike the glfw options above, EMBREE_STATIC_LIB is a plain
# (non-cache) variable here — whether it reaches embree's option() depends on
# policy CMP0077 being NEW; verify it actually takes effect.
set(EMBREE_STATIC_LIB ON )
set(EMBREE_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/embree/include ${CMAKE_CURRENT_SOURCE_DIR}/embree/common PARENT_SCOPE)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/embree)
# glad
set(GLAD_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/glad/include PARENT_SCOPE)
add_subdirectory(glad)
set_property(TARGET glad PROPERTY FOLDER "external")

904
Framework/external/embree/CHANGELOG.md vendored Normal file
View file

@ -0,0 +1,904 @@
Version History
---------------
### Embree 4.3.1
- Add missing EMBREE_GEOMETRY types to embree-config.cmake
- User defined thread count now takes precedence for internal task scheduler
- Fixed static linking issue with ze_wrapper library
- Better error reporting for SYCL platform and driver problems in embree_info and tutorial apps.
- Patch to glfw source is not applied by default anymore.
- Known issue: Running Embree on Intel® Data Center GPU Max Series with 2 tiles (e.g. Intel® Data Center GPU Max 1550) requires setting the environment variable ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE.
- Known issue: Embree build using Apple Clang 15 and ARM support (via the SSE2NEON library) may cause "EXEC_BAD_INSTRUCTION" runtime exceptions. Please use Apple Clang <= 14 on macOS.
### Embree 4.3.0
- Added instance array primitive for reducing memory requirements in scenes
with large amounts of similar instances.
- Properly checks driver if L0 RTAS extension can get loaded.
- Added varying version of rtcGetGeometryTransform for ISPC.
- Fixed signature of RTCMemoryMonitorFunction for ISPC.
- Add support for ARM64 Windows platform in CMake.
### Embree 4.2.0
- SYCL version of Embree with GPU support is no longer in beta phase.
- Improved BVH build performance on many core machines for applications that oversubscribe threads.
- Added rtcGetGeometryTransformFromScene API function that can get used inside SYCL kernels.
- No longer linking to ze_loader in SYCL mode to avoid Intel(R) oneAPI Level Zero dependency
for CPU rendering.
- Releasing test package to test Embree.
### Embree 4.1.0
- Added support for Intel® Data Center GPU Max Series.
- Added ARM64 Linux support.
- Added EMBREE_BACKFACE_CULLING_SPHERES cmake option. The new cmake option defaults to OFF.
### Embree 4.0.1
- Improved performance for Tiger Lake, Comet Lake, Cannon Lake, Kaby Lake,
and Skylake client CPUs by using 256 bit SIMD instructions by default.
- Fixed broken motion blur of RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE geometry type.
- Fixed bvh build retry issue for TBB 2020.3
- Added support for Intel® Data Center GPU Flex Series
- Fixed issue on systems without a SYCL platform.
### Embree 4.0.0
- This Embree release adds support for Intel® Arc™ GPUs through SYCL.
- The SYCL support of Embree is in beta phase. Current functionality, quality,
and GPU performance may not reflect that of the final product. Please read the
documentation section "Embree SYCL Known Issues" for known limitations.
- Embree CPU support in this release is at Gold level, incorporating the same quality
and performance as previous releases.
- A small number of API changes were required to get optimal experience and
performance on the CPU and GPU. See documentation section "Upgrading from Embree 3 to
Embree 4" for details.
- rtcIntersect and rtcOccluded function arguments changed slightly.
- RTCIntersectContext is renamed to RTCRayQueryContext and most members moved to
new RTCIntersectArguments and RTCOccludedArguments structures.
- rtcFilterIntersection and rtcFilterOcclusion API calls got replaced by
rtcInvokeIntersectFilterFromGeometry and rtcInvokeOccludedFilterFromGeometry API calls.
- rtcSetGeometryEnableFilterFunctionFromArguments enables argument filter functions for some geometry.
- RTC_RAY_QUERY_FLAG_INVOKE_ARGUMENT_FILTER ray query flag enables argument filter functions for each geometry.
- User geometry callbacks have to return if a valid hit was found.
- Ray masking is enabled by default now as required by most users.
- The default ray mask for geometries got changed from 0xFFFFFFFF to 0x1.
- Removed ray stream API as rarely used with minimal performance benefits over packet tracing.
- Introduced rtcForwardIntersect/rtcForwardOccluded API calls to trace tail recursive rays from user geometry callback.
- The rtcGetGeometryUserDataFromScene API call got added to be used in SYCL code.
- Added support for user geometry callback function pointer passed through ray query context
- Feature flags enable reducing code complexity for optimal performance on the GPU.
- Fixed compilation issues for ARM AArch64 processor under Linux.
- Setting default frequency level to SIMD256 for ARM on all platforms.
This allows using double pumped NEON execution by enabling EMBREE_ISA_NEON2X in cmake under Linux.
- Fixed missing end caps of motion blurred line segments.
- EMBREE_ISPC_SUPPORT is turned OFF by default.
- Embree drops support of the deprecated Intel(R) Compiler. It is replaced by
the Intel(R) oneAPI DPC++/C++ Compiler on Windows and Linux and the
Intel(R) C++ Classic Compiler on MacOS (latest tested versions is 2023.0.0).
### Embree 3.13.5
- Fixed bug in bounding flat Catmull Rom curves of subdivision level 4.
- Improved self intersection avoidance for
RTC_GEOMETRY_TYPE_DISC_POINT geometry type. Intersections are
skipped if the ray origin lies inside the sphere defined by the
point primitive. Self intersection avoidance can get disabled at compile time
using the EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE cmake option.
- Fixed spatial splitting for non-planar quads.
### Embree 3.13.4
- Using 8-wide BVH and double pumped NEON instructions on Apple M1 gives 8% performance boost.
- Fixed binning related crash in SAH BVH builder.
- Added EMBREE_TBB_COMPONENT cmake option to define the component/library name of Intel® TBB (default: tbb).
- Embree supports now Intel® oneAPI DPC++/C++ Compiler 2022.0.0
### Embree 3.13.3
- Invalid multi segment motion blurred normal oriented curves are properly excluded from BVH build.
- Fixing issue with normal oriented curve construction when center curve curvature is very large.
Due to this change normal oriented curve shape changes slightly.
- Fixed crash caused by disabling a geometry and then detaching it from the scene.
- Bugfix in emulated ray packet intersection when EMBREE_RAY_PACKETS is turned off.
- Bugfix for linear quaternion interpolation fallback.
- Fixed issues with spaces in path to Embree build folder.
- Some fixes to compile Embree in SSE mode using WebAssembly.
- Bugfix for occlusion rays with grids and ray packets.
- We do no longer provide installers for Windows and macOS, please use the ZIP files instead.
- Upgrading to Intel® ISPC 1.17.0 for release build.
- Upgrading to Intel® oneTBB 2021.5.0 for release build.
### Embree 3.13.2
- Avoiding spatial split positions that are slightly out of geometry bounds.
- Introduced rtcGetGeometryThreadSafe function, which is a thread safe version of rtcGetGeometry.
- Using more accurate rcp implementation.
- Bugfix to rare corner case of high quality BVH builder.
### Embree 3.13.1
- Added support for Intel® ISPC ARM target.
- Releases upgrade to Intel® TBB 2021.3.0 and Intel® ISPC 1.16.1
### Embree 3.13.0
- Added support for Apple M1 CPUs.
- RTC_SUBDIVISION_MODE_NO_BOUNDARY now works properly for non-manifold edges.
- CMake target 'uninstall' is not defined if it already exists.
- Embree no longer reads the .embree3 config files, thus all configuration has
to get passed through the config string to rtcNewDevice.
- Releases upgrade to Intel® TBB 2021.2.0 and Intel® ISPC 1.15.0
- Intel® TBB dll is automatically copied into build folder after build on windows.
### Embree 3.12.2
- Fixed wrong uv and Ng for grid intersector in robust mode for AVX.
- Removed optimizations for Knights Landing.
- Upgrading release builds to use Intel® oneTBB 2021.1.1
### Embree 3.12.1
- Changed default frequency level to SIMD128 for Skylake, Cannon Lake, Comet Lake and Tiger Lake CPUs.
This change typically improves performance for renderers that just use SSE by maintaining higher
CPU frequencies. In case your renderer is AVX optimized you can get higher ray tracing performance
by configuring the frequency level to simd256 through passing frequency_level=simd256 to rtcNewDevice.
### Embree 3.12.0
- Added linear cone curve geometry support. In this mode a real geometric surface for curves
with linear basis is rendered using capped cones. They are discontinuous at edge boundaries.
- Enabled fast two level builder for instances when low quality build is requested.
- Bugfix for BVH build when geometries got disabled.
- Added EMBREE_BACKFACE_CULLING_CURVES cmake option. This allows for a cheaper round
linear curve intersection when correct internal tracking and back hits are not required.
The new cmake option defaults to OFF.
- User geometries with invalid bounds with lower>upper in some dimension will be ignored.
- Increased robustness for grid interpolation code and fixed returned out of range u/v
coordinates for grid primitive.
- Fixed handling of motion blur time range for sphere, discs, and oriented disc geometries.
- Fixed missing model data in releases.
- Ensure compatibility to newer versions of Intel® oneTBB.
- Motion blur BVH nodes no longer store NaN values.
### Embree 3.11.0
- Round linear curves now automatically check for the existence of left and right
connected segments if the flags buffer is empty. Left segments exist if the
segment(id-1) + 1 == segment(id) and similarly for right segments.
- Implemented the min-width feature for curves and points, which allows to increase the
radius in a distance dependent way, such that the curve or points thickness is n pixels wide.
- Round linear curves are closed now also at their start.
- Embree no longer supports Visual Studio 2013 starting with this release.
- Bugfix in subdivision tessellation level assignment for non-quad base primitives
- Small meshes are directly added to top level build phase of two-level builder to reduce memory consumption.
- Enabled fast two level builder for user geometries when low quality build is requested.
### Embree 3.10.0
- Added EMBREE_COMPACT_POLYS CMake option which enables double indexed triangle and quad
leaves to reduce memory consumption in compact mode by an additional 40% at about
15% performance impact. This new mode is disabled by default.
- Compile fix for Intel® oneTBB 2021.1-beta05
- Releases upgrade to Intel® TBB 2020.2
- Compile fix for Intel® ISPC v1.13.0
- Adding RPATH to libembree.so in releases
- Increased required CMake version to 3.1.0
- Made instID member for array of pointers ray stream layout optional again.
### Embree 3.9.0
- Added round linear curve geometry support. In this mode a real geometric surface for curves
with linear basis is rendered using capped cones with spherical filling between
the curve segments.
- Added rtcGetSceneDevice API function, that returns the device a scene got created in.
- Improved performance of round curve rendering by up to 1.8x.
- Bugfix to sphere intersection filter invocation for back hit.
- Fixed wrong assertion that triggered for invalid curves which anyway get filtered out.
- RelWithDebInfo mode no longer enables assertions.
- Fixed an issue in FindTBB.cmake that caused compile error with Debug build under Linux.
- Embree releases no longer provide RPMs for Linux. Please use the RPMs coming with the package
manager of your Linux distribution.
### Embree 3.8.0
- Added collision detection support for user geometries (see rtcCollide API function)
- Passing geomID to user geometry callbacks.
- Bugfix in AVX512VL codepath for rtcIntersect1
- For sphere geometries the intersection filter gets now invoked for
front and back hit.
- Fixed some bugs for quaternion motion blur.
- RTCRayQueryContext always non-const in Embree API
- Made RTCHit aligned to 16 bytes in Embree API
### New Features in Embree 3.7.0
- Added quaternion motion blur for correct interpolation of rotational transformations.
- Fixed wrong bounding calculations when a motion blurred instance did
instantiate a motion blurred scene.
- In robust mode the depth test consistently uses tnear <= t <= tfar now in order
to robustly continue traversal at a previous hit point
in a way that guarantees reaching all hits, even hits at the same place.
- Fixed depth test in robust mode to be precise at tnear and tfar.
- Added next_hit tutorial to demonstrate robustly collecting all hits
along a ray using multiple ray queries.
- Implemented robust mode for curves. This has a small performance impact but
fixes bounding problems with flat curves.
- Improved quality of motion blur BVH by using linear bounds during binning.
- Fixed issue with motion blur builder where number of time segments
for SAH heuristic were counted wrong due to some numerical issues.
- Fixed an accuracy issue with rendering very short fat curves.
- rtcCommitScene can now get called during rendering from multiple threads
to lazily build geometry. When Intel® TBB is used this causes a much lower overhead
than using rtcJoinCommitScene.
- Geometries can now get attached to multiple scenes at the same time, which
simplifies mapping general scene graphs to API.
- Updated to Intel® TBB 2019.9 for release builds.
- Fixed a bug in the BVH builder for Grid geometries.
- Added macOS Catalina support to Embree releases.
### New Features in Embree 3.6.1
- Restored binary compatibility between Embree 3.6 and 3.5 when single-level instancing is used.
- Fixed bug in subgrid intersector
- Removed point query alignment in Intel® ISPC header
### New Features in Embree 3.6
- Added Catmull-Rom curve types.
- Added support for multi-level instancing.
- Added support for point queries.
- Fixed a bug preventing normal oriented curves being used unless timesteps were
specified.
- Fixed bug in external BVH builder when configured for dynamic build.
- Added support for new config flag "user_threads=N" to device initialization
which sets the number of threads used by Intel® TBB but created by the user.
- Fixed automatic vertex buffer padding when using rtcSetNewGeometry API function.
### New Features in Embree 3.5.2
- Added EMBREE_API_NAMESPACE cmake option that allows to put all Embree API functions
inside a user defined namespace.
- Added EMBREE_LIBRARY_NAME cmake option that allows to rename the Embree library.
- When Embree is compiled as static library, EMBREE_STATIC_LIB has no longer to get
defined before including the Embree API headers.
- Added CPU frequency_level device configuration to allow an application to specify the
frequency level it wants to run on. This forces Embree to not use optimizations that
may reduce the CPU frequency below that level. By default Embree is configured to
the AVX-heavy frequency level, thus if the application uses solely non-AVX code, configuring
the Embree device with "frequency_level=simd128" may give better performance.
- Fixed a bug in the spatial split builder which caused it to fail
for scenes with more than 2^24 geometries.
### New Features in Embree 3.5.1
- Fixed ray/sphere intersector to work also for non-normalized rays.
- Fixed self intersection avoidance for ray oriented discs when
non-normalized rays were used.
- Increased maximal face valence for subdiv patch to 64 and reduced stack size
requirement for subdiv patch evaluation.
### New Features in Embree 3.5.0
- Changed normal oriented curve definition to fix waving artefacts.
- Fixed bounding issue for normal oriented motion blurred curves.
- Fixed performance issue with motion blurred point geometry.
- Fixed generation of documentation with new pandoc versions.
### New Features in Embree 3.4.0
- Added point primitives (spheres, ray-oriented discs, normal-oriented discs).
- Fixed crash triggered by scenes with only invalid primitives.
- Improved robustness of quad/grid-based intersectors.
- Upgraded to Intel® TBB 2019.2 for release builds.
### New Features in Embree 3.3.0
- Added support for motion blur time range per geometry. This way geometries
can appear and disappear during the camera shutter and time steps do not have
to start and end at camera shutter interval boundaries.
- Fixed crash with pathtracer when using --triangle-sphere command line.
- Fixed crash with pathtracer when using --shader ao command line.
- Fixed tutorials showing a black window on macOS 10.14 until moved.
### New Features in Embree 3.2.4
- Fixed compile issues with ICC 2019.
- Released ZIP files for Windows are now provided in a
version linked against Visual Studio 2013 and Visual Studio 2015.
### New Features in Embree 3.2.3
- Fixed crash when using curves with RTC_SCENE_FLAG_DYNAMIC
combined with RTC_BUILD_QUALITY_MEDIUM.
### New Features in Embree 3.2.2
- Fixed intersection distance for unnormalized rays with line segments.
- Removed libmmd.dll dependency in release builds for Windows.
- Fixed detection of AppleClang compiler under MacOSX.
### New Features in Embree 3.2.1
- Bugfix in flat mode for hermite curves.
- Added EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR cmake option to
control self intersection avoidance for flat curves.
- Performance fix when instantiating motion blurred scenes. The application
should best use two (or more) time steps for an instance that instantiates
a motion blurred scene.
- Fixed AVX512 compile issue with GCC 6.1.1.
- Fixed performance issue with rtcGetGeometryUserData when used
during rendering.
- Bugfix in length of derivatives for grid geometry.
- Added BVH8 support for motion blurred curves and lines. For some workloads
this increases performance by up to 7%.
- Fixed rtcGetGeometryTransform to return the local to world transform.
- Fixed bug in multi segment motion blur that caused missing of perfectly
axis aligned geometry.
- Reduced memory consumption of small scenes by 4x.
- Reduced temporal storage of grid builder.
### New Features in Embree 3.2.0
- Improved watertightness of robust mode.
- Line segments, and other curves are now all contained in a single
BVH which improves performance when these are both used in a scene.
- Performance improvement of up to 20% for line segments.
- Bugfix to Embree2 to Embree3 conversion script.
- Added support for Hermite curve basis.
- Semantics of normal buffer for normal oriented curves has
changed to simplify usage. Please see documentation for details.
- Using GLFW and imgui in tutorials.
- Fixed floating point exception in static variable initialization.
- Fixed invalid memory access in rtcGetGeometryTransform for non-motion
blur instances.
- Improved self intersection avoidance for flat curves. Transparency rays
with tnear set to previous hit distance do not need curve radius
based self intersection avoidance as same hit is calculated again. For this
reason self intersection avoidance is now only applied to ray origin.
### New Features in Embree 3.1.0
- Added new normal-oriented curve primitive for ray tracing of grass-like
structures.
- Added new grid primitive for ray tracing tessellated and displaced surfaces
in very memory efficient manner.
- Fixed bug of ribbon curve intersector when derivative was zero.
- Installing all static libraries when EMBREE_STATIC_LIB is enabled.
- Added API functions to access topology of subdivision mesh.
- Reduced memory consumption of instances.
- Improved performance of instances by 8%.
- Reduced memory consumption of curves by up to 2x.
- Up to 5% higher performance on AVX-512 architectures.
- Added native support for multiple curve basis functions. Internal
basis conversions are no longer performed, which saves additional
memory when multiple bases are used.
- Fixed issue with non thread safe local static variable initialization
in VS2013.
- Bugfix in rtcSetNewGeometry. Vertex buffers did not get properly
overallocated.
- Replaced ImageMagick with OpenImageIO in the tutorials.
### New Features in Embree 3.0.0
- Switched to a new version of the API which provides improved
flexibility but is not backward compatible. Please see "Upgrading
from Embree 2 to Embree 3" section of the documentation for upgrade
instructions. In particular, we provide a Python script that performs
most of the transition work.
- User geometries inside an instanced scene and a top-level scene no
longer need to handle the instID field of the ray differently. They
both just need to copy the context.instID into the ray.instID field.
- Support for context filter functions that can be assigned to a ray
query.
- User geometries can now invoke filter functions using the
rtcFilterIntersection and rtcFilterOcclusion calls.
- Higher flexibility through specifying build quality per scene and
geometry.
- Geometry normal uses commonly used right-hand rule from now on.
- Added self-intersection avoidance to ribbon curves and lines.
Applications do not have to implement self-intersection workarounds
for these primitive types anymore.
- Added support for 4 billion primitives in a single scene.
- Removed the RTC_MAX_USER_VERTEX_BUFFERS and RTC_MAX_INDEX_BUFFERS
limitations.
- Reduced memory consumption by 192 bytes per instance.
- Fixed some performance issues on AVX-512 architectures.
- Individual Contributor License Agreement (ICLA) and Corporate
Contributor License Agreement (CCLA) no longer required to
contribute to the project.
### New Features in Embree 2.17.5
- Improved watertightness of robust mode.
- Fixed floating point exception in static variable initialization.
- Fixed AVX512 compile issue with GCC 6.1.1.
### New Features in Embree 2.17.4
- Fixed AVX512 compile issue with GCC 7.
- Fixed issue with not thread safe local static variable
initialization in VS2013.
- Fixed bug in the 4 and 8-wide packet intersection of instances with
multi-segment motion blur on AVX-512 architectures.
- Fixed bug in rtcOccluded4/8/16 when only AVX-512 ISA was enabled.
### New Features in Embree 2.17.3
- Fixed GCC compile warning in debug mode.
- Fixed bug of ribbon curve intersector when derivative was zero.
- Installing all static libraries when EMBREE_STATIC_LIB is enabled.
### New Features in Embree 2.17.2
- Made BVH build of curve geometry deterministic.
### New Features in Embree 2.17.1
- Improved performance of occlusion ray packets by up to 50%.
- Fixed detection of Clang for CMake 3 under MacOSX
- Fixed AVX code compilation issue with GCC 7 compiler caused by
explicit use of vzeroupper intrinsics.
- Fixed an issue where Clang address sanitizer reported an error in
the internal tasking system.
- Added fix to compile on 32 bit Linux distribution.
- Fixed some wrong relative include paths in Embree.
- Improved performance of robust single ray mode by 5%.
- Added EMBREE_INSTALL_DEPENDENCIES option (default OFF) to enable
installing of Embree dependencies.
- Fixed performance regression for occlusion ray streams.
- Reduced temporary memory requirements of BVH builder for curves and
line segments.
- Fixed performance regression for user geometries and packet ray tracing.
- Fixed bug where wrong closest hit was reported for very curvy hair segment.
### New Features in Embree 2.17.0
- Improved packet ray tracing performance for coherent rays by 10-60%
(requires RTC_INTERSECT_COHERENT flag).
- Improved ray tracing performance for incoherent rays on
AVX-512 architectures by 5%.
- Improved ray tracing performance for streams of incoherent rays
by 5-15%.
- Fixed tbb_debug.lib linking error under Windows.
- Fast coherent ray stream and packet code paths now also work in robust mode.
- Using less aggressive prefetching for large BVH nodes which
results in 1-2% higher ray tracing performance.
- Precompiled binaries have stack-protector enabled, except for
traversal kernels. BVH builders can be slightly slower due to this
change. If you want stack-protectors disabled please turn off
EMBREE_STACK_PROTECTOR in cmake and build the binaries yourself.
- When enabling ISAs individually, the 8-wide BVH was previously only
available when the AVX ISA was also selected. This issue is now
fixed, and one can enable only AVX2 and still get best
performance by using an 8-wide BVH.
- Fixed rtcOccluded1 and rtcOccluded1Ex API functions which were
broken in Intel® ISPC.
- Providing MSI installer for Windows.
### New Features in Embree 2.16.5
- Bugfix in the robust triangle intersector that rarely caused NaNs.
- Fixed bug in hybrid traversal kernel when BVH leaf was entered with no
active rays. This rarely caused crashes when used with instancing.
- Fixed bug introduced in Embree 2.16.2 which caused instancing not to
work properly when a smaller than the native SIMD width was
used in ray packet mode.
- Fixed bug in the curve geometry intersector that caused rendering
artefacts for Bézier curves with p0=p1 and/or p2=p3.
- Fixed bug in the curve geometry intersector that caused hit results
with NaNs to be reported.
- Fixed masking bug that caused rare cracks in curve geometry.
- Enabled support for SSE2 in precompiled binaries again.
### New Features in Embree 2.16.4
- Bugfix in the ribbon intersector for hair primitives. Non-normalized
rays caused wrong intersection distance to be reported.
### New Features in Embree 2.16.3
- Increased accuracy for handling subdivision surfaces. This fixes
cracks when using displacement mapping but reduces performance
at irregular vertices.
- Fixed a bug where subdivision geometry was not properly updated
when modifying only the tessellation rate and vertex array.
### New Features in Embree 2.16.2
- Fixed bug that caused NULL ray query context in intersection
filter when instancing was used.
- Fixed an issue where uv's where outside the triangle (or quad) for
very small triangles (or quads). In robust mode we improved the uv
calculation to avoid that issue, in fast mode we accept that
inconsistency for better performance.
- Changed UV encoding for non-quad subdivision patches to
allow a subpatch UV range of `[-0.5,1.5[`. Using this new encoding
one can use finite differences to calculate derivatives if required.
Please adjust your code in case you rely on the old encoding.
### New Features in Embree 2.16.1
- Workaround for compile issues with Visual Studio 2017
- Fixed bug in subdiv code for static scenes when using tessellation
levels larger than 50.
- Fixed low performance when adding many geometries to a scene.
- Fixed high memory consumption issue when using instances in
dynamic scene (by disabling two level builder for user geometries
and instances).
### New Features in Embree 2.16.0
- Improved multi-segment motion blur support for scenes with
different number of time steps per mesh.
- New top level BVH builder that improves build times and BVH quality
of two-level BVHs.
- Added support to enable only a single ISA. Previously code was
always compiled for SSE2.
- Improved single ray tracing performance for incoherent rays on
AVX-512 architectures by 5-10%.
- Improved packet/hybrid ray tracing performance for incoherent rays
on AVX-512 architectures by 10-30%.
- Improved stream ray tracing performance for coherent rays in
structure-of-pointers layout by 40-70%.
- BVH builder for compact scenes of triangles and quads needs
essentially no temporary memory anymore. This doubles the
maximal scene size that can be rendered in compact mode.
- Triangles no longer store the geometry normal in fast/default mode
which reduces memory consumption by up to 20%.
- Compact mode uses BVH4 now consistently which reduces memory
consumption by up to 10%.
- Reduced memory consumption for small scenes (of 10k-100k primitives)
and dynamic scenes.
- Improved performance of user geometries and instances through BVH8
support.
- The API supports now specifying the geometry ID of a geometry at
construction time. This way matching the geometry ID used by
Embree and the application is simplified.
- Fixed a bug that would have caused a failure of the BVH builder
for dynamic scenes when run on a machine with more then 1000 threads.
- Fixed a bug that could have been triggered when reaching the maximal
number of mappings under Linux (`vm.max_map_count`). This could have
happened when creating a large number of small static scenes.
- Added huge page support for Windows and MacOSX (experimental).
- Added support for Visual Studio 2017.
- Removed support for Visual Studio 2012.
- Precompiled binaries now require a CPU supporting at least the
SSE4.2 ISA.
- We no longer provide precompiled binaries for 32-bit on Windows.
- Under Windows one now has to use the platform toolset option in
CMake to switch to Clang or the Intel® Compiler.
- Fixed a bug for subdivision meshes when using the incoherent scene
flag.
- Fixed a bug in the line geometry intersection, that caused reporting
an invalid line segment intersection with primID -1.
- Buffer stride for vertex buffers of different time steps of triangle
and quad meshes have to be identical now.
- Fixed a bug in the curve geometry intersection code when passed a
perfect cylinder.
### New Features in Embree 2.15.0
- Added `rtcCommitJoin` mode that allows thread to join a build
operation. When using the internal tasking system this allows
Embree to solely use the threads that called `rtcCommitJoin` to
build the scene, while previously also normal worker threads
participated in the build. You should no longer use `rtcCommit`
to join a build.
- Added `rtcDeviceSetErrorFunction2` API call, which sets an error
callback function which additionally gets passed a user provided
pointer (`rtcDeviceSetErrorFunction` is now deprecated).
- Added `rtcDeviceSetMemoryMonitorFunction2` API call, which sets a
memory monitor callback function which additionally get passed a
user provided pointer. (`rtcDeviceSetMemoryMonitorFunction` is now
deprecated).
- Build performance for hair geometry improved by up to 2×.
- Standard BVH build performance increased by 5%.
- Added API extension to use internal Morton-code based builder, the
standard binned-SAH builder, and the spatial split-based SAH builder.
- Added support for BSpline hair and curves. Embree uses
either the Bézier or BSpline basis internally, and converts other
curves, which requires more memory during rendering. For reduced
memory consumption set the `EMBREE_NATIVE_SPLINE_BASIS` to the basis
your application uses (which is set to `BEZIER` by default).
- Setting the number of threads through `tbb::task_scheduler_init`
object on the application side is now working properly.
- Windows and Linux releases are built using AVX-512 support.
- Implemented hybrid traversal for hair and line segments for
improved ray packet performance.
- AVX-512 code compiles with Clang 4.0.0
- Fixed crash when ray packets were disabled in CMake.
### New Features in Embree 2.14.0
- Added `ignore_config_files` option to init flags that allows the
application to ignore Embree configuration files.
- Face-varying interpolation is now supported for subdivision surfaces.
- Up to 16 user vertex buffers are supported for vertex
attribute interpolation.
- Deprecated `rtcSetBoundaryMode` function, please use the new
`rtcSetSubdivisionMode` function.
- Added `RTC_SUBDIV_PIN_BOUNDARY` mode for handling boundaries of
subdivision meshes.
- Added `RTC_SUBDIV_PIN_ALL` mode to enforce linear interpolation
for subdivision meshes.
- Optimized object generation performance for dynamic scenes.
- Reduced memory consumption when using lots of small dynamic objects.
- Fixed bug for subdivision surfaces using low tessellation rates.
- Hair geometry now uses a new ribbon intersector that intersects with
ray-facing quads. The new intersector also returns the v-coordinate
of the hair intersection, and fixes artefacts at junction points
between segments, at the cost of a small performance hit.
- Added `rtcSetBuffer2` function, that additionally gets the number of
elements of a buffer. In dynamic scenes, this function allows to
quickly change buffer sizes, making it possible to change the number
of primitives of a mesh or the number of crease features for
subdivision surfaces.
- Added simple 'viewer_anim' tutorial for rendering key
frame animations and 'buildbench' for measuring BVH (re-)build
performance for static and dynamic scenes.
- Added more AVX-512 optimizations for future architectures.
### New Features in Embree 2.13.0
- Improved performance for compact (but not robust) scenes.
- Added robust mode for motion blurred triangles and quads.
- Added fast dynamic mode for user geometries.
- Up to 20% faster BVH build performance on the second generation
Intel® Xeon Phi™ processor codenamed Knights Landing.
- Improved quality of the spatial split builder.
- Improved performance for coherent streams of ray packets (SOA
layout), e.g. for fast primary visibility.
- Various bug fixes in tessellation cache, quad-based spatial
split builder, etc.
### New Features in Embree 2.12.0
- Added support for multi-segment motion blur for all primitive types.
- API support for stream of pointers to single rays (`rtcIntersect1Mp`
and `rtcOccluded1Mp`)
- Improved BVH refitting performance for dynamic scenes.
- Improved high-quality mode for quads (added spatial split builder
for quads)
- Faster dynamic scenes for triangle and quad-based meshes on AVX2
enabled machines.
- Performance and correctness bugfix in optimization for streams of
coherent (single) rays.
- Fixed large memory consumption (issue introduced in Embree v2.11.0).
If you use Embree v2.11.0 please upgrade to Embree v2.12.0.
- Reduced memory consumption for dynamic scenes containing small
meshes.
- Added support to start and affinitize Intel® TBB worker threads by passing
"`start_threads=1,set_affinity=1`" to `rtcNewDevice`. These settings
are recommended on systems with a high thread count.
- `rtcInterpolate2` can now be called within a displacement shader.
- Added initial support for Microsoft's Parallel Pattern Library (PPL)
as tasking system alternative (for optimal performance Intel® TBB is
highly recommended).
- Updated to Intel® TBB 2017 which is released under the Apache v2.0 license.
- Dropped support for Visual Studio 2012 Win32 compiler. Visual Studio
2012 x64 is still supported.
### New Features in Embree 2.11.0
- Improved performance for streams of coherent (single) rays flagged
with `RTC_INTERSECT_COHERENT`. For such coherent ray streams, e.g.
primary rays, the performance typically improves by 1.3-2×.
- New spatial split BVH builder for triangles, which is 2-6× faster
than the previous version and more memory conservative.
- Improved performance and scalability of all standard BVH builders on
systems with large core counts.
- Fixed `rtcGetBounds` for motion blur scenes.
- Thread affinity is now on by default when running on the latest
Intel® Xeon Phi™ processor.
- Added AVX-512 support for future Intel® Xeon processors.
### New Features in Embree 2.10.0
- Added a new curve geometry which renders the sweep surface of a
circle along a Bézier curve.
- Intersection filters can update the `tfar` ray distance.
- Geometry types can get disabled at compile time.
- Modified and extended the ray stream API.
- Added new callback mechanism for the ray stream API.
- Improved ray stream performance (up to 5-10%).
- Up to 20% faster morton builder on machines with large core counts.
- Lots of optimizations for the second generation Intel® Xeon Phi™
processor codenamed Knights Landing.
- Added experimental support for compressed BVH nodes (reduces node
size to 56-62% of uncompressed size). Compression introduces a
typical performance overhead of ~10%.
- Bugfix in backface culling mode. We do now properly cull the
backfaces and not the frontfaces.
- Feature freeze for the first generation Intel® Xeon Phi™ coprocessor
codenamed Knights Corner. We will still maintain and add bug fixes
to Embree v2.9.0, but Embree 2.10 and future versions will no longer
support it.
### New Features in Embree 2.9.0
- Improved shadow ray performance (10-100% depending on the scene).
- Added initial support for ray streams (10-30% higher performance
depending on ray coherence in the stream).
- Added support to calculate second order derivatives using the
`rtcInterpolate2` function.
- Changed the parametrization for triangular subdivision faces to
the same scheme used for pentagons.
- Added support to query the Embree configuration using the
`rtcDeviceGetParameter` function.
### New Features in Embree 2.8.1
- Added support for setting per geometry tessellation rate (supported
for subdivision and Bézier geometries).
- Added support for motion blurred instances.
### New Features in Embree 2.8.0
- Added support for line segment geometry.
- Added support for quad geometry (replaces triangle-pairs feature).
- Added support for linear motion blur of user geometries.
- Improved performance through AVX-512 optimizations.
- Improved performance of lazy scene build (when using Intel® TBB 4.4 update
2).
- Improved performance through huge page support under linux.
### New Features in Embree 2.7.1
- Internal tasking system supports cancellation of build operations.
- Intel® ISPC mode for robust and compact scenes got significantly faster
(implemented hybrid traversal for bvh4.triangle4v and
bvh4.triangle4i).
- Hair rendering got faster as we fixed some issues with the SAH
heuristic cost factors.
- BVH8 got slightly faster for single ray traversal (improved sorting
when hitting more than 4 boxes).
- BVH build performance got up to 30% faster on CPUs with high core
counts (improved parallel partition code).
- High quality build mode again working properly (spatial splits had
been deactivated in v2.7.0 due to some bug).
- Support for merging two adjacent triangles sharing a common edge
into a triangle-pair primitive (can reduce memory consumption and
BVH build times by up to 50% for mostly quad-based input meshes).
- Internal cleanups (reduced number of traversal kernels by more
templating).
- Reduced stack size requirements of BVH builders.
- Fixed crash for dynamic scenes, triggered by deleting all
geometries from the scene.
### New Features in Embree 2.7.0
- Added device concept to Embree to allow different components of an
application to use Embree without interfering with each other.
- Fixed memory leak in twolevel builder used for dynamic scenes.
- Fixed bug in tessellation cache that caused crashes for subdivision
surfaces.
- Fixed bug in internal task scheduler that caused deadlocks when
using `rtcCommitThread`.
- Improved hit-distance accuracy for thin triangles in robust mode.
- Added support to disable ray packet support in cmake.
### New Features in Embree 2.6.2
- Fixed bug triggered by instantiating motion blur geometry.
- Fixed bug in hit UV coordinates of static subdivision geometries.
- Performance improvements when only changing tessellation levels for
subdivision geometry per frame.
- Added ray packet intersectors for subdivision geometry, resulting in
improved performance for coherent rays.
- Reduced virtual address space usage for static geometries.
- Fixed some AVX2 code paths when compiling with GCC or Clang.
- Bugfix for subdiv patches with non-matching winding order.
- Bugfix in ISA detection of AVX-512.
### New Features in Embree 2.6.1
- Major performance improvements for ray tracing subdivision surfaces,
e.g. up to 2× faster for scenes where only the tessellation levels
are changing per frame, and up to 3× faster for scenes with lots of
crease features
- Initial support for architectures supporting the new 16-wide AVX-512
ISA
- Implemented intersection filter callback support for subdivision
surfaces
- Added `RTC_IGNORE_INVALID_RAYS` CMake option which makes the ray
intersectors more robust against full tree traversal caused by
invalid ray inputs (e.g. INF, NaN, etc)
### New Features in Embree 2.6.0
- Added `rtcInterpolate` function to interpolate per vertex
attributes
- Added `rtcSetBoundaryMode` function that can be used to select the
boundary handling for subdivision surfaces
- Fixed a traversal bug that caused rays with very small ray
direction components to miss geometry
- Performance improvements for the robust traversal mode
- Fixed deadlock when calling `rtcCommit` from multiple
threads on same scene
### New Features in Embree 2.5.1
- On dual socket workstations, the initial BVH build performance
almost doubled through a better memory allocation scheme
- Reduced memory usage for subdivision surface objects with crease
features
- `rtcCommit` performance is robust against unset "flush to zero" and
"denormals are zero" flags. However, enabling these flags in your
application is still recommended
- Reduced memory usage for subdivision surfaces with borders and
infinitely sharp creases
- Lots of internal cleanups and bug fixes for both Intel® Xeon® and
Intel® Xeon Phi™
### New Features in Embree 2.5.0
- Improved hierarchy build performance on both Intel Xeon and Intel
Xeon Phi
- Vastly improved tessellation cache for ray tracing subdivision
surfaces
- Added `rtcGetUserData` API call to query per geometry user pointer
set through `rtcSetUserData`
- Added support for memory monitor callback functions to track and
limit memory consumption
- Added support for progress monitor callback functions to track build
progress and cancel long build operations
- BVH builders can be used to build user defined hierarchies inside
the application (see tutorial [BVH Builder])
- Switched to Intel® TBB as default tasking system on Xeon to get even faster
hierarchy build times and better integration for applications that
also use Intel® TBB
- `rtcCommit` can get called from multiple Intel® TBB threads to join the
hierarchy build operations
### New Features in Embree 2.4
- Support for Catmull Clark subdivision surfaces (triangle/quad base
primitives)
- Support for vector displacements on Catmull Clark subdivision
surfaces
- Various bug fixes (e.g. 4-byte alignment of vertex buffers works)
### New Features in Embree 2.3.3
- BVH builders more robustly handle invalid input data (Intel Xeon
processor family)
- Motion blur support for hair geometry (Xeon)
- Improved motion blur performance for triangle geometry (Xeon)
- Improved robust ray tracing mode (Xeon)
- Added `rtcCommitThread` API call for easier integration into
existing tasking systems (Xeon and Intel Xeon Phi coprocessor)
- Added support for recording and replaying all
`rtcIntersect`/`rtcOccluded` calls (Xeon and Xeon Phi)
### New Features in Embree 2.3.2
- Improved mixed AABB/OBB-BVH for hair geometry (Xeon Phi)
- Reduced amount of pre-allocated memory for BVH builders (Xeon Phi)
- New 64-bit Morton code-based BVH builder (Xeon Phi)
- (Enhanced) Morton code-based BVH builders use now tree rotations to
improve BVH quality (Xeon Phi)
- Bug fixes (Xeon and Xeon Phi)
### New Features in Embree 2.3.1
- High quality BVH mode improves spatial splits which result in up to
30% performance improvement for some scenes (Xeon)
- Compile time enabled intersection filter functions do not reduce
performance if no intersection filter is used in the scene (Xeon and
Xeon Phi)
- Improved ray tracing performance for hair geometry by \>20% on Xeon
Phi. BVH for hair geometry requires 20% less memory
- BVH8 for AVX/AVX2 targets improves performance for single ray
tracing on Haswell by up to 12% and by up to 5% for hybrid (Xeon)
- Memory conservative BVH for Xeon Phi now uses BVH node quantization
to lower memory footprint (requires half the memory footprint of the
default BVH)
### New Features in Embree 2.3
- Support for ray tracing hair geometry (Xeon and Xeon Phi)
- Catching errors through error callback function
- Faster hybrid traversal (Xeon and Xeon Phi)
- New memory conservative BVH for Xeon Phi
- Faster Morton code-based builder on Xeon
- Faster binned-SAH builder on Xeon Phi
- Lots of code cleanups/simplifications/improvements (Xeon and Xeon
Phi)
### New Features in Embree 2.2
- Support for motion blur on Xeon Phi
- Support for intersection filter callback functions
- Support for buffer sharing with the application
- Lots of AVX2 optimizations, e.g. \~20% faster 8-wide hybrid
traversal
- Experimental support for 8-wide (AVX/AVX2) and 16-wide BVHs (Xeon
Phi)
### New Features in Embree 2.1
- New future proof API with a strong focus on supporting dynamic
scenes
- Lots of optimizations for 8-wide AVX2 (Haswell architecture)
- Automatic runtime code selection for SSE, AVX, and AVX2
- Support for user-defined geometry
- New and improved BVH builders:
- Fast adaptive Morton code-based builder (without SAH-based
top-level rebuild)
- Both the SAH and Morton code-based builders got faster (Xeon
Phi)
- New variant of the SAH-based builder using triangle pre-splits
(Xeon Phi)
### New Features in Embree 2.0
- Support for the Intel® Xeon Phi™ coprocessor platform
- Support for high-performance "packet" kernels on SSE, AVX, and Xeon
Phi
- Integration with the Intel® Implicit SPMD Program Compiler (Intel® ISPC)
- Instantiation and fast BVH reconstruction
- Example photo-realistic rendering engine for both C++ and Intel® ISPC

710
Framework/external/embree/CMakeLists.txt vendored Normal file
View file

@ -0,0 +1,710 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
cmake_minimum_required(VERSION 3.10.0)

# Embree version: individual components plus the derived dotted string and a
# single comparable integer (MMmmpp encoded as 10000*M + 100*m + p).
set(EMBREE_VERSION_MAJOR 4)
set(EMBREE_VERSION_MINOR 3)
set(EMBREE_VERSION_PATCH 1)
set(EMBREE_VERSION_NOTE "")
set(EMBREE_VERSION ${EMBREE_VERSION_MAJOR}.${EMBREE_VERSION_MINOR}.${EMBREE_VERSION_PATCH})
math(EXPR EMBREE_VERSION_NUMBER "10000*${EMBREE_VERSION_MAJOR} + 100*${EMBREE_VERSION_MINOR} + ${EMBREE_VERSION_PATCH}")
set(CPACK_RPM_PACKAGE_RELEASE 1)

project(embree${EMBREE_VERSION_MAJOR})
set(EMBREE_PROJECT_COMPILATION ON)

include(CMakeDependentOption)

# We use our own strip tool on macOS to sign during install. This is required
# as CMake modifies RPATH of the binary during install.
if(APPLE AND EMBREE_SIGN_FILE)
  set(EMBREE_STRIP ${CMAKE_STRIP})
  set(CMAKE_STRIP "${PROJECT_BINARY_DIR}/post_install_target.sh")
  configure_file(scripts/post_install_target.sh.in "${PROJECT_BINARY_DIR}/post_install_target.sh" @ONLY)
endif()
# Sign (and, on macOS, strip) the given target's binary after it is built.
# Does nothing unless EMBREE_SIGN_FILE is configured.
macro(SIGN_TARGET target)
  if(EMBREE_SIGN_FILE)
    if(WIN32)
      # on Windows we sign and do not strip as debug symbols not included in binary
      add_custom_command(TARGET ${target} POST_BUILD
        COMMAND ${EMBREE_SIGN_FILE} $<TARGET_FILE:${target}>)
    elseif(APPLE)
      # on MacOSX we strip and sign here for testing purposes but also during install, as CMake modifies binary during install
      add_custom_command(TARGET ${target} POST_BUILD
        COMMAND ${EMBREE_STRIP} -x $<TARGET_FILE:${target}>
        COMMAND ${EMBREE_SIGN_FILE} -o runtime -e "${CMAKE_SOURCE_DIR}/common/cmake/embree.entitlements" $<TARGET_FILE:${target}>)
    else()
      # on Linux signing of binaries is not supported and stripping is done during install
    endif()
  endif()
endmacro()
# Determine the git revision of the source tree, used for version reporting.
# EMBREE_HASH starts at 0 so it is always defined — e.g. for tarball builds
# without a .git directory (the original left it undefined in that case).
SET(EMBREE_HASH 0)
IF(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
  FIND_PACKAGE(Git)
  IF(GIT_FOUND)
    EXECUTE_PROCESS(
      COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
      OUTPUT_VARIABLE EMBREE_HASH
      ERROR_QUIET
      OUTPUT_STRIP_TRAILING_WHITESPACE)
  ENDIF()
ENDIF()
# Opt in to NEW behavior for each policy this project relies on, where the
# running CMake version knows it (equivalent to the per-policy if-chains).
if(COMMAND cmake_policy)
  foreach(embree_policy CMP0003 CMP0042 CMP0072 CMP0022 CMP0074 CMP0135)
    if(POLICY ${embree_policy})
      cmake_policy(SET ${embree_policy} NEW)
    endif()
  endforeach()
endif()
# Hide cache entries users should not normally need to touch; keep the C++
# compiler visible.
mark_as_advanced(CMAKE_BACKWARDS_COMPATIBILITY)
mark_as_advanced(EXECUTABLE_OUTPUT_PATH)
mark_as_advanced(LIBRARY_OUTPUT_PATH)
mark_as_advanced(CMAKE_OSX_ARCHITECTURES)
mark_as_advanced(CMAKE_OSX_DEPLOYMENT_TARGET)
mark_as_advanced(CMAKE_OSX_SYSROOT)
mark_as_advanced(CLEAR CMAKE_CXX_COMPILER)

# Make the project's own CMake modules visible to include()/find_package().
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/common/cmake" ${CMAKE_MODULE_PATH})
include(test)

# Documentation build is an internal-only switch.
set(BUILD_DOC OFF CACHE INTERNAL "build documentation (internal only)")
if(BUILD_DOC)
  add_subdirectory(doc)
endif()
option(EMBREE_TUTORIALS "Enable to build Embree tutorials" ON)

##############################################################
# Embree configuration
##############################################################

# Static vs. shared library: when the platform cannot build shared libraries,
# force the static build and hide the switch.
get_property(SHARED_LIBS_SUPPORTED GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
if(SHARED_LIBS_SUPPORTED)
  option(EMBREE_STATIC_LIB "Build Embree as a static library.")
  mark_as_advanced(CLEAR EMBREE_STATIC_LIB)
else()
  set(EMBREE_STATIC_LIB ON CACHE BOOL "Build Embree as a static library." FORCE)
  mark_as_advanced(EMBREE_STATIC_LIB)
endif()

if(EMBREE_STATIC_LIB)
  set(EMBREE_LIB_TYPE STATIC)
  add_definitions(-DEMBREE_STATIC_LIB)
else()
  set(EMBREE_LIB_TYPE SHARED)
endif()

option(EMBREE_ISPC_SUPPORT "Build Embree with support for ISPC applications." OFF)
if(EMSCRIPTEN)
  # ISPC is not available when targeting Emscripten.
  set(EMBREE_ISPC_SUPPORT OFF CACHE BOOL "Build Embree with support for ISPC applications." FORCE)
endif()

set(EMBREE_API_NAMESPACE "" CACHE STRING "C++ namespace to put API symbols into.")
set(EMBREE_LIBRARY_NAME "embree${EMBREE_VERSION_MAJOR}" CACHE STRING "Name of the embree library file (default is embree${EMBREE_VERSION_MAJOR})")
if(EMBREE_API_NAMESPACE AND EMBREE_ISPC_SUPPORT)
  message(FATAL_ERROR "You cannot enable ISPC when EMBREE_API_NAMESPACE is used.")
endif()
##############################################################################
# Configurations (add configurations also to common/cmake/embree-config.cmake)

option(EMBREE_ZIP_MODE "Create Embree ZIP package" ON)
mark_as_advanced(EMBREE_ZIP_MODE)

option(EMBREE_INSTALL_DEPENDENCIES "Install Embree dependencies in binary packages and install" OFF)
mark_as_advanced(EMBREE_INSTALL_DEPENDENCIES)

option(EMBREE_STAT_COUNTERS "Enables statistic counters.")
option(EMBREE_STACK_PROTECTOR "When enabled Embree compiles with stack protection against return address overrides." OFF)

# SYCL GPU support is not offered on Apple platforms; the dependent options
# below only appear when EMBREE_SYCL_SUPPORT is enabled.
if(NOT APPLE)
  option(EMBREE_SYCL_SUPPORT "Enables SYCL GPU support." OFF)
endif()
cmake_dependent_option(EMBREE_SYCL_LARGEGRF "Enables SYCL large GRF support." OFF "EMBREE_SYCL_SUPPORT" OFF)
cmake_dependent_option(EMBREE_SYCL_DBG "Enables DPC++ debug info." OFF "EMBREE_SYCL_SUPPORT" OFF)
cmake_dependent_option(EMBREE_SYCL_GEOMETRY_CALLBACK "Enabled geometry callbacks which are disabled by default for SYCL." OFF "EMBREE_SYCL_SUPPORT" OFF)
mark_as_advanced(EMBREE_SYCL_LARGEGRF)
mark_as_advanced(EMBREE_SYCL_DBG)
mark_as_advanced(EMBREE_SYCL_GEOMETRY_CALLBACK)
if(EMBREE_SYCL_GEOMETRY_CALLBACK)
  message(WARNING "Enabling EMBREE_SYCL_GEOMETRY_CALLBACK is experimental and may result in poor performance.")
endif()
if(EMBREE_SYCL_SUPPORT)
  add_definitions("-DEMBREE_SYCL_SUPPORT") # FIXME: only use define from rtcore_config.h
endif()

# Hardware-simulation mode requires the rt_validation API with explicitly
# allocated Dispatch Globals.
cmake_dependent_option(EMBREE_SYCL_RT_SIMULATION "Using hardware simulation" OFF "EMBREE_SYCL_SUPPORT" OFF)
mark_as_advanced(EMBREE_SYCL_RT_SIMULATION)
if(EMBREE_SYCL_RT_SIMULATION AND NOT EMBREE_SYCL_RT_VALIDATION_API AND EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
  message(FATAL_ERROR "Using EMBREE_SYCL_RT_SIMULATION requires EMBREE_SYCL_RT_VALIDATION_API=ON and EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS=OFF")
endif()
if(EMBREE_SYCL_RT_SIMULATION)
  add_definitions("-DEMBREE_SYCL_RT_SIMULATION")
endif()

cmake_dependent_option(EMBREE_SYCL_RT_VALIDATION_API "Use rt_validation API instead of IGC provided rt_production API" OFF "EMBREE_SYCL_SUPPORT" OFF)
if(EMBREE_SYCL_RT_VALIDATION_API)
  add_definitions("-DEMBREE_SYCL_RT_VALIDATION_API")
endif()

cmake_dependent_option(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON "EMBREE_SYCL_RT_VALIDATION_API" OFF)
if(EMBREE_SYCL_RT_VALIDATION_API AND NOT EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)
  add_definitions("-DEMBREE_SYCL_ALLOC_DISPATCH_GLOBALS")
endif()
mark_as_advanced(EMBREE_SYCL_RT_VALIDATION_API)
mark_as_advanced(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS)

# Ahead-of-time compilation targets for SYCL kernels.
if(EMBREE_SYCL_SUPPORT)
  set(EMBREE_SYCL_AOT_DEVICES "none" CACHE STRING "SYCL devices to use for AOT compilation")
  set_property(CACHE EMBREE_SYCL_AOT_DEVICES PROPERTY STRINGS none dg2 pvc XE_HPG_CORE XE_HPC_CORE)
  set(EMBREE_SYCL_AOT_DEVICE_REVISION 0)
  if(EMBREE_SYCL_AOT_DEVICES STREQUAL "dg2")
    set(EMBREE_SYCL_AOT_DEVICE_REVISION 4) # FIXME: bug workaround should get removed
  endif()
endif()

cmake_dependent_option(EMBREE_SYCL_L0_RTAS_BUILDER "Enable Level Zero RTAS builder" OFF "EMBREE_SYCL_SUPPORT" OFF)
if(EMBREE_SYCL_L0_RTAS_BUILDER)
  add_definitions("-DEMBREE_SYCL_L0_RTAS_BUILDER")
endif()
# Feature toggles selecting which kernels and geometry types get compiled in.
option(EMBREE_RAY_MASK "Enables ray mask support." ON)
option(EMBREE_BACKFACE_CULLING "Enables backface culling.")
option(EMBREE_BACKFACE_CULLING_CURVES "Enables backface culling for curve primitives." OFF)
option(EMBREE_BACKFACE_CULLING_SPHERES "Enables backface culling for sphere primitives." OFF)
option(EMBREE_FILTER_FUNCTION "Enables filter functions." ON)
option(EMBREE_IGNORE_INVALID_RAYS "Ignores invalid rays." OFF) # FIXME: enable by default?
option(EMBREE_COMPACT_POLYS "Enables double indexed poly layout." OFF)

option(EMBREE_GEOMETRY_TRIANGLE "Enables support for triangle geometries." ON)
option(EMBREE_GEOMETRY_QUAD "Enables support for quad geometries." ON)
option(EMBREE_GEOMETRY_CURVE "Enables support for curve geometries." ON)
option(EMBREE_GEOMETRY_SUBDIVISION "Enables support for subdiv geometries." ON)
option(EMBREE_GEOMETRY_USER "Enables support for user geometries." ON)
option(EMBREE_GEOMETRY_INSTANCE "Enables support for instances." ON)
option(EMBREE_GEOMETRY_INSTANCE_ARRAY "Enables support for instance arrays." ON)

set(EMBREE_MAX_INSTANCE_LEVEL_COUNT 1 CACHE STRING "Maximum number of instance levels.")
# Multi-level instancing only makes sense when some instancing mode is enabled.
if(NOT EMBREE_GEOMETRY_INSTANCE AND NOT EMBREE_GEOMETRY_INSTANCE_ARRAY AND NOT EMBREE_MAX_INSTANCE_LEVEL_COUNT EQUAL 1)
  message(FATAL_ERROR "EMBREE_MAX_INSTANCE_LEVEL_COUNT must be 1 when EMBREE_GEOMETRY_INSTANCE and EMBREE_GEOMETRY_INSTANCE_ARRAY are disabled")
endif()

option(EMBREE_GEOMETRY_GRID "Enables support for grid geometries." ON)
option(EMBREE_GEOMETRY_POINT "Enables support for point geometries." ON)
option(EMBREE_RAY_PACKETS "Enabled support for ray packets." ON)

set(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0 CACHE STRING "Self intersection avoidance factor for flat curves. Specify floating point value in range 0 to inf.")
option(EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE "Enables self intersection avoidance for ray oriented discs." ON)
option(EMBREE_MIN_WIDTH "Enables min-width feature to enlarge curve and point thickness to pixel width." OFF)
##############################################################
# Platform detection and defaults
##############################################################

# Detect ARM targets: Apple silicon via the macOS architecture settings,
# otherwise via the reported processor name.
if(APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_OSX_ARCHITECTURES MATCHES "arm64"))
  message(STATUS "Building for Apple silicon")
  set(EMBREE_ARM ON)
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")
  message(STATUS "Building for AArch64")
  set(EMBREE_ARM ON)
endif()
# Tasking system selection: TBB, the internal scheduler, or (Windows only) PPL.
set(EMBREE_TASKING_SYSTEM "INTERNAL" CACHE STRING "Selects tasking system")
#SET(EMBREE_TBB_COMPONENT "tbb" CACHE STRING "The TBB component/library name.")
if(WIN32)
  set_property(CACHE EMBREE_TASKING_SYSTEM PROPERTY STRINGS TBB INTERNAL PPL)
else()
  set_property(CACHE EMBREE_TASKING_SYSTEM PROPERTY STRINGS TBB INTERNAL)
endif()

# Translate the selection into exactly one TASKING_* flag plus matching
# compile definitions for both C++ and ISPC.
if(EMBREE_TASKING_SYSTEM STREQUAL "TBB")
  set(TASKING_TBB ON)
  set(TASKING_INTERNAL OFF)
  set(TASKING_PPL OFF)
  add_definitions(-DTASKING_TBB)
  list(APPEND ISPC_DEFINITIONS -DTASKING_TBB)
elseif(EMBREE_TASKING_SYSTEM STREQUAL "PPL")
  set(TASKING_PPL ON)
  set(TASKING_TBB OFF)
  set(TASKING_INTERNAL OFF)
  add_definitions(-DTASKING_PPL)
  list(APPEND ISPC_DEFINITIONS -DTASKING_PPL)
else()
  set(TASKING_INTERNAL ON)
  set(TASKING_TBB OFF)
  set(TASKING_PPL OFF)
  add_definitions(-DTASKING_INTERNAL)
  list(APPEND ISPC_DEFINITIONS -DTASKING_INTERNAL)
endif()
##############################################################
# Compiler
##############################################################
# TODO: fixme, define relwithassert also for icc, msvc, ... or remove when debug build with dpcpp is useable
IF(EMBREE_SYCL_SUPPORT)
SET(CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo;RelWithAssert")
ELSE()
SET(CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo")
ENDIF()
# Validate build type
# (substring match against the allowed configuration list; -1 means not found)
IF (CMAKE_BUILD_TYPE)
string(FIND "${CONFIGURATION_TYPES}" "${CMAKE_BUILD_TYPE}" MATCHED_CONFIG)
IF (${MATCHED_CONFIG} EQUAL -1)
message(FATAL_ERROR "CMAKE_BUILD_TYPE (${CMAKE_BUILD_TYPE}) allows only the following values: ${CONFIGURATION_TYPES}")
ENDIF()
ENDIF()
message(DEBUG "CMAKE_GENERATOR_TOOLSET: ${CMAKE_GENERATOR_TOOLSET}")
message(DEBUG "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
message(DEBUG "CMAKE_CXX_COMPILER: ${CMAKE_CXX_COMPILER}")
# On Windows (multi-config generators) detect the toolset and include the
# matching per-compiler flags module from common/cmake.
IF (WIN32)
IF (NOT DEFAULT_CMAKE_CONFIGURATION_TYPES_SET)
SET(CMAKE_CONFIGURATION_TYPES "${CONFIGURATION_TYPES}" CACHE STRING "List of generated configurations." FORCE)
SET(DEFAULT_CMAKE_CONFIGURATION_TYPES_SET ON CACHE INTERNAL "Default CMake configuration types set.")
ENDIF()
SET_PROPERTY(GLOBAL PROPERTY USE_FOLDERS ON)
IF ((${CMAKE_CXX_COMPILER_ID} MATCHES "IntelLLVM") OR
(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang" AND ${CMAKE_CXX_COMPILER} MATCHES "icx") OR
(${CMAKE_CXX_COMPILER} MATCHES "dpcpp"))
MESSAGE("-- DPCPP compiler detected")
INCLUDE(dpcpp)
ELSEIF(${CMAKE_GENERATOR_TOOLSET} MATCHES "^LLVM")
set(EMBREE_SYCL_SUPPORT OFF)
MESSAGE("-- CLANG compiler detected")
INCLUDE(clang)
ELSEIF(${CMAKE_GENERATOR_TOOLSET} MATCHES "^Intel")
set(EMBREE_SYCL_SUPPORT OFF)
MESSAGE("-- Intel compiler detected")
INCLUDE (intel)
ELSEIF(${CMAKE_CXX_COMPILER_FRONTEND_VARIANT} MATCHES "GNU")
set(EMBREE_SYCL_SUPPORT OFF)
MESSAGE("-- GNU-like compiler detected")
INCLUDE(gnu)
ELSE()
set(EMBREE_SYCL_SUPPORT OFF)
IF (EMBREE_ISA_AVX512)
MESSAGE(FATAL_ERROR "Microsoft Visual C++ Compiler does not support AVX512. Please use Intel Compiler or Clang.")
ENDIF()
MESSAGE("-- MSVC detected")
INCLUDE (msvc)
ENDIF()
ELSE (WIN32)
IF(CMAKE_CXX_COMPILER_WRAPPER STREQUAL "CrayPrgEnv")
INCLUDE (crayprgenv)
ELSE()
# Derive the flags-module name from the compiler id: appleclang -> clang,
# intelllvm -> dpcpp, and icpx/dpcpp driver names also map to dpcpp.
GET_FILENAME_COMPONENT(CXX_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME)
STRING(TOLOWER "${CMAKE_CXX_COMPILER_ID}" _LOWER_CXX_COMPILER_ID)
STRING(REPLACE "appleclang" "clang" _LOWER_CXX_COMPILER_ID ${_LOWER_CXX_COMPILER_ID})
STRING(REPLACE "intelllvm" "dpcpp" _LOWER_CXX_COMPILER_ID ${_LOWER_CXX_COMPILER_ID})
IF(${_LOWER_CXX_COMPILER_ID} MATCHES "clang" AND ${CXX_COMPILER_NAME} MATCHES "icpx")
STRING(REPLACE "clang" "dpcpp" _LOWER_CXX_COMPILER_ID ${_LOWER_CXX_COMPILER_ID})
ENDIF()
IF(${_LOWER_CXX_COMPILER_ID} MATCHES "clang" AND ${CXX_COMPILER_NAME} MATCHES "dpcpp")
STRING(REPLACE "clang" "dpcpp" _LOWER_CXX_COMPILER_ID ${_LOWER_CXX_COMPILER_ID})
ENDIF()
IF (EMBREE_SYCL_SUPPORT)
SET(_LOWER_CXX_COMPILER_ID "dpcpp")
ENDIF()
STRING(TOUPPER "${_LOWER_CXX_COMPILER_ID}" _UPPER_CXX_COMPILER_ID)
MESSAGE("-- ${_UPPER_CXX_COMPILER_ID} detected")
# The include is OPTIONAL so unsupported compilers fail with a clear error.
INCLUDE(${_LOWER_CXX_COMPILER_ID} OPTIONAL RESULT_VARIABLE COMPILER_FOUND)
IF (NOT COMPILER_FOUND)
MESSAGE(FATAL_ERROR "Unsupported compiler: " ${CMAKE_CXX_COMPILER_ID})
ENDIF ()
ENDIF ()
# Single-config generators default to Release when nothing was specified.
IF(NOT CMAKE_BUILD_TYPE)
SET(CMAKE_BUILD_TYPE "Release" CACHE STRING "Specifies the build type." FORCE)
SET_PROPERTY(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${CONFIGURATION_TYPES})
ENDIF(NOT CMAKE_BUILD_TYPE)
OPTION(CMAKE_VERBOSE_MAKEFILE "Enables verbose mode.")
MARK_AS_ADVANCED(CLEAR CMAKE_VERBOSE_MAKEFILE)
ENDIF (WIN32)
##############################################################
# ISA configuration
##############################################################
# just for compatibility with old naming
IF(DEFINED EMBREE_ISA_AVX512SKX)
UNSET(EMBREE_ISA_AVX512 CACHE)
SET(EMBREE_ISA_AVX512 ${EMBREE_ISA_AVX512SKX} CACHE BOOL "")
ENDIF()
# Platform-dependent default for the highest supported ISA.
IF (CMAKE_CXX_COMPILER_WRAPPER STREQUAL "CrayPrgEnv")
SET(EMBREE_MAX_ISA "DEFAULT" CACHE STRING "Selects highest ISA to support.")
ELSEIF (EMSCRIPTEN)
SET(EMBREE_MAX_ISA "SSE2" CACHE STRING "Selects highest ISA to support.")
ELSE()
SET(EMBREE_MAX_ISA "NONE" CACHE STRING "Selects highest ISA to support.")
ENDIF()
IF (EMBREE_ARM)
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE NEON NEON2X)
ELSE()
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE SSE2 SSE4.2 AVX AVX2 AVX512 DEFAULT)
ENDIF()
# Three modes:
#  NONE    - expose one option per ISA, defaulted by compiler capability probes
#  DEFAULT - auto-detect a single ISA for the host
#  <ISA>   - enable every ISA up to and including the named one
IF (EMBREE_MAX_ISA STREQUAL "NONE")
IF (EMBREE_ARM)
IF (APPLE)
OPTION(EMBREE_ISA_NEON "Enables NEON ISA." OFF)
OPTION(EMBREE_ISA_NEON2X "Enables NEON ISA double pumped." ON)
ELSE()
OPTION(EMBREE_ISA_NEON "Enables NEON ISA." ON)
OPTION(EMBREE_ISA_NEON2X "Enables NEON ISA double pumped." OFF)
ENDIF()
ELSE()
# Probe whether the compiler accepts the flags for each AVX level.
TRY_COMPILE(COMPILER_SUPPORTS_AVX "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX})
TRY_COMPILE(COMPILER_SUPPORTS_AVX2 "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX2})
TRY_COMPILE(COMPILER_SUPPORTS_AVX512 "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX512})
OPTION(EMBREE_ISA_SSE2 "Enables SSE2 ISA." ON)
OPTION(EMBREE_ISA_SSE42 "Enables SSE4.2 ISA." ON)
OPTION(EMBREE_ISA_AVX "Enables AVX ISA." ${COMPILER_SUPPORTS_AVX})
OPTION(EMBREE_ISA_AVX2 "Enables AVX2 ISA." ${COMPILER_SUPPORTS_AVX2})
IF (WIN32 OR APPLE)
OPTION(EMBREE_ISA_AVX512 "Enables AVX512 ISA." OFF)
ELSE()
OPTION(EMBREE_ISA_AVX512 "Enables AVX512 ISA." ${COMPILER_SUPPORTS_AVX512})
ENDIF()
# Don't use OPTION, but still set them to OFF, so that embree-config.cmake is consistent with its definitions
SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
ENDIF()
ELSEIF (EMBREE_MAX_ISA STREQUAL "DEFAULT")
# Clear the per-ISA cache options and auto-detect a single ISA instead.
UNSET(EMBREE_ISA_NEON CACHE)
UNSET(EMBREE_ISA_NEON2X CACHE)
UNSET(EMBREE_ISA_SSE2 CACHE)
UNSET(EMBREE_ISA_SSE42 CACHE)
UNSET(EMBREE_ISA_AVX CACHE)
UNSET(EMBREE_ISA_AVX2 CACHE)
UNSET(EMBREE_ISA_AVX512 CACHE)
SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
SET(EMBREE_ISA_SSE2 OFF)
SET(EMBREE_ISA_SSE42 OFF)
SET(EMBREE_ISA_AVX OFF)
SET(EMBREE_ISA_AVX2 OFF)
SET(EMBREE_ISA_AVX512 OFF)
MESSAGE(STATUS "Detecting default ISA...")
INCLUDE(check_isa_default)
CHECK_ISA_DEFAULT(EMBREE_ISA_DEFAULT)
MESSAGE(STATUS "Detected default ISA: ${EMBREE_ISA_DEFAULT}")
SET(EMBREE_ISA_${EMBREE_ISA_DEFAULT} ON)
ELSE()
# An explicit maximum ISA was named: map it to a numeric level and enable
# every ISA at or below that level for the current architecture family.
UNSET(EMBREE_ISA_NEON CACHE)
UNSET(EMBREE_ISA_NEON2X CACHE)
UNSET(EMBREE_ISA_SSE2 CACHE)
UNSET(EMBREE_ISA_SSE42 CACHE)
UNSET(EMBREE_ISA_AVX CACHE)
UNSET(EMBREE_ISA_AVX2 CACHE)
UNSET(EMBREE_ISA_AVX512 CACHE)
IF(EMBREE_MAX_ISA STREQUAL "NEON")
SET(ISA 1)
ELSEIF(EMBREE_MAX_ISA STREQUAL "NEON2X")
SET(ISA 2)
ELSEIF(EMBREE_MAX_ISA STREQUAL "SSE2")
SET(ISA 1)
ELSEIF(EMBREE_MAX_ISA STREQUAL "SSE4.2")
SET(ISA 2)
ELSEIF(EMBREE_MAX_ISA STREQUAL "AVX")
SET(ISA 3)
ELSEIF(EMBREE_MAX_ISA STREQUAL "AVX2")
SET(ISA 4)
ELSEIF(EMBREE_MAX_ISA STREQUAL "AVX512")
SET(ISA 5)
ELSEIF(EMBREE_MAX_ISA STREQUAL "AVX512SKX") # just for compatibility
SET(ISA 5)
ELSE()
MESSAGE(FATAL_ERROR "Unsupported ISA specified: " ${EMBREE_MAX_ISA})
ENDIF()
SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
SET(EMBREE_ISA_SSE2 OFF)
SET(EMBREE_ISA_SSE42 OFF)
SET(EMBREE_ISA_AVX OFF)
SET(EMBREE_ISA_AVX2 OFF)
SET(EMBREE_ISA_AVX512 OFF)
IF (EMBREE_ARM)
IF (ISA GREATER 0)
SET(EMBREE_ISA_NEON ON)
ENDIF ()
IF (ISA GREATER 1)
SET(EMBREE_ISA_NEON2X ON)
ENDIF ()
ELSE()
IF (ISA GREATER 0)
SET(EMBREE_ISA_SSE2 ON)
ENDIF ()
IF (ISA GREATER 1)
SET(EMBREE_ISA_SSE42 ON)
ENDIF ()
IF (ISA GREATER 2)
SET(EMBREE_ISA_AVX ON)
ENDIF ()
IF (ISA GREATER 3)
SET(EMBREE_ISA_AVX2 ON)
ENDIF ()
IF (ISA GREATER 4)
SET(EMBREE_ISA_AVX512 ON)
ENDIF ()
ENDIF()
ENDIF()
IF(CMAKE_CXX_COMPILER_WRAPPER STREQUAL "CrayPrgEnv")
SET(EMBREE_ISA_SSE42 OFF)
ENDIF()
# Static libraries on macOS with AppleClang >= 9 cannot combine multiple ISAs
# (symbol conflicts), so refuse that configuration early.
IF (APPLE AND EMBREE_STATIC_LIB)
# count number of set ISAs
SET(NUMISA 0)
IF (EMBREE_ISA_NEON)
MATH(EXPR NUMISA "${NUMISA}+1")
ENDIF()
IF (EMBREE_ISA_NEON2X)
MATH(EXPR NUMISA "${NUMISA}+1")
ENDIF()
IF (EMBREE_ISA_SSE2)
MATH(EXPR NUMISA "${NUMISA}+1")
ENDIF()
IF (EMBREE_ISA_SSE42)
MATH(EXPR NUMISA "${NUMISA}+1")
ENDIF()
IF (EMBREE_ISA_AVX)
MATH(EXPR NUMISA "${NUMISA}+1")
ENDIF()
IF (EMBREE_ISA_AVX2)
MATH(EXPR NUMISA "${NUMISA}+1")
ENDIF()
IF (EMBREE_ISA_AVX512)
MATH(EXPR NUMISA "${NUMISA}+1")
ENDIF()
IF (NUMISA GREATER 1)
IF (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
IF (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER "9.0.0" OR ${CMAKE_CXX_COMPILER_VERSION} VERSION_EQUAL "9.0.0")
MESSAGE(FATAL_ERROR "Using Embree as static library is not supported with AppleClang >= 9.0 when multiple ISAs are selected. Please either build a shared library or enable only one ISA.")
ENDIF()
ENDIF()
ENDIF()
ENDIF()
##############################################################
# ISA configuration continued
##############################################################
# Numeric ranks used to find the lowest enabled ISA (compile flags for the
# whole library are taken from the lowest ISA; higher ISAs get per-file flags).
SET(SSE2 0)
SET(SSE42 1)
SET(AVX 2)
SET(AVX2 3)
SET(AVX512 4)
UNSET(FLAGS_LOWEST)
SET(ISA_LOWEST -1)
SET(ISA_LOWEST_AVX 2)
IF (EMBREE_ARM)
IF (EMBREE_ISA_NEON2X)
LIST(APPEND ISPC_TARGETS "neon-i32x8")
ELSEIF (EMBREE_ISA_NEON)
LIST(APPEND ISPC_TARGETS "neon-i32x4")
ENDIF()
ENDIF()
# On ARM the NEON variants reuse the SSE2/AVX2 code paths.
IF (EMBREE_ISA_NEON)
SET(EMBREE_ISA_SSE2 ON)
ENDIF()
IF (EMBREE_ISA_NEON2X)
SET(EMBREE_ISA_SSE2 OFF)
SET(EMBREE_ISA_SSE42 OFF)
SET(EMBREE_ISA_AVX OFF)
SET(EMBREE_ISA_AVX2 ON)
ENDIF()
# For each enabled ISA: define the EMBREE_TARGET_* macro, register the ISPC
# target, and record the first (lowest) ISA's rank and compile flags.
IF (EMBREE_ISA_SSE2)
ADD_DEFINITIONS(-DEMBREE_TARGET_SSE2)
IF (NOT EMBREE_ARM)
LIST(APPEND ISPC_TARGETS "sse2")
ENDIF()
IF(NOT FLAGS_LOWEST)
SET(ISA_LOWEST ${SSE2})
SET(FLAGS_LOWEST ${FLAGS_SSE2})
ENDIF()
ENDIF()
IF (EMBREE_ISA_SSE42)
ADD_DEFINITIONS(-DEMBREE_TARGET_SSE42)
IF (NOT EMBREE_ARM)
LIST(APPEND ISPC_TARGETS "sse4")
ENDIF()
IF(NOT FLAGS_LOWEST)
SET(ISA_LOWEST ${SSE42})
SET(FLAGS_LOWEST ${FLAGS_SSE42})
ENDIF()
ENDIF ()
IF (EMBREE_ISA_AVX)
ADD_DEFINITIONS(-DEMBREE_TARGET_AVX)
IF (NOT EMBREE_ARM)
LIST(APPEND ISPC_TARGETS "avx")
ENDIF()
IF(NOT FLAGS_LOWEST)
SET(ISA_LOWEST ${AVX})
SET(ISA_LOWEST_AVX ${AVX})
SET(FLAGS_LOWEST ${FLAGS_AVX})
ENDIF()
ENDIF ()
IF (EMBREE_ISA_AVX2)
ADD_DEFINITIONS(-DEMBREE_TARGET_AVX2)
IF (NOT EMBREE_ARM)
LIST(APPEND ISPC_TARGETS "avx2")
ENDIF()
IF(NOT FLAGS_LOWEST)
SET(ISA_LOWEST ${AVX2})
SET(ISA_LOWEST_AVX ${AVX2})
SET(FLAGS_LOWEST ${FLAGS_AVX2})
ENDIF()
ENDIF ()
IF (EMBREE_ISA_AVX512)
ADD_DEFINITIONS(-DEMBREE_TARGET_AVX512)
IF (NOT EMBREE_ARM)
LIST(APPEND ISPC_TARGETS "avx512skx-i32x16")
ENDIF()
IF(NOT FLAGS_LOWEST)
SET(ISA_LOWEST ${AVX512})
SET(ISA_LOWEST_AVX ${AVX512})
SET(FLAGS_LOWEST ${FLAGS_AVX512})
ENDIF()
ENDIF ()
IF (ISA_LOWEST EQUAL -1)
MESSAGE(FATAL_ERROR "You have to enable at least one ISA!")
ENDIF()
INCLUDE (ispc)
##############################################################
# Create Binary Packages (uses above config options)
##############################################################
include(package)
##############################################################
# Create Config files
##############################################################
# NOTE(review): these configure_file calls write generated headers/maps into
# the source tree (PROJECT_SOURCE_DIR) rather than the binary dir; this is
# the vendored project's own convention -- confirm before changing.
CONFIGURE_FILE(
"${PROJECT_SOURCE_DIR}/kernels/config.h.in"
"${PROJECT_SOURCE_DIR}/kernels/config.h"
)
INCLUDE_DIRECTORIES(${PROJECT_BINARY_DIR})
CONFIGURE_FILE(
"${PROJECT_SOURCE_DIR}/kernels/rtcore_config.h.in"
"${PROJECT_SOURCE_DIR}/include/embree4/rtcore_config.h"
)
CONFIGURE_FILE(
"${PROJECT_SOURCE_DIR}/kernels/hash.h.in"
"${PROJECT_SOURCE_DIR}/kernels/hash.h"
)
CONFIGURE_FILE(
"${PROJECT_SOURCE_DIR}/kernels/export.linux.map.in"
"${PROJECT_SOURCE_DIR}/kernels/export.linux.map"
)
CONFIGURE_FILE(
"${PROJECT_SOURCE_DIR}/kernels/export.macosx.map.in"
"${PROJECT_SOURCE_DIR}/kernels/export.macosx.map"
)
##############################################################
# Output paths
##############################################################
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
SET(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
SET(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
##############################################################
# Directories to compile
##############################################################
ADD_SUBDIRECTORY(common)
ADD_SUBDIRECTORY(kernels)
IF (EMBREE_TUTORIALS)
ADD_SUBDIRECTORY(tutorials)
ENDIF()
##############################################################
# Uninstall
##############################################################
# Provide a "make uninstall" target driven by a generated script.
IF (NOT TARGET uninstall)
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/common/cmake/uninstall.cmake.in"
"${CMAKE_CURRENT_BINARY_DIR}/uninstall.cmake"
IMMEDIATE @ONLY)
add_custom_target(uninstall
COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/uninstall.cmake")
SET_PROPERTY(TARGET uninstall PROPERTY FOLDER CMakePredefinedTargets)
ENDIF()
##############################################################
# Has to be last
##############################################################
INCLUDE(CPack)

View file

@ -0,0 +1,103 @@
{
"version": 4,
"cmakeMinimumRequired": {
"major": 3,
"minor": 11,
"patch": 0
},
"include": [
"scripts/cmake-presets/os.json",
"scripts/cmake-presets/package.json",
"scripts/cmake-presets/compiler.json",
"scripts/cmake-presets/tbb.json",
"scripts/cmake-presets/continuous.json",
"scripts/cmake-presets/nightly.json",
"scripts/cmake-presets/release.json",
"scripts/cmake-presets/integrate.json",
"scripts/cmake-presets/performance.json"
],
"configurePresets": [
{
"name": "package-linux-icx-ispc1_19_0-tbb2021_9_0-perf",
"inherits": ["package-linux", "env", "icx", "ispc1_19_0", "tbb2021_9_0"],
"binaryDir": "${sourceDir}/build",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"EMBREE_MAX_ISA": "AVX512",
"EMBREE_USE_GOOGLE_BENCHMARK": "ON",
"EMBREE_BUILD_GOOGLE_BENCHMARK_FROM_SOURCE": "ON"
}
},
{
"name": "package-linux-icx-sycl-ispc1_19_0-tbb2021_9_0-perf",
"inherits": ["package-linux", "env", "icx", "ispc1_19_0", "tbb2021_9_0"],
"binaryDir": "${sourceDir}/build",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"EMBREE_SYCL_SUPPORT": "ON",
"EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF",
"EMBREE_SYCL_AOT_DEVICES": "none",
"EMBREE_MAX_ISA": "AVX512",
"EMBREE_USE_GOOGLE_BENCHMARK": "ON",
"EMBREE_BUILD_GOOGLE_BENCHMARK_FROM_SOURCE": "ON"
}
},
{
"name": "package-windows-v141-ispc1_19_0-tbb2021_9_0",
"inherits": ["package-windows", "env", "v141", "ispc1_19_0", "tbb2021_9_0"],
"binaryDir": "${sourceDir}/build",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"EMBREE_MAX_ISA": "SSE2"
}
},
{
"name": "package-windows-v140-ispc1_19_0-tbb2021_9_0",
"inherits": ["package-windows", "env", "v140", "ispc1_19_0", "tbb2021_9_0"],
"binaryDir": "${sourceDir}/build",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"EMBREE_MAX_ISA": "SSE2"
}
},
{
"name": "package-windows-icx-sycl-ispc1_19_0-tbb2021_9_0",
"inherits": ["package-windows", "env", "icx-windows", "ispc1_19_0", "tbb2021_9_0"],
"binaryDir": "${sourceDir}/build",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"EMBREE_SYCL_SUPPORT": "ON",
"EMBREE_SYCL_L0_RTAS_BUILDER" : "OFF",
"EMBREE_SYCL_AOT_DEVICES": "none",
"EMBREE_MAX_ISA": "AVX512"
}
},
{
"name": "linux-coverity",
"inherits": ["package-linux", "env", "gcc", "tasking_internal"],
"binaryDir": "${sourceDir}/build",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"EMBREE_MAX_ISA": "SSE2"
}
}
]
}

View file

@ -0,0 +1,6 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# CTest/CDash project name used when submitting dashboard results.
set(CTEST_PROJECT_NAME "Embree")
# Pinned revision of the test-models archive; presumably consumed by the test
# harness when fetching models -- TODO confirm against the test scripts.
set(TEST_MODELS_HASH 05b5a61035485d3090868f9abf5cc057d1e31101)

202
Framework/external/embree/LICENSE.txt vendored Normal file
View file

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

9926
Framework/external/embree/README.md vendored Normal file

File diff suppressed because it is too large Load diff

14
Framework/external/embree/SECURITY.md vendored Normal file
View file

@ -0,0 +1,14 @@
Security Policy
===============
Intel is committed to rapidly addressing security vulnerabilities
affecting our customers and providing clear guidance on the solution,
impact, severity and mitigation.
Reporting a Vulnerability
-------------------------
Please [report any security vulnerabilities][guidelines] in this project
utilizing the [guidelines here][guidelines].
[guidelines]: https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html "Vulnerability Handling Guidelines"

View file

@ -0,0 +1,8 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Build the common support libraries used by the embree kernels.
ADD_SUBDIRECTORY(sys)
ADD_SUBDIRECTORY(math)
ADD_SUBDIRECTORY(simd)
ADD_SUBDIRECTORY(lexers)
ADD_SUBDIRECTORY(tasking)

View file

@ -0,0 +1,56 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <functional>
#include "parallel_reduce.h"
namespace embree
{
/* Parallel analogue of std::any_of over the index range [first,last):
 * returns true iff pred(i) holds for at least one index i.
 * With TBB, a hit cancels the surrounding task group so remaining
 * iterations are skipped early; without TBB it falls back to a
 * parallel_reduce OR-fold (no early exit). */
template<typename Index, class UnaryPredicate>
__forceinline bool parallel_any_of (Index first, Index last, UnaryPredicate pred)
{
std::atomic_bool ret;
ret = false;
#if defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
// oneTBB (>= 12002): pass an explicit context so workers can observe and
// trigger cancellation.
tbb::task_group_context context;
tbb::parallel_for(tbb::blocked_range<size_t>{first, last}, [&ret,pred,&context](const tbb::blocked_range<size_t>& r) {
if (context.is_group_execution_cancelled()) return;
for (size_t i = r.begin(); i != r.end(); ++i) {
if (pred(i)) {
ret = true;
context.cancel_group_execution();
}
}
});
#else
// Legacy TBB: use the implicit per-task context of the calling task.
tbb::parallel_for(tbb::blocked_range<size_t>{first, last}, [&ret,pred](const tbb::blocked_range<size_t>& r) {
if (tbb::task::self().is_cancelled()) return;
for (size_t i = r.begin(); i != r.end(); ++i) {
if (pred(i)) {
ret = true;
tbb::task::self().cancel_group_execution();
}
}
});
#endif
#else
// Non-TBB backends: reduce per-range any-of results with logical OR.
ret = parallel_reduce (first, last, false, [pred](const range<size_t>& r)->bool {
bool localret = false;
for (auto i=r.begin(); i<r.end(); ++i) {
localret |= pred(i);
}
return localret;
},
std::bit_or<bool>()
);
#endif
return ret;
}
} // end namespace

View file

@ -0,0 +1,93 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
namespace embree
{
/* Stable in-place compaction: copies every element of data[first,last) that
 * satisfies 'predicate' toward the front (preserving order) and returns the
 * index one past the last kept element. Elements at or beyond the returned
 * index keep whatever values the pass left behind. */
template<typename Ty, typename Index, typename Predicate>
inline Index sequential_filter( Ty* data, const Index first, const Index last, const Predicate& predicate)
{
Index write = first;
Index read = first;
while (read < last) {
if (predicate(data[read])) {
data[write] = data[read];
++write;
}
++read;
}
return write;
}
/* Parallel version of sequential_filter: stably compacts the elements of
 * data[begin,end) that satisfy 'predicate' and returns one past the last
 * kept element. Each task filters its own slice, then a second parallel
 * pass copies elements that ended up beyond the global compaction point
 * into the gaps left by earlier slices. */
template<typename Ty, typename Index, typename Predicate>
inline Index parallel_filter( Ty* data, const Index begin, const Index end, const Index minStepSize, const Predicate& predicate)
{
/* sequential fallback */
if (end-begin <= minStepSize)
return sequential_filter(data,begin,end,predicate);
/* calculate number of tasks to use */
enum { MAX_TASKS = 64 };
const Index numThreads = TaskScheduler::threadCount();
const Index numBlocks = (end-begin+minStepSize-1)/minStepSize;
const Index taskCount = min(numThreads,numBlocks,(Index)MAX_TASKS);
/* filter blocks */
// nused[t] = elements kept in task t's slice, nfree[t] = holes left behind
Index nused[MAX_TASKS];
Index nfree[MAX_TASKS];
parallel_for(taskCount, [&](const Index taskIndex)
{
const Index i0 = begin+(taskIndex+0)*(end-begin)/taskCount;
const Index i1 = begin+(taskIndex+1)*(end-begin)/taskCount;
const Index i2 = sequential_filter(data,i0,i1,predicate);
nused[taskIndex] = i2-i0;
nfree[taskIndex] = i1-i2;
});
/* calculate offsets */
// sused = total kept elements, pfree[t] = exclusive prefix sum of holes
Index sused=0;
Index sfree=0;
Index pfree[MAX_TASKS];
for (Index i=0; i<taskCount; i++)
{
sused+=nused[i];
Index cfree = nfree[i]; pfree[i] = sfree; sfree+=cfree;
}
/* return if we did not filter out any element */
assert(sfree <= end-begin);
assert(sused <= end-begin);
if (sused == end-begin)
return end;
/* otherwise we have to copy misplaced elements around */
parallel_for(taskCount, [&](const Index taskIndex)
{
/* destination to write elements to */
Index dst = begin+(taskIndex+0)*(end-begin)/taskCount+nused[taskIndex];
Index dst_end = min(dst+nfree[taskIndex],begin+sused);
if (dst_end <= dst) return;
/* range of misplaced elements to copy to destination */
Index r0 = pfree[taskIndex];
Index r1 = r0+dst_end-dst;
/* find range in misplaced elements in back to front order */
/* NOTE(review): the inner loop re-declares 'i', shadowing the outer task
 * index; 'k1' and 'src' are computed with the outer 'i' while the copy
 * body uses the inner one. Looks intentional but worth confirming. */
Index k0=0;
for (Index i=taskCount-1; i>0; i--)
{
if (k0 > r1) break;
Index k1 = k0+nused[i];
Index src = begin+(i+0)*(end-begin)/taskCount+nused[i];
for (Index i=max(r0,k0); i<min(r1,k1); i++) {
Index isrc = src-i+k0-1;
assert(dst >= begin && dst < end);
assert(isrc >= begin && isrc < end);
data[dst++] = data[isrc];
}
k0 = k1;
}
});
return begin+sused;
}
}

View file

@ -0,0 +1,161 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../tasking/taskscheduler.h"
#include "../sys/array.h"
#include "../math/emath.h"
#include "../math/range.h"
namespace embree
{
/* parallel_for without range: invokes func(i) once for every index i in
 * [0,N). Dispatches to the configured tasking backend (internal scheduler,
 * TBB, or PPL). Cancellation/exceptions inside workers surface as a rethrown
 * exception (internal) or a std::runtime_error (TBB). */
template<typename Index, typename Func>
__forceinline void parallel_for( const Index N, const Func& func)
{
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
if (N) {
TaskScheduler::TaskGroupContext context;
TaskScheduler::spawn(Index(0),N,Index(1),[&] (const range<Index>& r) {
assert(r.size() == 1);
func(r.begin());
},&context);
TaskScheduler::wait();
// propagate a worker-side exception to the caller
if (context.cancellingException != nullptr) {
std::rethrow_exception(context.cancellingException);
}
}
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
tbb::task_group_context context;
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
});
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
#endif
#elif defined(TASKING_PPL)
concurrency::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
});
#else
# error "no tasking system enabled"
#endif
}
/* parallel for with range and granularity: invokes func on subranges of
   [first,last); minStepSize is a lower bound on the subrange size where the
   backend supports it */
template<typename Index, typename Func>
__forceinline void parallel_for( const Index first, const Index last, const Index minStepSize, const Func& func)
{
assert(first <= last);
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
/* internal scheduler: rethrow any exception that cancelled the group */
TaskScheduler::TaskGroupContext context;
TaskScheduler::spawn(first,last,minStepSize,func,&context);
TaskScheduler::wait();
if (context.cancellingException != nullptr) {
std::rethrow_exception(context.cancellingException);
}
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
tbb::task_group_context context;
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
func(range<Index>(r.begin(),r.end()));
},context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
func(range<Index>(r.begin(),r.end()));
});
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
#endif
#elif defined(TASKING_PPL)
/* NOTE(review): the PPL path ignores minStepSize and always passes
   single-element ranges */
concurrency::parallel_for(first, last, Index(1) /*minStepSize*/, [&](Index i) {
func(range<Index>(i,i+1));
});
#else
# error "no tasking system enabled"
#endif
}
/* parallel for over [first,last): forwards to the granularity-aware
   overload using the smallest possible step size of one */
template<typename Index, typename Func>
__forceinline void parallel_for( const Index first, const Index last, const Func& func)
{
  assert(first <= last);
  parallel_for(first, last, Index(1), func);
}
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION > 4001)
/* static work distribution: uses TBB's simple_partitioner */
template<typename Index, typename Func>
__forceinline void parallel_for_static( const Index N, const Func& func)
{
#if TBB_INTERFACE_VERSION >= 12002
tbb::task_group_context context;
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},tbb::simple_partitioner(),context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},tbb::simple_partitioner());
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
#endif
}
typedef tbb::affinity_partitioner affinity_partitioner;
/* affinity-aware work distribution: 'ap' carries thread-assignment state
   across repeated calls */
template<typename Index, typename Func>
__forceinline void parallel_for_affinity( const Index N, const Func& func, tbb::affinity_partitioner& ap)
{
#if TBB_INTERFACE_VERSION >= 12002
tbb::task_group_context context;
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},ap,context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},ap);
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
#endif
}
#else
/* fallbacks without TBB: both variants degrade to a plain parallel_for and
   the partitioner becomes an empty dummy type */
template<typename Index, typename Func>
__forceinline void parallel_for_static( const Index N, const Func& func)
{
parallel_for(N,func);
}
struct affinity_partitioner {
};
template<typename Index, typename Func>
__forceinline void parallel_for_affinity( const Index N, const Func& func, affinity_partitioner& ap)
{
parallel_for(N,func);
}
#endif
}

View file

@ -0,0 +1,160 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
namespace embree
{
/* sequential fallback for parallel_for_for: walks the array-of-arrays in
   order and calls func(array, localRange, globalOffset) once per non-empty
   inner array; minStepSize is accepted for interface symmetry only */
template<typename ArrayArray, typename Func>
__forceinline void sequential_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
{
  size_t globalOffset = 0;
  for (size_t arrayID = 0; arrayID != array2.size(); ++arrayID)
  {
    const size_t numElements = array2[arrayID]->size();
    if (numElements != 0)
      func(array2[arrayID], range<size_t>(0, numElements), globalOffset);
    globalOffset += numElements;
  }
}
/* precomputed task decomposition for iterating over an array of arrays:
   the flattened element range [0,N) is split into taskCount chunks, and for
   each task the starting position (array index i0, element index j0 within
   that array) is recorded */
class ParallelForForState
{
public:
enum { MAX_TASKS = 64 };
__forceinline ParallelForForState ()
: taskCount(0) {}
template<typename ArrayArray>
__forceinline ParallelForForState (ArrayArray& array2, const size_t minStepSize) {
init(array2,minStepSize);
}
template<typename SizeFunc>
__forceinline ParallelForForState (const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize) {
init(numArrays,getSize,minStepSize);
}
/* computes the decomposition from the number of arrays and a functor
   returning each array's size */
template<typename SizeFunc>
__forceinline void init ( const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize )
{
/* first calculate total number of elements */
size_t N = 0;
for (size_t i=0; i<numArrays; i++) {
N += getSize(i);
}
this->N = N;
/* calculate number of tasks to use: limited by thread count, number of
   minStepSize-blocks, and MAX_TASKS; at least one */
const size_t numThreads = TaskScheduler::threadCount();
const size_t numBlocks = (N+minStepSize-1)/minStepSize;
taskCount = max(size_t(1),min(numThreads,numBlocks,size_t(ParallelForForState::MAX_TASKS)));
/* calculate start (i,j) for each task: task t begins at flattened
   element t*N/taskCount; walk the arrays and record where each
   boundary falls */
size_t taskIndex = 0;
i0[taskIndex] = 0;
j0[taskIndex] = 0;
size_t k0 = (++taskIndex)*N/taskCount;
for (size_t i=0, k=0; taskIndex < taskCount; i++)
{
assert(i<numArrays);
size_t j=0, M = getSize(i);
/* one large array may span several task boundaries, hence the loop */
while (j<M && k+M-j >= k0 && taskIndex < taskCount) {
assert(taskIndex<taskCount);
i0[taskIndex] = i;
j0[taskIndex] = j += k0-k;
k=k0;
k0 = (++taskIndex)*N/taskCount;
}
k+=M-j;
}
}
/* adapter for array-of-pointers input; null entries count as empty */
template<typename ArrayArray>
__forceinline void init ( ArrayArray& array2, const size_t minStepSize )
{
init(array2.size(),[&](size_t i) { return array2[i] ? array2[i]->size() : 0; },minStepSize);
}
/* total number of elements over all arrays */
__forceinline size_t size() const {
return N;
}
public:
size_t i0[MAX_TASKS]; //!< per task: index of the array it starts in
size_t j0[MAX_TASKS]; //!< per task: element index within that array
size_t taskCount; //!< number of tasks used
size_t N; //!< total number of elements over all arrays
};
/* parallel iteration over an array of arrays: func(array2[i], localRange, k)
   is invoked on contiguous slices, where k is the flattened index of the
   slice start */
template<typename ArrayArray, typename Func>
__forceinline void parallel_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
{
ParallelForForState state(array2,minStepSize);
parallel_for(state.taskCount, [&](const size_t taskIndex)
{
/* calculate range [k0,k1) of flattened elements this task covers */
const size_t k0 = (taskIndex+0)*state.size()/state.taskCount;
const size_t k1 = (taskIndex+1)*state.size()/state.taskCount;
size_t i0 = state.i0[taskIndex];
size_t j0 = state.j0[taskIndex];
/* iterate over arrays; j0 only offsets into the first one */
size_t k=k0;
for (size_t i=i0; k<k1; i++) {
const size_t N = array2[i] ? array2[i]->size() : 0;
const size_t r0 = j0, r1 = min(N,r0+k1-k);
if (r1 > r0) func(array2[i],range<size_t>(r0,r1),k);
k+=r1-r0; j0 = 0;
}
});
}
/* parallel_for_for with the default minimal step size of one element */
template<typename ArrayArray, typename Func>
__forceinline void parallel_for_for( ArrayArray& array2, const Func& func )
{
  const size_t minStepSize = 1;
  parallel_for_for(array2, minStepSize, func);
}
/* parallel iteration with reduction: each task reduces its slices into a
   private slot, and the per-task partials are folded serially at the end */
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const size_t minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
ParallelForForState state(array2,minStepSize);
Value temp[ParallelForForState::MAX_TASKS];
for (size_t i=0; i<state.taskCount; i++)
temp[i] = identity;
parallel_for(state.taskCount, [&](const size_t taskIndex)
{
/* calculate range [k0,k1) of flattened elements this task covers */
const size_t k0 = (taskIndex+0)*state.size()/state.taskCount;
const size_t k1 = (taskIndex+1)*state.size()/state.taskCount;
size_t i0 = state.i0[taskIndex];
size_t j0 = state.j0[taskIndex];
/* iterate over arrays, reducing into the task-local slot */
size_t k=k0;
for (size_t i=i0; k<k1; i++) {
const size_t N = array2[i] ? array2[i]->size() : 0;
const size_t r0 = j0, r1 = min(N,r0+k1-k);
if (r1 > r0) temp[taskIndex] = reduction(temp[taskIndex],func(array2[i],range<size_t>(r0,r1),k));
k+=r1-r0; j0 = 0;
}
});
/* fold the per-task partial results serially */
Value ret = identity;
for (size_t i=0; i<state.taskCount; i++)
ret = reduction(ret,temp[i]);
return ret;
}
/* reduction variant with the default minimal step size of one element */
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction)
{
  return parallel_for_for_reduce(array2, size_t(1), identity, func, reduction);
}
}

View file

@ -0,0 +1,142 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for_for.h"
#include "parallel_prefix_sum.h"
namespace embree
{
/* state for parallel_for_for_prefix_sum: extends the task decomposition of
   ParallelForForState with per-task prefix-sum counters */
template<typename Value>
struct ParallelForForPrefixSumState : public ParallelForForState
{
__forceinline ParallelForForPrefixSumState () {}
template<typename ArrayArray>
__forceinline ParallelForForPrefixSumState (ArrayArray& array2, const size_t minStepSize)
: ParallelForForState(array2,minStepSize) {}
template<typename SizeFunc>
__forceinline ParallelForForPrefixSumState (size_t numArrays, const SizeFunc& getSize, const size_t minStepSize)
: ParallelForForState(numArrays,getSize,minStepSize) {}
ParallelPrefixSumState<Value> prefix_state; //!< per-task counts and prefix sums
};
/* first pass of a two-pass prefix sum over an array-of-arrays: reduces each
   task's slices with func, stores the per-task result in
   state.prefix_state.counts, fills state.prefix_state.sums with the
   exclusive prefix over those counts, and returns the total.
   NOTE(review): minStepSize is unused here; the decomposition was fixed when
   'state' was constructed. */
template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum0_( ParallelForForPrefixSumState<Value>& state, Index minStepSize,
const SizeFunc& getSize, const Value& identity, const Func& func, const Reduction& reduction)
{
/* calculate number of tasks to use */
const size_t taskCount = state.taskCount;
/* perform parallel prefix sum */
parallel_for(taskCount, [&](const size_t taskIndex)
{
const size_t k0 = (taskIndex+0)*state.size()/taskCount;
const size_t k1 = (taskIndex+1)*state.size()/taskCount;
size_t i0 = state.i0[taskIndex];
size_t j0 = state.j0[taskIndex];
/* iterate over arrays, reducing this task's contributions into N */
size_t k=k0;
Value N=identity;
for (size_t i=i0; k<k1; i++) {
const size_t size = getSize(i);
const size_t r0 = j0, r1 = min(size,r0+k1-k);
if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k));
k+=r1-r0; j0 = 0;
}
state.prefix_state.counts[taskIndex] = N;
});
/* serial pass: exclusive prefix over the per-task counts */
Value sum=identity;
for (size_t i=0; i<taskCount; i++)
{
const Value c = state.prefix_state.counts[i];
state.prefix_state.sums[i] = sum;
sum=reduction(sum,c);
}
return sum;
}
/* second pass of the two-pass prefix sum: like parallel_for_for_prefix_sum0_,
   but func additionally receives the running prefix (this task's base
   combined with the partial reduced so far), enabling scatter-style writes.
   NOTE(review): assumes pass 0 already filled state.prefix_state.sums. */
template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum1_( ParallelForForPrefixSumState<Value>& state, Index minStepSize,
const SizeFunc& getSize,
const Value& identity, const Func& func, const Reduction& reduction)
{
/* calculate number of tasks to use */
const size_t taskCount = state.taskCount;
/* perform parallel prefix sum */
parallel_for(taskCount, [&](const size_t taskIndex)
{
const size_t k0 = (taskIndex+0)*state.size()/taskCount;
const size_t k1 = (taskIndex+1)*state.size()/taskCount;
size_t i0 = state.i0[taskIndex];
size_t j0 = state.j0[taskIndex];
/* iterate over arrays; the extra func argument is the current prefix */
size_t k=k0;
Value N=identity;
for (size_t i=i0; k<k1; i++) {
const size_t size = getSize(i);
const size_t r0 = j0, r1 = min(size,r0+k1-k);
if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k,reduction(state.prefix_state.sums[taskIndex],N)));
k+=r1-r0; j0 = 0;
}
state.prefix_state.counts[taskIndex] = N;
});
/* calculate prefix sum (kept up to date for a potential further pass) */
Value sum=identity;
for (size_t i=0; i<taskCount; i++)
{
const Value c = state.prefix_state.counts[i];
state.prefix_state.sums[i] = sum;
sum=reduction(sum,c);
}
return sum;
}
/* first prefix-sum pass over an array-of-arrays: adapts the generic
   size-functor variant by looking up sizes (null entries count as empty)
   and forwarding each slice of array2[i] to func */
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state,
                                                  ArrayArray& array2, Index minStepSize,
                                                  const Value& identity, const Func& func, const Reduction& reduction)
{
  auto getSize = [&](Index arrayID) { return array2[arrayID] ? array2[arrayID]->size() : 0; };
  auto invoke = [&](Index arrayID, const range<Index>& r, Index offset) {
    return func(array2[arrayID], r, offset, arrayID);
  };
  return parallel_for_for_prefix_sum0_(state, minStepSize, getSize, identity, invoke, reduction);
}
/* second prefix-sum pass over an array-of-arrays: like the pass-0 adapter,
   but the running prefix 'base' is forwarded to func as well */
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state,
                                                  ArrayArray& array2, Index minStepSize,
                                                  const Value& identity, const Func& func, const Reduction& reduction)
{
  auto getSize = [&](Index arrayID) { return array2[arrayID] ? array2[arrayID]->size() : 0; };
  auto invoke = [&](Index arrayID, const range<Index>& r, Index offset, const Value& base) {
    return func(array2[arrayID], r, offset, arrayID, base);
  };
  return parallel_for_for_prefix_sum1_(state, minStepSize, getSize, identity, invoke, reduction);
}
/* first prefix-sum pass with the default step size of one element */
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
                                                  const Value& identity, const Func& func, const Reduction& reduction)
{
  return parallel_for_for_prefix_sum0(state, array2, size_t(1), identity, func, reduction);
}
/* second prefix-sum pass with the default step size of one element */
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
                                                  const Value& identity, const Func& func, const Reduction& reduction)
{
  return parallel_for_for_prefix_sum1(state, array2, size_t(1), identity, func, reduction);
}
}

View file

@ -0,0 +1,85 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_sort.h"
namespace embree
{
/*! key/value map with parallel construction: pairs are built in parallel,
    radix-sorted by key, and looked up by binary search */
template<typename Key, typename Val>
class parallel_map
{
  /* key/value pair stored in the sorted vector */
  struct KeyValue
  {
    __forceinline KeyValue () {}

    __forceinline KeyValue (const Key key, const Val val)
      : key(key), val(val) {}

    /* comparison against plain keys works through this conversion */
    __forceinline operator Key() const {
      return key;
    }

  public:
    Key key;
    Val val;
  };

public:

  /*! constructs an empty map */
  parallel_map () {}

  /*! construction from a pair of vectors */
  template<typename KeyVector, typename ValVector>
  parallel_map (const KeyVector& keys, const ValVector& values) { init(keys,values); }

  /*! initializes the map from a vector of keys and a vector of values */
  template<typename KeyVector, typename ValVector>
  void init(const KeyVector& keys, const ValVector& values)
  {
    /* both vectors must describe the same number of entries */
    assert(keys.size() == values.size());
    vec.resize(keys.size());

    /* generate the key/value pairs in parallel */
    parallel_for( size_t(0), keys.size(), size_t(4*4096), [&](const range<size_t>& r) {
      for (size_t i=r.begin(); i<r.end(); i++)
        vec[i] = KeyValue((Key)keys[i],values[i]);
    });

    /* sort the pairs by key using a parallel radix sort */
    std::vector<KeyValue> scratch(keys.size());
    radix_sort<KeyValue,Key>(vec.data(),scratch.data(),keys.size());
  }

  /*! Returns a pointer to the value associated with the specified key,
      or nullptr if the key is not contained in the map. */
  __forceinline const Val* lookup(const Key& key) const
  {
    auto it = std::lower_bound(vec.begin(), vec.end(), key);
    if (it == vec.end() || it->key != key) return nullptr;
    return &it->val;
  }

  /*! Returns the value associated with the key if present, otherwise the
      supplied default value. */
  __forceinline Val lookup(const Key& key, const Val& def) const
  {
    auto it = std::lower_bound(vec.begin(), vec.end(), key);
    return (it != vec.end() && it->key == key) ? it->val : def;
  }

  /*! clears all state */
  void clear() {
    vec.clear();
  }

private:
  std::vector<KeyValue> vec; //!< vector containing the key-sorted pairs
};
}

View file

@ -0,0 +1,283 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
#include "../math/range.h"
namespace embree
{
/* serial partitioning: reorders array[begin,end) so that all elements for
   which is_left(x) holds precede the rest, and returns the split position;
   reduction_t folds every element into leftReduction or rightReduction
   according to its final side */
template<typename T, typename V, typename IsLeft, typename Reduction_T>
__forceinline size_t serial_partitioning(T* array,
const size_t begin,
const size_t end,
V& leftReduction,
V& rightReduction,
const IsLeft& is_left,
const Reduction_T& reduction_t)
{
T* l = array + begin;
T* r = array + end - 1;
while(1)
{
/* advance l over elements already on the left side, reducing them */
/* *l < pivot */
while (likely(l <= r && is_left(*l) ))
{
//prefetchw(l+4); // FIXME: enable?
reduction_t(leftReduction,*l);
++l;
}
/* retreat r over elements already on the right side, reducing them */
/* *r >= pivot) */
while (likely(l <= r && !is_left(*r)))
{
//prefetchw(r-4); FIXME: enable?
reduction_t(rightReduction,*r);
--r;
}
/* pointers crossed: the array is fully partitioned */
if (r<l) break;
/* both *l and *r are misplaced: reduce to their target sides and swap */
reduction_t(leftReduction ,*r);
reduction_t(rightReduction,*l);
xchg(*l,*r);
l++; r--;
}
return l - array;
}
/* task object implementing the parallel partition: each of numTasks tasks
   serially partitions an equal slice of the array; the ranges that end up on
   the wrong side of the global split are then computed and their items
   swapped pairwise in parallel */
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
class __aligned(64) parallel_partition_task
{
ALIGNED_CLASS_(64);
private:
static const size_t MAX_TASKS = 64;
T* array; //!< data to partition (caller passes &data[begin])
size_t N; //!< number of elements
const IsLeft& is_left; //!< predicate selecting left-side elements
const Reduction_T& reduction_t; //!< folds one element into a reduction value
const Reduction_V& reduction_v; //!< merges two reduction values
const Vi& identity; //!< neutral reduction value
size_t numTasks; //!< number of parallel tasks
__aligned(64) size_t counter_start[MAX_TASKS+1]; //!< slice start per task (+ sentinel N)
__aligned(64) size_t counter_left[MAX_TASKS+1]; //!< number of left elements per slice
__aligned(64) range<ssize_t> leftMisplacedRanges[MAX_TASKS]; //!< right elements inside global left part
__aligned(64) range<ssize_t> rightMisplacedRanges[MAX_TASKS]; //!< left elements inside global right part
__aligned(64) V leftReductions[MAX_TASKS];
__aligned(64) V rightReductions[MAX_TASKS];
public:
__forceinline parallel_partition_task(T* array,
const size_t N,
const Vi& identity,
const IsLeft& is_left,
const Reduction_T& reduction_t,
const Reduction_V& reduction_v,
const size_t BLOCK_SIZE)
: array(array), N(N), is_left(is_left), reduction_t(reduction_t), reduction_v(reduction_v), identity(identity),
numTasks(min((N+BLOCK_SIZE-1)/BLOCK_SIZE,min(TaskScheduler::threadCount(),MAX_TASKS))) {}
/* maps a flattened item index into the range list r[0..numRanges); on
   return 'index' is local to the returned range */
__forceinline const range<ssize_t>* findStartRange(size_t& index, const range<ssize_t>* const r, const size_t numRanges)
{
size_t i = 0;
while(index >= (size_t)r[i].size())
{
assert(i < numRanges);
index -= (size_t)r[i].size();
i++;
}
return &r[i];
}
/* swaps items [startID,endID) of the flattened left-misplaced ranges with
   the corresponding items of the flattened right-misplaced ranges */
__forceinline void swapItemsInMisplacedRanges(const size_t numLeftMisplacedRanges,
const size_t numRightMisplacedRanges,
const size_t startID,
const size_t endID)
{
size_t leftLocalIndex = startID;
size_t rightLocalIndex = startID;
const range<ssize_t>* l_range = findStartRange(leftLocalIndex,leftMisplacedRanges,numLeftMisplacedRanges);
const range<ssize_t>* r_range = findStartRange(rightLocalIndex,rightMisplacedRanges,numRightMisplacedRanges);
size_t l_left = l_range->size() - leftLocalIndex;
size_t r_left = r_range->size() - rightLocalIndex;
T *__restrict__ l = &array[l_range->begin() + leftLocalIndex];
T *__restrict__ r = &array[r_range->begin() + rightLocalIndex];
size_t size = endID - startID;
size_t items = min(size,min(l_left,r_left));
while (size)
{
/* a side exhausted its current range: advance to the next range */
if (unlikely(l_left == 0))
{
l_range++;
l_left = l_range->size();
l = &array[l_range->begin()];
items = min(size,min(l_left,r_left));
}
if (unlikely(r_left == 0))
{
r_range++;
r_left = r_range->size();
r = &array[r_range->begin()];
items = min(size,min(l_left,r_left));
}
size -= items;
l_left -= items;
r_left -= items;
while(items) {
items--;
xchg(*l++,*r++);
}
}
}
/* performs the partition and returns the split position relative to 'array' */
__forceinline size_t partition(V& leftReduction, V& rightReduction)
{
/* partition the individual ranges for each task */
parallel_for(numTasks,[&] (const size_t taskID) {
const size_t startID = (taskID+0)*N/numTasks;
const size_t endID = (taskID+1)*N/numTasks;
V local_left(identity);
V local_right(identity);
const size_t mid = serial_partitioning(array,startID,endID,local_left,local_right,is_left,reduction_t);
counter_start[taskID] = startID;
counter_left [taskID] = mid-startID;
leftReductions[taskID] = local_left;
rightReductions[taskID] = local_right;
});
counter_start[numTasks] = N;
counter_left[numTasks] = 0;
/* finalize the reductions */
for (size_t i=0; i<numTasks; i++) {
reduction_v(leftReduction,leftReductions[i]);
reduction_v(rightReduction,rightReductions[i]);
}
/* calculate mid point for partitioning: total count of left elements */
size_t mid = counter_left[0];
for (size_t i=1; i<numTasks; i++)
mid += counter_left[i];
const range<ssize_t> globalLeft (0,mid);
const range<ssize_t> globalRight(mid,N);
/* calculate all left and right ranges that are on the wrong global side */
size_t numMisplacedRangesLeft = 0;
size_t numMisplacedRangesRight = 0;
size_t numMisplacedItemsLeft MAYBE_UNUSED = 0;
size_t numMisplacedItemsRight MAYBE_UNUSED = 0;
for (size_t i=0; i<numTasks; i++)
{
/* a task's slice splits into its local left and right parts */
const range<ssize_t> left_range (counter_start[i], counter_start[i] + counter_left[i]);
const range<ssize_t> right_range(counter_start[i] + counter_left[i], counter_start[i+1]);
const range<ssize_t> left_misplaced = globalLeft. intersect(right_range);
const range<ssize_t> right_misplaced = globalRight.intersect(left_range);
if (!left_misplaced.empty())
{
numMisplacedItemsLeft += left_misplaced.size();
leftMisplacedRanges[numMisplacedRangesLeft++] = left_misplaced;
}
if (!right_misplaced.empty())
{
numMisplacedItemsRight += right_misplaced.size();
rightMisplacedRanges[numMisplacedRangesRight++] = right_misplaced;
}
}
/* both sides must have equally many misplaced items to swap pairwise */
assert( numMisplacedItemsLeft == numMisplacedItemsRight );
/* if no items are misplaced we are done */
if (numMisplacedItemsLeft == 0)
return mid;
/* otherwise we copy the items to the right place in parallel */
parallel_for(numTasks,[&] (const size_t taskID) {
const size_t startID = (taskID+0)*numMisplacedItemsLeft/numTasks;
const size_t endID = (taskID+1)*numMisplacedItemsLeft/numTasks;
swapItemsInMisplacedRanges(numMisplacedRangesLeft,numMisplacedRangesRight,startID,endID);
});
return mid;
}
};
/* parallel partitioning of array[begin,end) with reductions; inputs smaller
   than BLOCK_SIZE fall back to the serial algorithm */
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
__noinline size_t parallel_partitioning(T* array,
                                        const size_t begin,
                                        const size_t end,
                                        const Vi &identity,
                                        V &leftReduction,
                                        V &rightReduction,
                                        const IsLeft& is_left,
                                        const Reduction_T& reduction_t,
                                        const Reduction_V& reduction_v,
                                        size_t BLOCK_SIZE = 128)
{
  const size_t numElements = end - begin;

  /* not enough work for parallelism -> single threaded partitioning */
  if (unlikely(numElements < BLOCK_SIZE))
    return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);

  /* enough work: allocate the task state and run the parallel partitioner */
  using partition_task = parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V>;
  std::unique_ptr<partition_task> task(new partition_task(&array[begin],numElements,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
  return begin + task->partition(leftReduction,rightReduction);
}
/* parallel partitioning with a separate threshold deciding when to use the
   serial fallback; BLOCK_SIZE only sizes the parallel tasks */
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
__noinline size_t parallel_partitioning(T* array,
                                        const size_t begin,
                                        const size_t end,
                                        const Vi &identity,
                                        V &leftReduction,
                                        V &rightReduction,
                                        const IsLeft& is_left,
                                        const Reduction_T& reduction_t,
                                        const Reduction_V& reduction_v,
                                        size_t BLOCK_SIZE,
                                        size_t PARALLEL_THRESHOLD)
{
  const size_t numElements = end - begin;

  /* below the threshold the serial algorithm is preferable */
  if (unlikely(numElements < PARALLEL_THRESHOLD))
    return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);

  /* otherwise run the task-based parallel partitioner */
  using partition_task = parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V>;
  std::unique_ptr<partition_task> task(new partition_task(&array[begin],numElements,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
  return begin + task->partition(leftReduction,rightReduction);
}
/* convenience partitioning without reductions: wires no-op reduction
   functors into the general variant */
template<typename T, typename IsLeft>
inline size_t parallel_partitioning(T* array,
                                    const size_t begin,
                                    const size_t end,
                                    const IsLeft& is_left,
                                    size_t BLOCK_SIZE = 128)
{
  size_t dummyLeft = 0;
  size_t dummyRight = 0;
  auto noop_elem = [] (size_t& t, const T& ref) { };
  auto noop_join = [] (size_t& t0, size_t& t1) { };
  return parallel_partitioning(array, begin, end, 0, dummyLeft, dummyRight, is_left, noop_elem, noop_join, BLOCK_SIZE);
}

View file

@ -0,0 +1,85 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
namespace embree
{
/* per-task scratch state for the two-pass parallel prefix-sum algorithm */
template<typename Value>
struct ParallelPrefixSumState
{
/* maximal number of parallel tasks supported */
enum { MAX_TASKS = 64 };
Value counts[MAX_TASKS]; //!< reduction result of each task's subrange
Value sums [MAX_TASKS]; //!< exclusive prefix over counts
};
/* two-pass prefix-sum building block: evaluates func over taskCount
   subranges of [first,last), records each result in state.counts, then
   serially fills state.sums with the exclusive prefix and returns the total.
   NOTE(review): func receives state.sums[taskIndex] as its second argument,
   which is only meaningful when this is a second invocation with the same
   'state' -- a first-pass functor must ignore it (see the array overload
   below) -- confirm with callers. */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction)
{
/* calculate number of tasks to use */
const size_t numThreads = TaskScheduler::threadCount();
const size_t numBlocks = (last-first+minStepSize-1)/minStepSize;
const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS));
/* perform parallel prefix sum */
parallel_for(taskCount, [&](const size_t taskIndex)
{
const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount;
const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount;
state.counts[taskIndex] = func(range<size_t>(i0,i1),state.sums[taskIndex]);
});
/* calculate prefix sum */
Value sum=identity;
for (size_t i=0; i<taskCount; i++)
{
const Value c = state.counts[i];
state.sums[i] = sum;
sum=reduction(sum,c);
}
return sum;
}
/*! parallel calculation of prefix sums: writes the exclusive prefix sum of
    src[0..N) into dst and returns the total */
template<typename SrcArray, typename DstArray, typename Value, typename Add>
__forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096)
{
/* perform single threaded prefix operation for small N */
if (N < SINGLE_THREAD_THRESHOLD)
{
Value sum=identity;
/* dst[i] receives the sum of src[0..i) before src[i] is folded in */
for (size_t i=0; i<N; sum=add(sum,src[i++])) dst[i] = sum;
return sum;
}
/* perform parallel prefix operation for large N */
else
{
ParallelPrefixSumState<Value> state;
/* initial run just sets up start values for subtasks (its sum argument
   is intentionally ignored) */
parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
Value s = identity;
for (size_t i=r.begin(); i<r.end(); i++) s = add(s,src[i]);
return s;
}, add);
/* final run calculates prefix sum using each task's base value 'sum' */
return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
Value s = identity;
for (size_t i=r.begin(); i<r.end(); i++) {
dst[i] = add(sum,s);
s = add(s,src[i]);
}
return s;
}, add);
}
}
}

View file

@ -0,0 +1,146 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
namespace embree
{
/* sequential reduction: evaluates func once over the whole range; identity
   and reduction are accepted only for interface symmetry */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value sequential_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
{
  return func(range<Index>(first, last));
}
/* sequential reduction with (ignored) granularity parameter: evaluates
   func once over the whole range */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value sequential_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
  return func(range<Index>(first, last));
}
/* internal helper: splits [first,last) into at most taskCount subranges
   (capped by thread count and 512), reduces each with func in parallel, then
   folds the per-task values serially with 'reduction' */
template<typename Index, typename Value, typename Func, typename Reduction>
__noinline Value parallel_reduce_internal( Index taskCount, const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
const Index maxTasks = 512;
const Index threadCount = (Index) TaskScheduler::threadCount();
taskCount = min(taskCount,threadCount,maxTasks);
/* parallel invocation of all tasks */
dynamic_large_stack_array(Value,values,taskCount,8192); // consumes at most 8192 bytes on the stack
parallel_for(taskCount, [&](const Index taskIndex) {
const Index k0 = first+(taskIndex+0)*(last-first)/taskCount;
const Index k1 = first+(taskIndex+1)*(last-first)/taskCount;
values[taskIndex] = func(range<Index>(k0,k1));
});
/* perform reduction over all tasks */
Value v = identity;
for (Index i=0; i<taskCount; i++) v = reduction(v,values[i]);
return v;
}
/* generic parallel reduction of func over subranges of [first,last), folding
   partial results with 'reduction'; the backend is chosen at compile time */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
/* fast path for small number of iterations */
Index taskCount = (last-first+minStepSize-1)/minStepSize;
if (likely(taskCount == 1)) {
return func(range<Index>(first,last));
}
return parallel_reduce_internal(taskCount,first,last,minStepSize,identity,func,reduction);
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
tbb::task_group_context context;
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
reduction,context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
return v;
#else
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
reduction);
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
return v;
#endif
#else // TASKING_PPL
/* PPL's parallel_reduce works on iterators; wrap Value so it stays
   correctly aligned inside the algorithm's internal storage */
struct AlignedValue
{
char storage[__alignof(Value)+sizeof(Value)];
static uintptr_t alignUp(uintptr_t p, size_t a) { return p + (~(p - 1) % a); };
Value* getValuePtr() { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
const Value* getValuePtr() const { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
AlignedValue(const Value& v) { new(getValuePtr()) Value(v); }
AlignedValue(const AlignedValue& v) { new(getValuePtr()) Value(*v.getValuePtr()); }
AlignedValue(const AlignedValue&& v) { new(getValuePtr()) Value(*v.getValuePtr()); };
AlignedValue& operator = (const AlignedValue& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
AlignedValue& operator = (const AlignedValue&& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
operator Value() const { return *getValuePtr(); }
};
/* minimal forward iterator over Index values for PPL */
struct Iterator_Index
{
Index v;
typedef std::forward_iterator_tag iterator_category;
typedef AlignedValue value_type;
typedef Index difference_type;
typedef Index distance_type;
typedef AlignedValue* pointer;
typedef AlignedValue& reference;
__forceinline Iterator_Index() {}
__forceinline Iterator_Index(Index v) : v(v) {}
__forceinline bool operator== (Iterator_Index other) { return v == other.v; }
__forceinline bool operator!= (Iterator_Index other) { return v != other.v; }
__forceinline Iterator_Index operator++() { return Iterator_Index(++v); }
__forceinline Iterator_Index operator++(int) { return Iterator_Index(v++); }
};
auto range_reduction = [&](Iterator_Index begin, Iterator_Index end, const AlignedValue& start) {
assert(begin.v < end.v);
return reduction(start, func(range<Index>(begin.v, end.v)));
};
const Value v = concurrency::parallel_reduce(Iterator_Index(first), Iterator_Index(last), AlignedValue(identity), range_reduction, reduction);
return v;
#endif
}
/* parallel reduction that stays sequential below the given threshold */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
{
  if (likely(last-first < parallel_threshold))
    return func(range<Index>(first,last));
  return parallel_reduce(first, last, minStepSize, identity, func, reduction);
}
/* range-object convenience overload of the thresholded parallel reduction */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const range<Index> range, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
{
  return parallel_reduce(range.begin(), range.end(), minStepSize, parallel_threshold, identity, func, reduction);
}
/* per-element parallel reduction: reduces func(i) over all i in [first,last) */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
{
  /* adapt the per-index functor into a range functor */
  auto reduce_range = [&] ( const range<Index> r ) {
    Value accum = identity;
    for (Index i=r.begin(); i<r.end(); i++)
      accum = reduction(accum, func(i));
    return accum;
  };
  return parallel_reduce(first, last, Index(1), identity, reduce_range, reduction);
}
/*! convenience overload of the per-element reduction taking a range object */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const range<Index> range, const Value& identity, const Func& func, const Reduction& reduction )
{
  const Index lo = range.begin();
  const Index hi = range.end();
  return parallel_reduce(lo,hi,Index(1),identity,func,reduction);
}
}

View file

@ -0,0 +1,52 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_sort.h"
namespace embree
{
/* implementation of a set of values with parallel construction */
/* set of values of type T; constructed in parallel, queried via binary search */
template<typename T>
class parallel_set
{
public:

  /*! constructs an empty set */
  parallel_set () {}

  /*! constructs the set from the elements of a vector */
  template<typename Vector>
  parallel_set (const Vector& in) { init(in); }

  /*! (re-)initializes the set from a vector */
  template<typename Vector>
  void init(const Vector& in)
  {
    /* copy the input into our storage in parallel */
    const size_t N = in.size();
    vec.resize(N);
    parallel_for( size_t(0), N, size_t(4*4096), [&](const range<size_t>& block) {
      for (size_t j=block.begin(); j<block.end(); j++)
        vec[j] = in[j];
    });

    /* sort the elements so lookup() can binary search */
    std::vector<T> scratch(N);
    radix_sort<T>(vec.data(),scratch.data(),vec.size());
  }

  /*! returns true if elt is contained in the set */
  __forceinline bool lookup(const T& elt) const {
    return std::binary_search(vec.begin(), vec.end(), elt);
  }

  /*! removes all elements */
  void clear() {
    vec.clear();
  }

private:
  std::vector<T> vec;  //!< sorted element storage
};
}

View file

@ -0,0 +1,454 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../simd/simd.h"
#include "parallel_for.h"
#include <algorithm>
namespace embree
{
/* stable in-place insertion sort of array[0..length) into ascending order */
template<class T>
__forceinline void insertionsort_ascending(T *__restrict__ array, const size_t length)
{
  for (size_t pos = 1; pos < length; ++pos)
  {
    /* shift larger predecessors one slot right, then drop the key in */
    T key = array[pos];
    size_t hole = pos;
    for (; hole > 0 && key < array[hole-1]; --hole)
      array[hole] = array[hole-1];
    array[hole] = key;
  }
}
/* stable in-place insertion sort of array[0..length) into descending order */
template<class T>
__forceinline void insertionsort_decending(T *__restrict__ array, const size_t length)
{
  for (size_t pos = 1; pos < length; ++pos)
  {
    /* shift smaller predecessors one slot right, then drop the key in */
    T key = array[pos];
    size_t hole = pos;
    for (; hole > 0 && key > array[hole-1]; --hole)
      array[hole] = array[hole-1];
    array[hole] = key;
  }
}
/* recursive quicksort of t[begin..end] (inclusive bounds) into ascending
 * order, using Hoare partitioning with t[begin] as the pivot value */
template<class T>
void quicksort_ascending(T *__restrict__ t,
                         const ssize_t begin,
                         const ssize_t end)
{
  if (likely(begin < end))
  {
    const T pivotvalue = t[begin];
    ssize_t left = begin - 1;
    ssize_t right = end + 1;

    /* Hoare partition: scan from both ends and swap out-of-place pairs;
     * the pre-increment/pre-decrement scan order must be preserved */
    while(1)
    {
      while (t[--right] > pivotvalue);
      while (t[++left] < pivotvalue);
      if (left >= right) break;
      const T temp = t[right];
      t[right] = t[left];
      t[left] = temp;
    }

    /* split point kept as ssize_t (was int, which would overflow for
     * ranges beyond INT_MAX elements and was inconsistent with the
     * quicksort_insertionsort_* variants below) */
    const ssize_t pivot = right;
    quicksort_ascending(t, begin, pivot);
    quicksort_ascending(t, pivot + 1, end);
  }
}
/* recursive quicksort of t[begin..end] (inclusive bounds) into descending
 * order, using Hoare partitioning with t[begin] as the pivot value */
template<class T>
void quicksort_decending(T *__restrict__ t,
                         const ssize_t begin,
                         const ssize_t end)
{
  if (likely(begin < end))
  {
    const T pivotvalue = t[begin];
    ssize_t left = begin - 1;
    ssize_t right = end + 1;

    /* Hoare partition with inverted comparisons (descending order);
     * the pre-increment/pre-decrement scan order must be preserved */
    while(1)
    {
      while (t[--right] < pivotvalue);
      while (t[++left] > pivotvalue);
      if (left >= right) break;
      const T temp = t[right];
      t[right] = t[left];
      t[left] = temp;
    }

    /* split point kept as ssize_t (was int, which would overflow for
     * ranges beyond INT_MAX elements and was inconsistent with the
     * quicksort_insertionsort_* variants below) */
    const ssize_t pivot = right;
    quicksort_decending(t, begin, pivot);
    quicksort_decending(t, pivot + 1, end);
  }
}
/* hybrid ascending quicksort of t[begin..end] (inclusive bounds): ranges of
 * at most THRESHOLD elements are finished with insertion sort, larger ones
 * are partitioned recursively (Hoare scheme, pivot value from t[begin]) */
template<class T, ssize_t THRESHOLD>
void quicksort_insertionsort_ascending(T *__restrict__ t,
                                       const ssize_t begin,
                                       const ssize_t end)
{
  if (likely(begin < end))
  {
    const ssize_t size = end-begin+1;
    if (likely(size <= THRESHOLD))
    {
      /* small range: insertion sort beats further recursion */
      insertionsort_ascending<T>(&t[begin],size);
    }
    else
    {
      const T pivotvalue = t[begin];
      ssize_t left = begin - 1;
      ssize_t right = end + 1;
      /* Hoare partition: advance both cursors and swap misplaced pairs;
       * note the pre-increments -- the scan order must not be changed */
      while(1)
      {
        while (t[--right] > pivotvalue);
        while (t[++left] < pivotvalue);
        if (left >= right) break;
        const T temp = t[right];
        t[right] = t[left];
        t[left] = temp;
      }
      /* right is the last index of the lower partition */
      const ssize_t pivot = right;
      quicksort_insertionsort_ascending<T,THRESHOLD>(t, begin, pivot);
      quicksort_insertionsort_ascending<T,THRESHOLD>(t, pivot + 1, end);
    }
  }
}
/* hybrid descending quicksort of t[begin..end] (inclusive bounds): ranges of
 * at most THRESHOLD elements are finished with insertion sort, larger ones
 * are partitioned recursively (Hoare scheme, pivot value from t[begin]) */
template<class T, ssize_t THRESHOLD>
void quicksort_insertionsort_decending(T *__restrict__ t,
                                       const ssize_t begin,
                                       const ssize_t end)
{
  if (likely(begin < end))
  {
    const ssize_t size = end-begin+1;
    if (likely(size <= THRESHOLD))
    {
      /* small range: insertion sort beats further recursion */
      insertionsort_decending<T>(&t[begin],size);
    }
    else
    {
      const T pivotvalue = t[begin];
      ssize_t left = begin - 1;
      ssize_t right = end + 1;
      /* Hoare partition with inverted comparisons (descending order);
       * note the pre-increments -- the scan order must not be changed */
      while(1)
      {
        while (t[--right] < pivotvalue);
        while (t[++left] > pivotvalue);
        if (left >= right) break;
        const T temp = t[right];
        t[right] = t[left];
        t[left] = temp;
      }
      /* right is the last index of the upper partition */
      const ssize_t pivot = right;
      quicksort_insertionsort_decending<T,THRESHOLD>(t, begin, pivot);
      quicksort_insertionsort_decending<T,THRESHOLD>(t, pivot + 1, end);
    }
  }
}
/* single threaded in-place MSD radix sort over the low 32 bits of the
 * elements: distributes into 256 buckets by the byte selected by shift
 * (starting with the most significant), then recurses per bucket on the
 * next lower byte, switching to insertion sort for small buckets */
template<typename T>
static void radixsort32(T* const morton, const size_t num, const unsigned int shift = 3*8)
{
  static const unsigned int BITS = 8;
  static const unsigned int BUCKETS = (1 << BITS);
  static const unsigned int CMP_SORT_THRESHOLD = 16;

  __aligned(64) unsigned int count[BUCKETS];

  /* clear buckets */
  for (size_t i=0;i<BUCKETS;i++) count[i] = 0;

  /* count buckets */
#if defined(__INTEL_COMPILER)
#pragma nounroll
#endif
  for (size_t i=0;i<num;i++)
    count[(unsigned(morton[i]) >> shift) & (BUCKETS-1)]++;

  /* prefix sums: head[i]/tail[i] delimit the slot range of bucket i */
  __aligned(64) unsigned int head[BUCKETS];
  __aligned(64) unsigned int tail[BUCKETS];

  head[0] = 0;
  for (size_t i=1; i<BUCKETS; i++)
    head[i] = head[i-1] + count[i-1];

  for (size_t i=0; i<BUCKETS-1; i++)
    tail[i] = head[i+1];

  tail[BUCKETS-1] = head[BUCKETS-1] + count[BUCKETS-1];
  assert(tail[BUCKETS-1] == head[BUCKETS-1] + count[BUCKETS-1]);
  assert(tail[BUCKETS-1] == num);

  /* in-place swap: repeatedly cycle the element at head[i] into its
   * destination bucket until an element belonging to bucket i arrives */
  for (size_t i=0;i<BUCKETS;i++)
  {
    /* process bucket */
    while(head[i] < tail[i])
    {
      T v = morton[head[i]];
      while(1)
      {
        const size_t b = (unsigned(v) >> shift) & (BUCKETS-1);
        if (b == i) break;
        std::swap(v,morton[head[b]++]);
      }
      assert((unsigned(v) >> shift & (BUCKETS-1)) == i);
      morton[head[i]++] = v;
    }
  }

  /* lowest byte processed: buckets are fully sorted, done */
  if (shift == 0) return;

  /* recurse into each non-empty bucket on the next lower byte */
  size_t offset = 0;
  for (size_t i=0;i<BUCKETS;i++)
    if (count[i])
    {
      /* all elements of this bucket share the current byte value */
      for (size_t j=offset;j<offset+count[i]-1;j++)
        assert(((unsigned(morton[j]) >> shift) & (BUCKETS-1)) == i);

      if (unlikely(count[i] < CMP_SORT_THRESHOLD))
        insertionsort_ascending(morton + offset, count[i]);
      else
        radixsort32(morton + offset, count[i], shift-BITS);

      /* verify the bucket ended up sorted (debug builds only) */
      for (size_t j=offset;j<offset+count[i]-1;j++)
        assert(morton[j] <= morton[j+1]);

      offset += count[i];
    }
}
/* parallel LSD radix sort of Ty elements ordered by their Key value;
 * each iteration consumes 8 key bits and ping-pongs the data between
 * the src and tmp arrays using up to MAX_TASKS parallel tasks */
template<typename Ty, typename Key>
class ParallelRadixSort
{
  static const size_t MAX_TASKS = 64;          //!< upper bound on parallel tasks
  static const size_t BITS = 8;                //!< key bits consumed per iteration
  static const size_t BUCKETS = (1 << BITS);   //!< number of radix buckets
  typedef unsigned int TyRadixCount[BUCKETS];  //!< per-task bucket histogram

  /* comparison for the small-N std::sort fallback; orders by Key */
  template<typename T>
  static bool compare(const T& v0, const T& v1) {
    return (Key)v0 < (Key)v1;
  }

private:
  ParallelRadixSort (const ParallelRadixSort& other) DELETED; // do not implement
  ParallelRadixSort& operator= (const ParallelRadixSort& other) DELETED; // do not implement

public:
  /*! src is sorted in place; tmp must provide N elements of scratch space */
  ParallelRadixSort (Ty* const src, Ty* const tmp, const size_t N)
    : radixCount(nullptr), src(src), tmp(tmp), N(N) {}

  /*! sorts the data; blockSize controls the serial/parallel cutoff and
   *  the number of tasks used in the parallel path */
  void sort(const size_t blockSize)
  {
    assert(blockSize > 0);

    /* perform single threaded sort for small N */
    if (N<=blockSize) // handles also special case of 0!
    {
      /* do inplace sort inside destination array */
      std::sort(src,src+N,compare<Ty>);
    }

    /* perform parallel sort for large N */
    else
    {
      const size_t numThreads = min((N+blockSize-1)/blockSize,TaskScheduler::threadCount(),size_t(MAX_TASKS));
      tbbRadixSort(numThreads);
    }
  }

  ~ParallelRadixSort()
  {
    /* radixCount is only allocated by tbbRadixSort; NOTE(review): assumes
     * alignedFree tolerates nullptr for the serial path -- confirm */
    alignedFree(radixCount);
    radixCount = nullptr;
  }

private:

  /* pass 1 of one iteration: this task counts, for its slice of src,
   * how many elements fall into each bucket of the current key byte
   * (selected by shift); results go to radixCount[threadIndex] */
  void tbbRadixIteration0(const Key shift,
                          const Ty* __restrict const src,
                          Ty* __restrict const dst,
                          const size_t threadIndex, const size_t threadCount)
  {
    const size_t startID = (threadIndex+0)*N/threadCount;
    const size_t endID   = (threadIndex+1)*N/threadCount;

    /* mask to extract some number of bits */
    const Key mask = BUCKETS-1;

    /* count how many items go into the buckets */
    for (size_t i=0; i<BUCKETS; i++)
      radixCount[threadIndex][i] = 0;

    /* iterate over src array and count buckets */
    unsigned int * __restrict const count = radixCount[threadIndex];
#if defined(__INTEL_COMPILER)
#pragma nounroll
#endif
    for (size_t i=startID; i<endID; i++) {
#if defined(__64BIT__)
      const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
#else
      const Key index = ((Key)src[i] >> shift) & mask;
#endif
      count[index]++;
    }
  }

  /* pass 2 of one iteration: computes this task's scatter offsets from
   * all tasks' histograms and copies its slice of src into dst in
   * bucket order (stable within a bucket) */
  void tbbRadixIteration1(const Key shift,
                          const Ty* __restrict const src,
                          Ty* __restrict const dst,
                          const size_t threadIndex, const size_t threadCount)
  {
    const size_t startID = (threadIndex+0)*N/threadCount;
    const size_t endID   = (threadIndex+1)*N/threadCount;

    /* mask to extract some number of bits */
    const Key mask = BUCKETS-1;

    /* calculate total number of items for each bucket */
    __aligned(64) unsigned int total[BUCKETS];
    /*
    for (size_t i=0; i<BUCKETS; i++)
      total[i] = 0;
    */
    /* vectorized equivalent of the scalar loop in the comment above */
    for (size_t i=0; i<BUCKETS; i+=VSIZEX)
      vintx::store(&total[i], zero);

    for (size_t i=0; i<threadCount; i++)
    {
      /*
      for (size_t j=0; j<BUCKETS; j++)
        total[j] += radixCount[i][j];
      */
      for (size_t j=0; j<BUCKETS; j+=VSIZEX)
        vintx::store(&total[j], vintx::load(&total[j]) + vintx::load(&radixCount[i][j]));
    }

    /* calculate start offset of each bucket */
    __aligned(64) unsigned int offset[BUCKETS];
    offset[0] = 0;
    for (size_t i=1; i<BUCKETS; i++)
      offset[i] = offset[i-1] + total[i-1];

    /* calculate start offset of each bucket for this thread */
    for (size_t i=0; i<threadIndex; i++)
    {
      /*
      for (size_t j=0; j<BUCKETS; j++)
        offset[j] += radixCount[i][j];
      */
      for (size_t j=0; j<BUCKETS; j+=VSIZEX)
        vintx::store(&offset[j], vintx::load(&offset[j]) + vintx::load(&radixCount[i][j]));
    }

    /* copy items into their buckets */
#if defined(__INTEL_COMPILER)
#pragma nounroll
#endif
    for (size_t i=startID; i<endID; i++) {
      const Ty elt = src[i];
#if defined(__64BIT__)
      const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
#else
      const size_t index = ((Key)src[i] >> shift) & mask;
#endif
      dst[offset[index]++] = elt;
    }
  }

  /* one complete count+scatter iteration over one key byte; the last
   * flag is currently unused; the shared affinity_partitioner maps the
   * same data slices to the same workers across both passes */
  void tbbRadixIteration(const Key shift, const bool last,
                         const Ty* __restrict src, Ty* __restrict dst,
                         const size_t numTasks)
  {
    affinity_partitioner ap;
    parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration0(shift,src,dst,taskIndex,numTasks); },ap);
    parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration1(shift,src,dst,taskIndex,numTasks); },ap);
  }

  /* runs 4 (32 bit keys) or 8 (64 bit keys) iterations, ping-ponging
   * between src and tmp; the even iteration count leaves the sorted
   * result back in src */
  void tbbRadixSort(const size_t numTasks)
  {
    radixCount = (TyRadixCount*) alignedMalloc(MAX_TASKS*sizeof(TyRadixCount),64);

    if (sizeof(Key) == sizeof(uint32_t)) {
      tbbRadixIteration(0*BITS,0,src,tmp,numTasks);
      tbbRadixIteration(1*BITS,0,tmp,src,numTasks);
      tbbRadixIteration(2*BITS,0,src,tmp,numTasks);
      tbbRadixIteration(3*BITS,1,tmp,src,numTasks);
    }
    else if (sizeof(Key) == sizeof(uint64_t))
    {
      tbbRadixIteration(0*BITS,0,src,tmp,numTasks);
      tbbRadixIteration(1*BITS,0,tmp,src,numTasks);
      tbbRadixIteration(2*BITS,0,src,tmp,numTasks);
      tbbRadixIteration(3*BITS,0,tmp,src,numTasks);
      tbbRadixIteration(4*BITS,0,src,tmp,numTasks);
      tbbRadixIteration(5*BITS,0,tmp,src,numTasks);
      tbbRadixIteration(6*BITS,0,src,tmp,numTasks);
      tbbRadixIteration(7*BITS,1,tmp,src,numTasks);
    }
  }

private:
  TyRadixCount* radixCount;  //!< per-task histograms, allocated in tbbRadixSort
  Ty* const src;             //!< data to sort; also holds the final result
  Ty* const tmp;             //!< scratch array, ping-ponged with src
  const size_t N;            //!< number of elements
};
template<typename Ty>
void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
{
ParallelRadixSort<Ty,Ty>(src,tmp,N).sort(blockSize);
}
template<typename Ty, typename Key>
void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
{
ParallelRadixSort<Ty,Key>(src,tmp,N).sort(blockSize);
}
/*! convenience wrapper: radix sort with a 32 bit key */
template<typename Ty>
void radix_sort_u32(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
{
  radix_sort<Ty,uint32_t>(src,tmp,N,blockSize);
}
/*! convenience wrapper: radix sort with a 64 bit key */
template<typename Ty>
void radix_sort_u64(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
{
  radix_sort<Ty,uint64_t>(src,tmp,N,blockSize);
}
}

View file

@ -0,0 +1,75 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Locates an OpenImageIO installation.
#   Input:  OPENIMAGEIO_ROOT (variable or environment) - install prefix hint
#   Output: OPENIMAGEIO_FOUND, OPENIMAGEIO_INCLUDE_DIRS, OPENIMAGEIO_LIBRARIES

# allow the root to come from the environment (two historical spellings)
if (NOT OPENIMAGEIO_ROOT)
  set(OPENIMAGEIO_ROOT $ENV{OPENIMAGEIO_ROOT})
endif()
if (NOT OPENIMAGEIO_ROOT)
  set(OPENIMAGEIO_ROOT $ENV{OPENIMAGEIOROOT})
endif()

# when OPENIMAGEIO_ROOT changed since the last configure, drop cached results
if (NOT OPENIMAGEIO_ROOT STREQUAL OPENIMAGEIO_ROOT_LAST)
  unset(OPENIMAGEIO_INCLUDE_DIR CACHE)
  unset(OPENIMAGEIO_LIBRARY CACHE)
endif()

# toolchain-specific library directory suffix (Windows only)
set(OPENIMAGEIO_LIB_SUFFIX "")
if (WIN32)
  if (MSVC14)
    set(OPENIMAGEIO_LIB_SUFFIX "vc2015")
  elseif (MSVC12)
    set(OPENIMAGEIO_LIB_SUFFIX "vc2013")
  elseif (MSVC11)
    set(OPENIMAGEIO_LIB_SUFFIX "vc2012")
  elseif (MINGW)
    # the same suffix is used for 32 and 64 bit MinGW builds
    if (X64)
      set(OPENIMAGEIO_LIB_SUFFIX "mingw-w64")
    else ()
      set(OPENIMAGEIO_LIB_SUFFIX "mingw-w64")
    endif()
  endif()
endif ()

find_path(OPENIMAGEIO_ROOT include/OpenImageIO/imageio.h
  DOC "Root of OpenImageIO installation"
  HINTS ${OPENIMAGEIO_ROOT}
  PATHS
    "${PROJECT_SOURCE_DIR}/oiio"
    /usr/local
    /usr
    /
)

find_path(OPENIMAGEIO_INCLUDE_DIR OpenImageIO/imageio.h PATHS ${OPENIMAGEIO_ROOT}/include NO_DEFAULT_PATH)

set(OPENIMAGEIO_HINTS
  HINTS
    ${OPENIMAGEIO_ROOT}
  PATH_SUFFIXES
    /lib
    /lib64
    /lib-${OPENIMAGEIO_LIB_SUFFIX}
)
set(OPENIMAGEIO_PATHS PATHS /usr/lib /usr/lib64 /lib /lib64)
find_library(OPENIMAGEIO_LIBRARY OpenImageIO ${OPENIMAGEIO_HINTS} ${OPENIMAGEIO_PATHS})

# remember the root used for this search (change detection above)
set(OPENIMAGEIO_ROOT_LAST ${OPENIMAGEIO_ROOT} CACHE INTERNAL "Last value of OPENIMAGEIO_ROOT to detect changes")

set(OPENIMAGEIO_ERROR_MESSAGE "OpenImageIO not found in your environment. You can 1) install
via your OS package manager, or 2) install it
somewhere on your machine and point OPENIMAGEIO_ROOT to it.")

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(OpenImageIO
  ${OPENIMAGEIO_ERROR_MESSAGE}
  OPENIMAGEIO_INCLUDE_DIR OPENIMAGEIO_LIBRARY
)

if (OPENIMAGEIO_FOUND)
  set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
  set(OPENIMAGEIO_LIBRARIES ${OPENIMAGEIO_LIBRARY})
endif()

mark_as_advanced(OPENIMAGEIO_INCLUDE_DIR)
mark_as_advanced(OPENIMAGEIO_LIBRARY)

View file

@ -0,0 +1,11 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Minimal find module for libpng: locates png.h and the png library and
# reports the result through the standard FOUND machinery.
find_path(PNG_INCLUDE_DIR NAMES png.h)
find_library(PNG_LIBRARIES NAMES png)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PNG DEFAULT_MSG PNG_INCLUDE_DIR PNG_LIBRARIES)

mark_as_advanced(PNG_INCLUDE_DIR)
mark_as_advanced(PNG_LIBRARIES)

View file

@ -0,0 +1,482 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
#===============================================================================
# This script will attempt to find TBB and set up a TBB target.
#
# The user may specify a version and lists of required and optional components:
#
# find_package(TBB 2017.0 EXACT REQUIRED
# tbb tbbmalloc
# OPTIONAL_COMPONENTS tbbmalloc_proxy
# QUIET)
#
# If this target exists already, the script will attempt to re-use it, but fail
# if version or components do not match the user-specified requirements.
#
# If all the required component targets (e.g. TBB::tbb) exist, the script will
# attempt to create a target TBB and link existing component targets to it.
# It will fail if the component target version does not match the user-specified
# requirements.
#
# The user may specify the following variables to help the search process:
# - TBB_ROOT
# - TBB_INCLUDE_DIR
#
# After the script has run successfully, there is a target TBB, as well as
# component targets TBB::<COMPONENT>, e.g. TBB::tbbmalloc.
#
# The targets will attempt to link to release versions of TBB in release mode,
# and debug versions in debug mode.
#
# In addition to the targets, the script defines:
#
# TBB_FOUND
# TBB_INCLUDE_DIRS
#
#===============================================================================
# We use INTERFACE libraries, which are only supported in 3.x
cmake_minimum_required(VERSION 3.1)

# These two are used to automatically find the root and include directories.
set(_TBB_INCLUDE_SUBDIR "include")  # include dir relative to TBB_ROOT
set(_TBB_HEADER "tbb/tbb.h")        # header whose presence identifies a TBB tree

# Initialize cache variable; but use existing non-cache variable as the default,
# and fall back to the environment variable.
if (NOT TBB_ROOT)
  set(TBB_ROOT "$ENV{TBB_ROOT}")
endif()
set(TBB_ROOT "${TBB_ROOT}" CACHE PATH "The root path of TBB.")
#===============================================================================
# Error messages that respect the user's wishes about peace and quiet.
#===============================================================================
# Prints a STATUS message unless the caller passed QUIET to find_package.
function(rk_tbb_status)
  if (TBB_FIND_QUIETLY)
    return()
  endif()
  message(STATUS "${ARGV}")
endfunction()
# Prints a WARNING message unless the caller passed QUIET to find_package.
function(rk_tbb_warning)
  if (TBB_FIND_QUIETLY)
    return()
  endif()
  message(WARNING "${ARGV}")
endfunction()
# Reports a fatal error when the package is REQUIRED, otherwise a warning.
# Deliberately a macro (not a function): the return() below executes in the
# caller's scope, so a non-fatal error also aborts the calling macro/script.
macro(rk_tbb_error)
  if (TBB_FIND_REQUIRED)
    message(FATAL_ERROR "${ARGV}")
  else()
    rk_tbb_warning("${ARGV}")
  endif()
  # leave the caller after a non-fatal error
  return()
endmacro()
#===============================================================================
# Extract a list of required and optional components.
#===============================================================================
# Splits the find_package component request into _REQUIRED_COMPONENTS and
# _OPTIONAL_COMPONENTS. A macro so both lists land in the caller's scope.
# With no explicit components, "tbb" is required and the rest optional.
macro(rk_tbb_list_components)
  # cmake provides the TBB_FIND_COMPONENTS and
  # TBB_FIND_REQUIRED_<C> variables based on the invocation
  # of find_package.
  if (TBB_FIND_COMPONENTS STREQUAL "")
    set(_REQUIRED_COMPONENTS "tbb")
    set(_OPTIONAL_COMPONENTS "tbbmalloc"
                             "tbbmalloc_proxy"
                             "tbbbind"
                             "tbbpreview")
  else()
    set(_REQUIRED_COMPONENTS "")
    set(_OPTIONAL_COMPONENTS "")
    foreach (C IN LISTS TBB_FIND_COMPONENTS)
      if (${TBB_FIND_REQUIRED_${C}})
        list(APPEND _REQUIRED_COMPONENTS ${C})
      else()
        list(APPEND _OPTIONAL_COMPONENTS ${C})
      endif()
    endforeach()
  endif()
  rk_tbb_status("Looking for TBB components ${_REQUIRED_COMPONENTS}"
                " (${_OPTIONAL_COMPONENTS})")
endmacro()
#===============================================================================
# List components that are available, and check if any REQUIRED components
# are missing.
#===============================================================================
# Classifies the requested components into _TBB_AVAILABLE_COMPONENTS and
# _TBB_MISSING_COMPONENTS based on whether their TBB::<name> target exists.
# Only missing REQUIRED components count as missing; absent optional ones
# are silently skipped. A macro so the result lists reach the caller.
macro(rk_tbb_check_components)
  set(_TBB_MISSING_COMPONENTS "")
  set(_TBB_AVAILABLE_COMPONENTS "")
  foreach (component IN LISTS _REQUIRED_COMPONENTS)
    if (TARGET TBB::${component})
      list(APPEND _TBB_AVAILABLE_COMPONENTS ${component})
    else()
      list(APPEND _TBB_MISSING_COMPONENTS ${component})
    endif()
  endforeach()
  foreach (component IN LISTS _OPTIONAL_COMPONENTS)
    if (TARGET TBB::${component})
      list(APPEND _TBB_AVAILABLE_COMPONENTS ${component})
    endif()
  endforeach()
endmacro()
#===============================================================================
# Check the version of the TBB root we found.
#===============================================================================
# Reads the TBB version out of the headers under TBB_INCLUDE_DIR and checks
# it against any version constraint given to find_package. A macro so the
# TBB_VERSION* variables land in the caller's scope and rk_tbb_error can
# abort the caller.
macro(rk_tbb_check_version)
  # Extract the version we found in our root.
  # Header location differs between oneTBB (oneapi/tbb/version.h),
  # classic TBB (tbb/tbb_stddef.h) and intermediate releases (tbb/version.h).
  if(EXISTS "${TBB_INCLUDE_DIR}/oneapi/tbb/version.h")
    set(_TBB_VERSION_HEADER "oneapi/tbb/version.h")
  elseif(EXISTS "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h")
    set(_TBB_VERSION_HEADER "tbb/tbb_stddef.h")
  elseif(EXISTS "${TBB_INCLUDE_DIR}/tbb/version.h")
    set(_TBB_VERSION_HEADER "tbb/version.h")
  else()
    rk_tbb_error("Missing TBB version information. Could not find"
                 "tbb/tbb_stddef.h or tbb/version.h in ${TBB_INCLUDE_DIR}")
  endif()

  # Parse TBB_VERSION_MAJOR/MINOR out of the chosen header.
  file(READ "${TBB_INCLUDE_DIR}/${_TBB_VERSION_HEADER}" VERSION_HEADER_CONTENT)
  string(REGEX MATCH "#define TBB_VERSION_MAJOR ([0-9]+)" DUMMY "${VERSION_HEADER_CONTENT}")
  set(TBB_VERSION_MAJOR ${CMAKE_MATCH_1})
  string(REGEX MATCH "#define TBB_VERSION_MINOR ([0-9]+)" DUMMY "${VERSION_HEADER_CONTENT}")
  set(TBB_VERSION_MINOR ${CMAKE_MATCH_1})
  set(TBB_VERSION "${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR}")
  set(TBB_VERSION_STRING "${TBB_VERSION}")

  # If the user provided information about required versions, check them!
  if (TBB_FIND_VERSION)
    if (${TBB_FIND_VERSION_EXACT} AND NOT
        TBB_VERSION VERSION_EQUAL ${TBB_FIND_VERSION})
      rk_tbb_error("Requested exact TBB version ${TBB_FIND_VERSION},"
                   " but found ${TBB_VERSION}")
    elseif(TBB_VERSION VERSION_LESS ${TBB_FIND_VERSION})
      rk_tbb_error("Requested minimum TBB version ${TBB_FIND_VERSION},"
                   " but found ${TBB_VERSION}")
    endif()
  endif()

  rk_tbb_status("Found TBB version ${TBB_VERSION} at ${TBB_ROOT}")
endmacro()
#===============================================================================
# Reuse existing targets.
# NOTE: This must be a macro, as we rely on return() to exit this script.
#===============================================================================
# If all required TBB::<component> targets already exist (e.g. created by a
# parent project), reuse them: verify their version, wrap them in a TBB
# umbrella target and return() from the whole find module. A macro so the
# return() exits the including script, not just this helper.
macro(rk_tbb_reuse_existing_target_components)
  rk_tbb_check_components()
  if (_TBB_MISSING_COMPONENTS STREQUAL "")
    rk_tbb_status("Found existing TBB component targets: ${_TBB_AVAILABLE_COMPONENTS}")
    # Get TBB_INCLUDE_DIR if not already set to check for the version of the
    # existing component targets (making the assumption that they all have
    # the same version)
    if (NOT TBB_INCLUDE_DIR)
      list(GET _TBB_AVAILABLE_COMPONENTS 0 first_target)
      get_target_property(TBB_INCLUDE_DIR TBB::${first_target} INTERFACE_INCLUDE_DIRECTORIES)
      foreach(TGT IN LISTS _TBB_AVAILABLE_COMPONENTS)
        get_target_property(_TGT_INCLUDE_DIR TBB::${TGT} INTERFACE_INCLUDE_DIRECTORIES)
        if (NOT _TGT_INCLUDE_DIR STREQUAL "${TBB_INCLUDE_DIR}")
          rk_tbb_error("Existing TBB component targets have inconsistent include directories.")
        endif()
      endforeach()
    endif()
    # cache the include dir so later configure runs skip the search
    find_path(TBB_INCLUDE_DIR
              NAMES "${_TBB_HEADER}"
              PATHS "${TBB_INCLUDE_DIRS}")

    # Extract TBB_ROOT from the include path so that rk_tbb_check_version
    # prints the correct tbb location
    string(REPLACE "/${_TBB_INCLUDE_SUBDIR}" "" TBB_ROOT "${TBB_INCLUDE_DIR}")
    rk_tbb_check_version()

    # Add target TBB and link all available components
    if (NOT TARGET TBB)
      add_library(TBB INTERFACE)
      foreach(C IN LISTS _TBB_AVAILABLE_COMPONENTS)
        target_link_libraries(TBB INTERFACE TBB::${C})
      endforeach()
    endif()
    set(TBB_FOUND TRUE)
    set(TBB_INCLUDE_DIRS "${TBB_INCLUDE_DIR}")
    # exits the whole find module (macro return() runs in caller scope)
    return()
  elseif ((TARGET TBB) OR (NOT _TBB_AVAILABLE_COMPONENTS STREQUAL ""))
    rk_tbb_error("Ignoring existing TBB targets because required components are missing: ${_TBB_MISSING_COMPONENTS}")
  endif()
endmacro()
#===============================================================================
# Find the root directory if a manual override is not specified.
# Sets TBB_ROOT in the parent scope, but does not check for failure.
#===============================================================================
# Searches well-known install locations for a TBB root (a directory that
# contains include/tbb/tbb.h) when the user did not provide one. The result
# is stored by find_path in the cache variable TBB_ROOT, which is how it
# becomes visible outside this function. Does not check for failure.
function(rk_tbb_find_root)
  if (NOT TBB_ROOT OR TBB_ROOT STREQUAL "")
    set(TBB_HINTS "")
    set(TBB_PATHS "")
    if (WIN32)
      # workaround for parentheses in variable name / CMP0053
      set(PROGRAMFILESx86 "PROGRAMFILES(x86)")
      set(PROGRAMFILES32 "$ENV{${PROGRAMFILESx86}}")
      if(NOT PROGRAMFILES32)
        set(PROGRAMFILES32 "$ENV{PROGRAMFILES}")
      endif()
      if(NOT PROGRAMFILES32)
        set(PROGRAMFILES32 "C:/Program Files (x86)")
      endif()
      set(TBB_PATHS
        "${PROJECT_SOURCE_DIR}/../tbb"
        "${PROGRAMFILES32}/IntelSWTools/compilers_and_libraries/windows/tbb"
        "${PROGRAMFILES32}/Intel/Composer XE/tbb"
        "${PROGRAMFILES32}/Intel/compilers_and_libraries/windows/tbb")
    else()
      set(TBB_HINTS "/usr/local")
      set(TBB_PATHS
        "${PROJECT_SOURCE_DIR}/tbb"
        "/opt/intel/composerxe/tbb"
        "/opt/intel/compilers_and_libraries/tbb"
        "/opt/intel/compilers_and_libraries/linux/tbb"
        "/opt/intel/tbb")
    endif()

    # clear any stale value so find_path performs a fresh search
    set(TBB_ROOT "TBB_ROOT-NOTFOUND")
    find_path(TBB_ROOT
      NAMES "${_TBB_INCLUDE_SUBDIR}/${_TBB_HEADER}"
      HINTS ${TBB_HINTS}
      PATHS ${TBB_PATHS}
      NO_PACKAGE_ROOT_PATH)
  endif()
endfunction()
#===============================================================================
# Find the include directory if a manual override is not specified.
# Assumes TBB_ROOT to be set.
#===============================================================================
# Locates the TBB header below TBB_ROOT and records the directory in the
# cache variable TBB_INCLUDE_DIR. Assumes TBB_ROOT has been set already.
function(rk_tbb_find_include_directory)
  set(_include_hint "${TBB_ROOT}/${_TBB_INCLUDE_SUBDIR}")
  find_path(TBB_INCLUDE_DIR
    NAMES "${_TBB_HEADER}"
    HINTS "${_include_hint}"
    NO_PACKAGE_ROOT_PATH)
endfunction()
#===============================================================================
# Find a specific library and create a target for it.
#===============================================================================
# Finds one component library for one configuration (RELEASE or DEBUG) and
# records it in cache variables:
#   <COMPONENT>_LIBRARY_<CONFIG> - library / import library path
#   <COMPONENT>_BIN_DIR_<CONFIG> - directory containing the DLL (Windows)
#   <COMPONENT>_DLL_<CONFIG>     - full DLL path (Windows)
function(rk_tbb_find_library COMPONENT_NAME BUILD_CONFIG)
  set(LIB_VAR "${COMPONENT_NAME}_LIBRARY_${BUILD_CONFIG}")
  set(BIN_DIR_VAR "${COMPONENT_NAME}_BIN_DIR_${BUILD_CONFIG}")
  set(DLL_VAR "${COMPONENT_NAME}_DLL_${BUILD_CONFIG}")

  # debug libraries carry a _debug suffix
  if (BUILD_CONFIG STREQUAL "DEBUG")
    set(LIB_NAME "${COMPONENT_NAME}_debug")
  else()
    set(LIB_NAME "${COMPONENT_NAME}")
  endif()

  unset(LIB_PATHS)

  if (WIN32)
    if(CMAKE_SIZEOF_VOID_P EQUAL 8)
      set(TBB_ARCH intel64)
    else()
      set(TBB_ARCH ia32)
    endif()

    if(MSVC10)
      set(TBB_VCVER vc10)
    elseif(MSVC11)
      set(TBB_VCVER vc11)
    elseif(MSVC12)
      set(TBB_VCVER vc12)
    else()
      set(TBB_VCVER vc14)
    endif()

    set(LIB_PATHS
      ${TBB_ROOT}/lib/${TBB_ARCH}/${TBB_VCVER}
      ${TBB_ROOT}/lib
    )
    # On Windows, also search the DLL so that the client may install it.
    set(DLL_NAME "${LIB_NAME}.dll")

    # lib name with version suffix to handle oneTBB tbb12.dll
    set(LIB_NAME_VERSION "")
    if (${COMPONENT_NAME} STREQUAL "tbb")
      if (BUILD_CONFIG STREQUAL "DEBUG")
        set(LIB_NAME_VERSION "tbb12_debug")
      else()
        set(LIB_NAME_VERSION "tbb12")
      endif()
    endif()
    set(DLL_NAME_VERSION "${LIB_NAME_VERSION}.dll")

    # BUGFIX: the find_file result variable must be unique per component
    # and config. The previous code cached the result under the single
    # name BIN_FILE, so every call after the first one short-circuited
    # and reused the first DLL found (e.g. tbbmalloc got tbb.dll).
    set(BIN_FILE_VAR "${COMPONENT_NAME}_BIN_FILE_${BUILD_CONFIG}")
    find_file(${BIN_FILE_VAR}
      NAMES ${DLL_NAME} ${DLL_NAME_VERSION}
      PATHS
        "${TBB_ROOT}/bin/${TBB_ARCH}/${TBB_VCVER}"
        "${TBB_ROOT}/bin"
        "${TBB_ROOT}/redist/${TBB_ARCH}/${TBB_VCVER}"
        "${TBB_ROOT}/../redist/${TBB_ARCH}/tbb/${TBB_VCVER}"
        "${TBB_ROOT}/../redist/${TBB_ARCH}_win/tbb/${TBB_VCVER}"
      NO_DEFAULT_PATH)
    mark_as_advanced(${BIN_FILE_VAR})
    get_filename_component(${BIN_DIR_VAR} "${${BIN_FILE_VAR}}" DIRECTORY)
    set(${DLL_VAR} "${${BIN_FILE_VAR}}" CACHE PATH "${COMPONENT_NAME} ${BUILD_CONFIG} dll path")
  elseif(APPLE)
    set(LIB_PATHS ${TBB_ROOT}/lib)
  else()
    # prefer the newest gcc-versioned library directory
    file(GLOB LIB_PATHS PATHS ${TBB_ROOT}/lib/intel64/gcc*)
    list(REVERSE LIB_PATHS)
    list(APPEND LIB_PATHS
      ${TBB_ROOT}/lib
      ${TBB_ROOT}/lib/x86_64-linux-gnu
      ${TBB_ROOT}/lib64
      ${TBB_ROOT}/libx86_64-linux-gnu)
  endif()

  # We prefer finding the versioned file on Unix so that the library path
  # variable will not point to a symlink. This makes installing TBB as a
  # dependency easier.
  if (UNIX)
    set(LIB_NAME lib${LIB_NAME}.so.2 ${LIB_NAME})
  endif()

  find_library(${LIB_VAR}
    NAMES ${LIB_NAME}
    PATHS ${LIB_PATHS}
    NO_DEFAULT_PATH)

  # Hide these variables if we found something, otherwise display them for
  # easy override.
  if(${LIB_VAR})
    mark_as_advanced(${LIB_VAR})
  endif()
  if(${BIN_DIR_VAR})
    mark_as_advanced(${BIN_DIR_VAR})
  endif()
  if(${DLL_VAR})
    mark_as_advanced(${DLL_VAR})
  endif()
endfunction()
#===============================================================================
# Find the given component.
# This macro attempts to find both release and debug versions, and falls back
# appropriately if only one can be found.
# On success, it creates a target ${TARGET}::${COMPONENT_NAME} and links
# it to the overall ${TARGET}.
#
# For more information on the variables set here, see
# https://cmake.org/cmake/help/v3.17/manual/cmake-developer.7.html#a-sample-find-module
#===============================================================================
# Looks up release and debug libraries of a component and, when at least one
# exists, creates the imported target TBB::<COMPONENT_NAME> with per-config
# locations and links it into the TBB umbrella target.
function(rk_tbb_find_and_link_component COMPONENT_NAME)
  set(COMPONENT_TARGET "TBB::${COMPONENT_NAME}")

  rk_tbb_find_library("${COMPONENT_NAME}" RELEASE)
  rk_tbb_find_library("${COMPONENT_NAME}" DEBUG)

  if (${COMPONENT_NAME}_LIBRARY_RELEASE OR ${COMPONENT_NAME}_LIBRARY_DEBUG)
    # Note: We *must* use SHARED here rather than UNKNOWN as our
    # IMPORTED_NO_SONAME trick a few lines down does not work with
    # UNKNOWN.
    add_library(${COMPONENT_TARGET} SHARED IMPORTED)

    # release configuration: on Windows the DLL is the location and the
    # .lib the import library; elsewhere the shared object is both
    if (${COMPONENT_NAME}_LIBRARY_RELEASE)
      set_property(TARGET ${COMPONENT_TARGET} APPEND PROPERTY
                   IMPORTED_CONFIGURATIONS RELEASE)
      if(WIN32)
        set_target_properties(${COMPONENT_TARGET} PROPERTIES
                              IMPORTED_LOCATION_RELEASE "${${COMPONENT_NAME}_DLL_RELEASE}")
        set_target_properties(${COMPONENT_TARGET} PROPERTIES
                              IMPORTED_IMPLIB_RELEASE "${${COMPONENT_NAME}_LIBRARY_RELEASE}")
      else()
        set_target_properties(${COMPONENT_TARGET} PROPERTIES
                              IMPORTED_LOCATION_RELEASE "${${COMPONENT_NAME}_LIBRARY_RELEASE}")
      endif()
    endif()

    # debug configuration, same structure as above
    if (${COMPONENT_NAME}_LIBRARY_DEBUG)
      set_property(TARGET ${COMPONENT_TARGET} APPEND PROPERTY
                   IMPORTED_CONFIGURATIONS DEBUG)
      if(WIN32)
        set_target_properties(${COMPONENT_TARGET} PROPERTIES
                              IMPORTED_LOCATION_DEBUG "${${COMPONENT_NAME}_DLL_DEBUG}")
        set_target_properties(${COMPONENT_TARGET} PROPERTIES
                              IMPORTED_IMPLIB_DEBUG "${${COMPONENT_NAME}_LIBRARY_DEBUG}")
      else()
        set_target_properties(${COMPONENT_TARGET} PROPERTIES
                              IMPORTED_LOCATION_DEBUG "${${COMPONENT_NAME}_LIBRARY_DEBUG}")
      endif()
    endif()

    # usage requirements propagated to consumers of the component target
    set_target_properties(${COMPONENT_TARGET} PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${TBB_INCLUDE_DIR}"
      INTERFACE_COMPILE_DEFINITIONS "__TBB_NO_IMPLICIT_LINKAGE=1"
    )

    if(NOT WIN32)
      # Note: IMPORTED_NO_SONAME must be set or cmake will attempt
      # to link to the full path of libtbb.so. Instead, we
      # rely on the linker to find libtbb.so.2.
      set_target_properties(${COMPONENT_TARGET} PROPERTIES
        IMPORTED_NO_SONAME TRUE
      )
    endif()

    target_link_libraries(TBB INTERFACE ${COMPONENT_TARGET})
  endif()
endfunction()
#===============================================================================
# Note: The order of these is important.
# Some of these macros create variables that are used in later calls.
# Driver: classify requested components, try to reuse pre-existing targets
# (may return() from the module), otherwise locate root/includes, verify the
# version, create the TBB umbrella target and resolve each component.
rk_tbb_list_components()
rk_tbb_reuse_existing_target_components()

rk_tbb_find_root()
if (NOT EXISTS "${TBB_ROOT}")
  rk_tbb_error("Unable to find root directory ${TBB_ROOT}")
endif()
mark_as_advanced(TBB_ROOT) # Hide, we found something.

rk_tbb_find_include_directory()
if (NOT EXISTS "${TBB_INCLUDE_DIR}")
  rk_tbb_error("Unable to find include directory ${TBB_INCLUDE_DIR}")
endif()
mark_as_advanced(TBB_INCLUDE_DIR) # Hide, we found something.

rk_tbb_check_version()

# umbrella target; components link themselves into it
add_library(TBB INTERFACE)
foreach(C IN LISTS _REQUIRED_COMPONENTS _OPTIONAL_COMPONENTS)
  rk_tbb_find_and_link_component(${C})
endforeach()

# fail (or warn, if not REQUIRED) when a required component is still missing
rk_tbb_check_components()
if (_TBB_MISSING_COMPONENTS)
  rk_tbb_error("Cannot find required components: "
               "${_TBB_MISSING_COMPONENTS}")
endif()

set(TBB_FOUND TRUE)
set(TBB_INCLUDE_DIRS "${TBB_INCLUDE_DIR}")

View file

@ -0,0 +1,13 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
// Compile probe for TRY_COMPILE: building this file succeeds only when the
// compiler targets ARM with NEON available (otherwise #error aborts it).
#if !defined(__ARM_NEON)
#error "No ARM Neon support"
#endif

#include <arm_neon.h>

// Calls a NEON intrinsic so the probe also verifies that <arm_neon.h>
// provides the expected API, not merely that the macro is defined.
int main()
{
  return vaddvq_s32(vdupq_n_s32(1));
}

View file

@ -0,0 +1,20 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Script mode (cmake -Dfile=<binary> -P ...): scans the symbol table of
# ${file} and warns about global variables placed in .bss by non-SSE code.
# objdump is assumed present only on Linux, so other platforms are skipped.
if (WIN32 OR APPLE)
  return()
endif()

execute_process(COMMAND objdump -C -t ${file} OUTPUT_VARIABLE output)

# BUGFIX: "${output}" must be quoted — when objdump produces no output the
# unquoted expansion vanishes and string(REPLACE) fails with a fatal
# "requires at least four arguments" error.
string(REPLACE "\n" ";" output "${output}")

foreach (line ${output})
  if ("${line}" MATCHES "O .bss")
    if (NOT "${line}" MATCHES "std::__ioinit" AND # this is caused by iostream initialization and is likely also ok
        NOT "${line}" MATCHES "\\(\\)::" AND # this matches a static inside a function which is fine
        NOT "${line}" MATCHES "function_local_static_" AND # static variable inside a function (explicitly named)
        NOT "${line}" MATCHES "__\\$U") # ICC generated locks for static variable inside a function
      message(WARNING "\nProblematic global variable in non-SSE code:\n" ${line})
    endif()
  endif()
endforeach()

View file

@ -0,0 +1,36 @@
// ======================================================================== //
// Copyright 2017 Kitware, Inc. //
// //
// Licensed under the Apache License, Version 2.0 (the "License"); //
// you may not use this file except in compliance with the License. //
// You may obtain a copy of the License at //
// //
// http://www.apache.org/licenses/LICENSE-2.0 //
// //
// Unless required by applicable law or agreed to in writing, software //
// distributed under the License is distributed on an "AS IS" BASIS, //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and //
// limitations under the License. //
// ======================================================================== //
// Selects, at compile time, the best ISA the compiler targets and embeds a
// recognizable "ISA:<name>" string into the resulting binary; the CMake side
// extracts it with FILE(STRINGS ... REGEX "^ISA:"). The literal is split
// into pieces, presumably so the marker does not appear verbatim in this
// source text — TODO confirm.
#if \
defined(__AVX512F__) && defined(__AVX512CD__) && \
defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
char const *info_isa = "ISA" ":" "AVX512";
#elif defined(__AVX2__)
char const *info_isa = "ISA" ":" "AVX2";
#elif defined(__AVX__)
char const *info_isa = "ISA" ":" "AVX";
#elif defined(__SSE4_2__)
char const *info_isa = "ISA" ":" "SSE42";
#else // defined(__SSE2__)
char const *info_isa = "ISA" ":" "SSE2";
#endif

int main(int argc, char **argv)
{
  // Read the string at a runtime-dependent index so the compiler cannot
  // optimize info_isa out of the binary.
  int require = 0;
  require += info_isa[argc];
  return require;
}

View file

@ -0,0 +1,43 @@
## ======================================================================== ##
## Copyright 2017 Kitware, Inc. ##
## ##
## Licensed under the Apache License, Version 2.0 (the "License"); ##
## you may not use this file except in compliance with the License. ##
## You may obtain a copy of the License at ##
## ##
## http://www.apache.org/licenses/LICENSE-2.0 ##
## ##
## Unless required by applicable law or agreed to in writing, software ##
## distributed under the License is distributed on an "AS IS" BASIS, ##
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ##
## See the License for the specific language governing permissions and ##
## limitations under the License. ##
## ======================================================================== ##
# Directory containing this module, captured at include time so the function
# below can locate check_isa.cpp independently of the caller's current dirs.
SET(CHECK_ISA_DIR "${CMAKE_CURRENT_LIST_DIR}")

# CHECK_ISA_DEFAULT(<outvar>)
# Determines the default instruction set targeted by the current compiler
# flags and stores its name (NEON / SSE2 / SSE42 / AVX / AVX2 / AVX512) in
# <outvar> in the caller's scope.
FUNCTION(CHECK_ISA_DEFAULT OUTVAR)
# If a NEON test file compiles, we are on an ARM toolchain.
# NOTE(review): this probe is looked up under PROJECT_SOURCE_DIR/common/cmake
# rather than CHECK_ISA_DIR like the x86 probe below -- confirm the NEON
# probe really lives there in this tree.
TRY_COMPILE(COMPILER_SUPPORTS_ARM_NEON "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_arm_neon.cpp")
IF (COMPILER_SUPPORTS_ARM_NEON)
SET(ISA_DEFAULT "NEON")
SET(${OUTVAR} ${ISA_DEFAULT} PARENT_SCOPE)
RETURN()
ENDIF()
# Otherwise compile the x86 probe and keep a copy of the produced binary so
# the embedded "ISA:<name>" marker can be read back out of it.
SET(ISA_DEFAULT_BIN "${CMAKE_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/check_isa_default.bin")
SET(SRC "${CHECK_ISA_DIR}/check_isa.cpp")
TRY_COMPILE(ISA_DEFAULT_COMPILE
"${CMAKE_BINARY_DIR}"
"${SRC}"
COPY_FILE "${ISA_DEFAULT_BIN}"
)
# If even the probe fails to compile, fall back to the baseline ISA.
IF(NOT ISA_DEFAULT_COMPILE)
SET(ISA_DEFAULT "SSE2")
SET(${OUTVAR} ${ISA_DEFAULT} PARENT_SCOPE)
RETURN()
ENDIF()
# Extract the "ISA:<name>" string literal the probe baked into the binary.
FILE(STRINGS ${ISA_DEFAULT_BIN} ISA_DEFAULT REGEX "^ISA:")
STRING(REPLACE "ISA:" "" ISA_DEFAULT "${ISA_DEFAULT}")
SET(${OUTVAR} ${ISA_DEFAULT} PARENT_SCOPE)
ENDFUNCTION()

View file

@ -0,0 +1,24 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Script-mode check (expects -Dfile=<binary>): scans the disassembly of
# ${file} for functions that reserve a large stack frame.  objdump with this
# output format is only available with GNU binutils, so skip elsewhere.
IF (WIN32 OR APPLE)
return()
ENDIF()
execute_process(COMMAND objdump -d ${file} OUTPUT_VARIABLE output)
string(REPLACE "\n" ";" output ${output})
# Name of the function whose disassembly we are currently inside, so that
# warnings can identify it.
SET(lastfunc "")
foreach (line ${output})
# A line of the form "<address> <symbol>:" starts a new function.
if ("${line}" MATCHES "^.*<([^>]*)>:$")
SET(lastfunc ${CMAKE_MATCH_1})
endif()
# Match the frame setup instruction "sub $<immediate>,%rsp".
if ("${line}" MATCHES ".*sub[ ]+[$]([^,]*),%rsp.*")
set(bytes ${CMAKE_MATCH_1})
# NOTE(review): objdump prints immediates in hex ("0x..."), while CMake's
# GREATER compares decimal numbers -- confirm this comparison actually
# fires for hex-formatted values.
if ("${bytes}" GREATER "4096")
# Only recursive functions are flagged: a large fixed frame is paid once,
# but multiplies with recursion depth.
if ("${lastfunc}" MATCHES ".*recurse.*")
message(WARNING "Large stack space requirement: ${lastfunc} size: ${bytes}")
endif()
endif()
endif()
endforeach()

View file

@ -0,0 +1,146 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# _SET_IF_EMPTY(<var> <value>)
# Assigns <value> to <var> only when <var> currently evaluates as false
# (unset, empty, OFF, 0, ...), so users can pre-seed the FLAGS_* variables
# below from the cache or command line.
MACRO(_SET_IF_EMPTY VAR VALUE)
IF(NOT ${VAR})
SET(${VAR} "${VALUE}")
ENDIF()
ENDMACRO()
# Per-ISA compile flags.  On ARM only the x86 feature-test macros are
# defined (presumably consumed by an SSE-to-NEON translation layer -- TODO
# confirm); on x86 the real machine flags are used, overridable by the user
# via _SET_IF_EMPTY.
IF (EMBREE_ARM)
SET(FLAGS_SSE2 "-D__SSE__ -D__SSE2__")
SET(FLAGS_SSE42 "-D__SSE4_2__ -D__SSE4_1__")
SET(FLAGS_AVX "-D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
SET(FLAGS_AVX2 "-D__AVX2__ -D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
ELSE ()
# for `thread` keyword
_SET_IF_EMPTY(FLAGS_SSE2 "-msse -msse2 -mno-sse4.2")
_SET_IF_EMPTY(FLAGS_SSE42 "-msse4.2")
_SET_IF_EMPTY(FLAGS_AVX "-mavx")
_SET_IF_EMPTY(FLAGS_AVX2 "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
_SET_IF_EMPTY(FLAGS_AVX512 "-march=skx")
ENDIF ()
# Platform-specific compiler/linker configuration.  The flags are accumulated
# into the global CMAKE_*_FLAGS strings (legacy style); the order of
# appends is significant, so do not reorder.
IF (WIN32)
# --- MSVC-style toolchain (clang-cl / icx on Windows) ---
SET(COMMON_CXX_FLAGS "")
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /EHsc") # catch C++ exceptions only and extern "C" functions never throw a C++ exception
# SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /MP") # compile source files in parallel
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /GR") # enable runtime type information (on by default)
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} -Xclang -fcxx-exceptions") # enable C++ exceptions in Clang
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /w") # disable all warnings
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /Gy") # package individual functions
IF (EMBREE_STACK_PROTECTOR)
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /GS") # protects against return address overrides
ELSE()
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /GS-") # do not protect against return address overrides
ENDIF()
# DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
# Turns stack protection off for a single source file (used for performance
# sensitive code) when it was globally enabled.
MACRO(DISABLE_STACK_PROTECTOR_FOR_FILE file)
IF (EMBREE_STACK_PROTECTOR)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "/GS-")
ENDIF()
ENDMACRO()
# Per-configuration flags.  Note that even Debug builds are fully optimized.
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COMMON_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /DDEBUG") # enables assertions
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /DTBB_USE_DEBUG") # configures TBB in debug mode
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Ox") # enable full optimizations
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Oi") # inline intrinsic functions
SET(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /DEBUG") # generate debug information
SET(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} /DEBUG") # generate debug information
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COMMON_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi") # inline intrinsic functions
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${COMMON_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Oi") # inline intrinsic functions
SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /DEBUG") # generate debug information
SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /DEBUG") # generate debug information
# Hardening-related linker flags.
SET(SECURE_LINKER_FLAGS "")
SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /NXCompat") # compatible with data execution prevention (on by default)
SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /DynamicBase") # random rebase of executable at load time
IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /SafeSEH") # invoke known exception handlers (Win32 only, x64 exception handlers are safe by design)
ENDIF()
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SECURE_LINKER_FLAGS}")
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${SECURE_LINKER_FLAGS}")
INCLUDE(msvc_post)
ELSE()
# --- GCC/Clang-style toolchain (Linux, macOS) ---
OPTION(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)
OPTION(EMBREE_ADDRESS_SANITIZER "Enabled CLANG address sanitizer." OFF)
IF (EMBREE_IGNORE_CMAKE_CXX_FLAGS)
SET(CMAKE_CXX_FLAGS "")
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") # enables most warnings
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") # enables string format vulnerability warnings
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsigned-char") # treat char as signed on all processors, including ARM
IF (NOT APPLE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIE") # enables support for more secure position independent execution
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # enables C++11 features
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") # makes all symbols hidden by default
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") # makes all inline symbols hidden by default
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") # disables strict aliasing rules
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-tree-vectorize") # disable auto vectorizer
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORTIFY_SOURCE=2") # perform extra security checks for some standard library calls
IF (EMBREE_STACK_PROTECTOR)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector") # protects against return address overrides
ENDIF()
# DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
# Same per-file opt-out as in the WIN32 branch, with the GCC-style flag.
MACRO(DISABLE_STACK_PROTECTOR_FOR_FILE file)
IF (EMBREE_STACK_PROTECTOR)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "-fno-stack-protector")
ENDIF()
ENDMACRO()
IF (EMBREE_ADDRESS_SANITIZER)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope -fno-omit-frame-pointer -fno-optimize-sibling-calls")
ENDIF()
IF (EMSCRIPTEN)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions") # enable exceptions
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # enable threads
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msimd128") # enable SIMD intrinsics
ENDIF()
# Per-configuration flags; note Debug is rebuilt from scratch and is fully
# optimized (only assertions distinguish it from Release).
SET(CMAKE_CXX_FLAGS_DEBUG "")
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") # generate debug information
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") # enable assertions
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DTBB_USE_DEBUG") # configure TBB in debug mode
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O3") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELEASE "")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG") # disable assertions
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "")
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g") # generate debug information
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -DNDEBUG") # disable assertions
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") # enable full optimizations
IF (APPLE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.7") # makes sure code runs on older MacOSX versions
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # link against libc++ which supports C++11 features
ELSE(APPLE)
IF (NOT EMBREE_ADDRESS_SANITIZER) # for address sanitizer this causes link errors
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") # issues link error for undefined symbols in shared library
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") # enables position independent execution for executable
IF (NOT EMSCRIPTEN)
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack
ENDIF()
ENDIF()
ENDIF(APPLE)
ENDIF()

View file

@ -0,0 +1,23 @@
## ======================================================================== ##
## Copyright 2017 Kitware, Inc. ##
## ##
## Licensed under the Apache License, Version 2.0 (the "License"); ##
## you may not use this file except in compliance with the License. ##
## You may obtain a copy of the License at ##
## ##
## http://www.apache.org/licenses/LICENSE-2.0 ##
## ##
## Unless required by applicable law or agreed to in writing, software ##
## distributed under the License is distributed on an "AS IS" BASIS, ##
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ##
## See the License for the specific language governing permissions and ##
## limitations under the License. ##
## ======================================================================== ##
# Per-ISA flags using the -target-cpu=<arch> spelling (presumably for the
# Cray/CCE-style driver -- confirm which compiler includes this file).
# "NOT_SUPPORTED" marks ISA levels this compiler cannot target.
SET(FLAGS_SSE2 "-target-cpu=x86_64")
SET(FLAGS_SSE42 "NOT_SUPPORTED")
SET(FLAGS_AVX "-target-cpu=sandybridge")
SET(FLAGS_AVX2 "-target-cpu=haswell")
SET(FLAGS_AVX512 "-target-cpu=x86-skylake")
# Dispatch to a compiler-specific override file named after the lowercased
# compiler id (e.g. clang.cmake, gnu.cmake); OPTIONAL makes the include a
# no-op when no such file exists.
STRING(TOLOWER "${CMAKE_CXX_COMPILER_ID}" _lower_compiler_id)
INCLUDE("${CMAKE_CURRENT_LIST_DIR}/${_lower_compiler_id}.cmake" OPTIONAL)

View file

@ -0,0 +1,4 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Script-mode helper (expects -Dsrc=<source> -Ddst=<file>): writes a one-line
# wrapper that merely includes ${src}.  Presumably used to compile the same
# translation unit multiple times under different per-ISA flags with distinct
# object names -- confirm against callers.
file(WRITE ${dst} "#include \"${src}\"\n")

View file

@ -0,0 +1,292 @@
## Copyright 2009-2022 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# _SET_IF_EMPTY(<var> <value>)
# Assigns <value> to <var> only when <var> currently evaluates as false
# (unset, empty, OFF, 0, ...), letting users pre-seed the FLAGS_* variables.
MACRO(_SET_IF_EMPTY VAR VALUE)
IF(NOT ${VAR})
SET(${VAR} "${VALUE}")
ENDIF()
ENDMACRO()
# Default per-ISA machine flags, overridable by the user.
_SET_IF_EMPTY(FLAGS_SSE2 "-msse2")
_SET_IF_EMPTY(FLAGS_SSE42 "-msse4.2")
_SET_IF_EMPTY(FLAGS_AVX "-mavx")
_SET_IF_EMPTY(FLAGS_AVX2 "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
_SET_IF_EMPTY(FLAGS_AVX512 "-march=skx")
IF (NOT WIN32)
OPTION(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)
IF (EMBREE_IGNORE_CMAKE_CXX_FLAGS)
SET(CMAKE_CXX_FLAGS "")
ENDIF()
ENDIF()
# Classify the SYCL toolchain by the compiler executable's name: anything
# other than plain "clang++" (e.g. dpcpp, icx, icpx) is treated as an oneAPI
# distribution.
GET_FILENAME_COMPONENT(SYCL_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH)
GET_FILENAME_COMPONENT(SYCL_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME_WE)
IF (NOT SYCL_COMPILER_NAME STREQUAL "clang++")
SET(SYCL_ONEAPI TRUE)
# icx/icpx need some extra handling further below (intel-lib linking etc.).
IF (SYCL_COMPILER_NAME STREQUAL "icx" OR SYCL_COMPILER_NAME STREQUAL "icpx")
SET(SYCL_ONEAPI_ICX TRUE)
ELSE()
SET(SYCL_ONEAPI_ICX FALSE)
ENDIF()
SET(STORE_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
SET(STORE_CMAKE_CXX_LINK_FLAGS ${CMAKE_CXX_LINK_FLAGS})
# NOTE(review): the flags are stored and immediately restored without any
# modification in between, which makes this a no-op -- confirm whether a
# flag-mutation step was removed here.
IF (NOT EMBREE_SYCL_SUPPORT)
# if EMBREE_SYCL_SUPPORT is off we don't want the -fsycl flags
SET(CMAKE_CXX_FLAGS ${STORE_CMAKE_CXX_FLAGS})
SET(CMAKE_CXX_LINK_FLAGS ${STORE_CMAKE_CXX_LINK_FLAGS})
ENDIF()
ELSE()
SET(SYCL_ONEAPI FALSE)
# Plain clang++ builds get the Intel-compiler define explicitly --
# presumably so shared code takes the same paths as under dpcpp; confirm.
ADD_DEFINITIONS(-D__INTEL_LLVM_COMPILER)
ENDIF()
# SYCL-specific compile/link flag assembly.  The CMAKE_*_SYCL variables are
# kept separate from the regular flags and are applied only to SYCL targets.
# NOTE: the nested quoting for -Xsycl-target-backend below is deliberate and
# fragile -- do not reformat.
IF (EMBREE_SYCL_SUPPORT)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-sycl") # makes dpcpp compiler compatible with clang++
SET(CMAKE_CXX_FLAGS_SYCL "-fsycl -fsycl-unnamed-lambda -Xclang -fsycl-allow-func-ptr")
SET(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} -Wno-mismatched-tags -Wno-pessimizing-move -Wno-reorder -Wno-unneeded-internal-declaration -Wno-delete-non-abstract-non-virtual-dtor -Wno-dangling-field -Wno-unknown-pragmas -Wno-logical-op-parentheses")
IF (SYCL_ONEAPI_ICX AND WIN32)
SET(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} /debug:none") # FIXME: debug information generation takes forever in SYCL
SET(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} /DNDEBUG") # FIXME: debug information generation takes forever in SYCL
ELSE()
SET(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} -g0") # FIXME: debug information generation takes forever in SYCL
SET(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} -UDEBUG -DNDEBUG") # FIXME: assertion still not working in SYCL
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-bitwise-instead-of-logical") # disables "use of bitwise '&' with boolean operands" warning
SET(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} -Wno-bitwise-instead-of-logical") # disables "use of bitwise '&' with boolean operands" warning
# Locate the sycl runtime library; on Windows its name is versioned and has
# a "d" suffix in debug builds, so glob for the right variant.
IF (WIN32)
SET(SYCL_COMPILER_LIB_DIR "${SYCL_COMPILER_DIR}/../lib")
IF (CMAKE_BUILD_TYPE STREQUAL "Debug")
file(GLOB SYCL_LIB RELATIVE ${SYCL_COMPILER_LIB_DIR}
${SYCL_COMPILER_LIB_DIR}/sycld.lib
${SYCL_COMPILER_LIB_DIR}/sycl[0-9]d.lib
${SYCL_COMPILER_LIB_DIR}/sycl[0-9][0-9]d.lib)
ELSE()
file(GLOB SYCL_LIB RELATIVE ${SYCL_COMPILER_LIB_DIR}
${SYCL_COMPILER_LIB_DIR}/sycl.lib
${SYCL_COMPILER_LIB_DIR}/sycl[0-9].lib
${SYCL_COMPILER_LIB_DIR}/sycl[0-9][0-9].lib)
ENDIF()
GET_FILENAME_COMPONENT(SYCL_LIB_NAME ${SYCL_LIB} NAME_WE)
ELSE()
SET(SYCL_LIB_NAME "sycl")
ENDIF()
SET(CMAKE_LINK_FLAGS_SYCL "-fsycl")
# Retained IGC tuning knobs (intentionally kept for future experiments):
#LIST(APPEND CMAKE_IGC_OPTIONS "EnableOCLNoInlineAttr=0") # enabled __noinline
#LIST(APPEND CMAKE_IGC_OPTIONS "ControlKernelTotalSize=0")
#LIST(APPEND CMAKE_IGC_OPTIONS "SubroutineThreshold=110000") # Minimal kernel size to enable subroutines
#LIST(APPEND CMAKE_IGC_OPTIONS "EnableUnmaskedFunctions=1") # enables unmasked functions
#LIST(APPEND CMAKE_IGC_OPTIONS "ByPassAllocaSizeHeuristic=64") # puts small arrays into registers
#LIST(APPEND CMAKE_IGC_OPTIONS "EnableIndirectCallOptimization=0") # Enables inlining indirect calls by comparing function addresses
#LIST(APPEND CMAKE_IGC_OPTIONS "FunctionControl=0") # 0 = default, 1 = inline, 2 = subroutine, 3 = stackcall, 4 = keep indirect calls
#LIST(APPEND CMAKE_IGC_OPTIONS "forceGlobalRA=1") # "force global register allocator
#LIST(APPEND CMAKE_IGC_OPTIONS "TotalGRFNum=128") # Total GRF used for register allocation
#LIST(APPEND CMAKE_IGC_OPTIONS "GRFNumToUse=64") # "Set the number of general registers to use (64 to totalGRFNum)
#LIST(APPEND CMAKE_IGC_OPTIONS "ReplaceIndirectCallWithJmpi=1") # Replace indirect call with jmpi instruction (HW WA)
#LIST(APPEND CMAKE_IGC_OPTIONS "DisableUniformAnalysis=1") # Setting this to 1/true adds a compiler switch to disable uniform_analysis
#LIST(APPEND CMAKE_IGC_OPTIONS "DisableLoopUnroll=1") # Setting this to 1/true adds a compiler switch to disable loop unrolling
#LIST(APPEND CMAKE_IGC_OPTIONS "EnableStatelessToStatefull=0") # Enable Stateless To Statefull transformation for global and constant address space in OpenCL kernels
#LIST(APPEND CMAKE_IGC_OPTIONS "EnableRecursionOpenCL=1") # Enable recursion with OpenCL user functions
#LIST(APPEND CMAKE_IGC_OPTIONS "EnableAdvMemOpt=0") # Enable advanced memory optimization
#LIST(APPEND CMAKE_IGC_OPTIONS "UniformMemOptLimit=512") # "Limit of uniform memory optimization in bits
#LIST(APPEND CMAKE_IGC_OPTIONS "EnablePreemption=0") # Enable generating preeemptable code (SKL+)
#LIST(APPEND CMAKE_IGC_OPTIONS "AllowSubroutineAndInirectdCalls=1") # Allow subroutine in the presence of indirect calls
#LIST(APPEND CMAKE_IGC_OPTIONS "AllocaRAPressureThreshold=0") # The threshold for the register pressure potential (this reduces amount of spilling!)
#LIST(APPEND CMAKE_IGC_OPTIONS "AssumeInt64Support=0") # Architecture with partial int64 still promote uniform arrays to registers
LIST(APPEND CMAKE_IGC_OPTIONS "VISAOptions=-scratchAllocForStackInKB 128 ") # this works around some IGC bug in spill compression
IF (CMAKE_BUILD_TYPE STREQUAL "Debug") # to allow printf inside indirectly callable function
LIST(APPEND CMAKE_IGC_OPTIONS "ForceInlineStackCallWithImplArg=0")
LIST(APPEND CMAKE_IGC_OPTIONS "EnableGlobalStateBuffer=1")
ENDIF()
# IGC expects a comma-separated option string.
STRING(REPLACE ";" "," CMAKE_IGC_OPTIONS "${CMAKE_IGC_OPTIONS}")
IF (EMBREE_SYCL_AOT_DEVICE_REVISION GREATER 0)
SET(CMAKE_OCL_OPTIONS "${CMAKE_OCL_OPTIONS} -revision_id ${EMBREE_SYCL_AOT_DEVICE_REVISION}") # Enable this to override the stepping/RevId
ENDIF()
SET(CMAKE_OCL_OPTIONS "${CMAKE_OCL_OPTIONS} -cl-intel-greater-than-4GB-buffer-required") # enables support for buffers larger than 4GB
IF (EMBREE_SYCL_LARGEGRF)
SET(CMAKE_OCL_OPTIONS "${CMAKE_OCL_OPTIONS} -internal_options -cl-intel-256-GRF-per-thread") # large GRF mode
ENDIF()
SET(CMAKE_OCL_OTHER_OPTIONS "${CMAKE_OCL_OTHER_OPTIONS} -cl-intel-force-global-mem-allocation -cl-intel-no-local-to-generic")
#SET(CMAKE_OCL_OTHER_OPTIONS "${CMAKE_OCL_OTHER_OPTIONS} -cl-intel-private-memory-minimal-size-per-thread 8192")
# Select SYCL targets: JIT-only (spir64) or additionally ahead-of-time
# compilation for the devices listed in EMBREE_SYCL_AOT_DEVICES.
IF (EMBREE_SYCL_AOT_DEVICES STREQUAL "none")
SET(CMAKE_CXX_FLAGS_SYCL_AOT "-fsycl-targets=spir64")
ELSE()
SET(CMAKE_CXX_FLAGS_SYCL_AOT "-fsycl-targets=spir64,spir64_gen")
ENDIF()
SET(CMAKE_LINK_FLAGS_SYCL_AOT "${CMAKE_CXX_FLAGS_SYCL_AOT} -Xsycl-target-backend=spir64 \"${CMAKE_OCL_OPTIONS} -options \\\"${CMAKE_OCL_OTHER_OPTIONS} -igc_opts='${CMAKE_IGC_OPTIONS}'\\\"\"")
IF (NOT EMBREE_SYCL_AOT_DEVICES STREQUAL "none")
SET(CMAKE_LINK_FLAGS_SYCL_AOT "${CMAKE_LINK_FLAGS_SYCL_AOT} -Xsycl-target-backend=spir64_gen \"-device ${EMBREE_SYCL_AOT_DEVICES} ${CMAKE_OCL_OPTIONS} -options \\\"${CMAKE_OCL_OTHER_OPTIONS} -igc_opts='${CMAKE_IGC_OPTIONS}'\\\"\"")
ENDIF()
IF (EMBREE_SYCL_DBG)
SET(CMAKE_CXX_FLAGS_SYCL_AOT "-g")
ENDIF()
SET(CMAKE_CXX_FLAGS_SYCL "${CMAKE_CXX_FLAGS_SYCL} ${CMAKE_CXX_FLAGS_SYCL_AOT}")
SET(CMAKE_LINK_FLAGS_SYCL "${CMAKE_LINK_FLAGS_SYCL} ${CMAKE_LINK_FLAGS_SYCL_AOT}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pessimizing-move") # disabled: warning: moving a temporary object prevents copy elision [-Wpessimizing-move]
IF (SYCL_ONEAPI_ICX AND WIN32)
IF (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 2024.0)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I\"${SYCL_COMPILER_DIR}/../opt/compiler/include/sycl\" -I\"${SYCL_COMPILER_DIR}/../opt/compiler/include/sycl/sycl\"") # disable warning from SYCL header
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I\"${SYCL_COMPILER_DIR}/../include/sycl\" -I\"${SYCL_COMPILER_DIR}/../include/\"") # disable warning from SYCL header
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qstd=c++17")
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
IF (SYCL_ONEAPI_ICX AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 2024.0)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem \"${SYCL_COMPILER_DIR}/../opt/compiler/include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../opt/compiler/include/sycl/sycl\"") # disable warning from SYCL header
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem \"${SYCL_COMPILER_DIR}/../include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../include/\"") # disable warning from SYCL header
ENDIF()
# enable C++17 features
# NOTE(review): this appends -Qstd=c++17/-std=c++17 a second time (already
# added just above) -- harmless but redundant; confirm and deduplicate.
IF (SYCL_ONEAPI_ICX AND WIN32)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qstd=c++17")
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
ENDIF()
ENDIF(EMBREE_SYCL_SUPPORT)
# icx/icpx-specific library handling: avoid runtime dependencies on the
# Intel compiler's own libraries (or link them statically on Unix).
IF(SYCL_ONEAPI_ICX)
IF (WIN32)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qno-intel-lib")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qno-intel-lib")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qimf-use-svml:false")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qimf-use-svml:false")
SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} /Qno-intel-lib")
SET(CMAKE_C_LINK_FLAGS "${CMAKE_C_LINK_FLAGS} /Qno-intel-lib")
# /DEPENDENTLOADFLAG:0x2000 restricts DLL search to safe directories.
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /Qoption,link,/DEPENDENTLOADFLAG:0x2000")
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /Qoption,link,/DEPENDENTLOADFLAG:0x2000")
ELSE()
SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -static-intel")
SET(CMAKE_C_LINK_FLAGS "${CMAKE_C_LINK_FLAGS} -static-intel")
#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fimf-use-svml=false")
#SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fimf-use-svml=false")
IF (NOT EMBREE_SYCL_SUPPORT)
SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -no-intel-lib")
SET(CMAKE_C_LINK_FLAGS "${CMAKE_C_LINK_FLAGS} -no-intel-lib")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -no-intel-lib")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -no-intel-lib")
ENDIF()
ENDIF()
ENDIF()
# Optional stack-smashing protection, spelled per-toolchain.
IF (EMBREE_STACK_PROTECTOR)
IF (SYCL_ONEAPI_ICX AND WIN32)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS") # protects against return address overrides
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector") # protects against return address overrides
ENDIF()
ENDIF()
# DISABLE_STACK_PROTECTOR_FOR_FILE(<file>)
# Turns stack protection back off for a single source file when it was
# enabled globally via EMBREE_STACK_PROTECTOR.
MACRO(DISABLE_STACK_PROTECTOR_FOR_FILE file)
IF (EMBREE_STACK_PROTECTOR)
IF (SYCL_ONEAPI_ICX AND WIN32)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "/GS-")
ELSE()
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "-fno-stack-protector")
ENDIF()
ENDIF()
ENDMACRO()
# Common warning/codegen flags, spelled MSVC-style for icx on Windows and
# GCC-style everywhere else.
IF (SYCL_ONEAPI_ICX AND WIN32)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:precise") # makes dpcpp compiler compatible with clang++
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") # catch C++ exceptions only and extern "C" functions never throw a C++ exception
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR") # enable runtime type information (on by default)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fcxx-exceptions") # enable C++ exceptions in Clang
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Gy") # package individual functions
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") # makes all symbols hidden by default
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") # makes all inline symbols hidden by default
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") # disables strict aliasing rules
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-tree-vectorize") # disable auto vectorizer
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORTIFY_SOURCE=2") # perform extra security checks for some standard library calls
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsigned-char") # treat char as signed on all processors, including ARM
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") # enables most warnings
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") # enables string format vulnerability warnings
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffp-model=precise") # makes dpcpp compiler compatible with clang++
ENDIF()
IF (WIN32)
# --- Windows: pick the C++ standard for non-SYCL builds (SYCL builds set
# C++17 earlier), then apply the shared MSVC post-configuration. ---
IF (NOT EMBREE_SYCL_SUPPORT)
IF (SYCL_ONEAPI_ICX)
IF (${MSVC_VERSION} VERSION_GREATER_EQUAL 1916)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qstd=c++14")
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qstd=c++11")
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Oi")
ELSE()
IF (${MSVC_VERSION} VERSION_GREATER_EQUAL 1916)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
ENDIF()
ENDIF()
ENDIF()
INCLUDE(msvc_post)
# workaround for file encoding problems of kernels/embree.rc found here https://gitlab.kitware.com/cmake/cmake/-/issues/18311
set(CMAKE_NINJA_CMCLDEPS_RC OFF)
ELSE()
# --- Unix: position-independent code, sanitizer option, per-config flags
# and hardening linker flags. ---
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIE") # enables support for more secure position independent execution
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
OPTION(EMBREE_ADDRESS_SANITIZER "Enabled CLANG address sanitizer." OFF)
IF (EMBREE_ADDRESS_SANITIZER)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope -fno-omit-frame-pointer -fno-optimize-sibling-calls")
ENDIF()
SET(CMAKE_CXX_FLAGS_DEBUG "")
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") # generate debug information
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") # enable assertions
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DTBB_USE_DEBUG") # configure TBB in debug mode
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O3") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELEASE "")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG") # disable assertions
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "")
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g") # generate debug information
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -DNDEBUG") # disable assertions
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") # enable full optimizations
IF(EMBREE_SYCL_SUPPORT)
# Extra configuration: optimized build with assertions kept on.
SET(CMAKE_CXX_FLAGS_RELWITHASSERT "")
SET(CMAKE_CXX_FLAGS_RELWITHASSERT "${CMAKE_CXX_FLAGS_RELWITHASSERT} -DDEBUG") # enable assertions
SET(CMAKE_CXX_FLAGS_RELWITHASSERT "${CMAKE_CXX_FLAGS_RELWITHASSERT} -O3") # enable full optimizations
ENDIF(EMBREE_SYCL_SUPPORT)
IF (NOT EMBREE_ADDRESS_SANITIZER) # for address sanitizer this causes link errors
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") # issues link error for undefined symbols in shared library
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") # enables position independent execution for executable
ENDIF()
ENDIF()

View file

@ -0,0 +1,11 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Build-tree variant of embree-config.cmake (template configured with @@
# substitution): reuses the install-tree config, then points the path
# variables at the build directory so find_package() works without install.
# use default install config
INCLUDE("${CMAKE_CURRENT_LIST_DIR}/embree-config-install.cmake")
# and override path variables to match for build directory
SET(EMBREE_INCLUDE_DIRS @PROJECT_SOURCE_DIR@/include)
SET(EMBREE_LIBRARY @PROJECT_BINARY_DIR@/@EMBREE_LIBRARY_FULLNAME@)
SET(EMBREE_LIBRARIES ${EMBREE_LIBRARY})

View file

@ -0,0 +1,16 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Package-version file template for find_package(embree <version>); the
# @EMBREE_VERSION*@ placeholders are filled in by configure_file().
set(PACKAGE_VERSION @EMBREE_VERSION@)

# Start pessimistic; the checks below upgrade the result flags that
# find_package() inspects.
set(PACKAGE_VERSION_EXACT 0)
set(PACKAGE_VERSION_COMPATIBLE 0)

# An exact match is also, trivially, a compatible one.
if (PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
  set(PACKAGE_VERSION_EXACT 1)
  set(PACKAGE_VERSION_COMPATIBLE 1)
endif()

# Requests for an older version within the same major release are accepted
# (same-major backward compatibility).
if (PACKAGE_FIND_VERSION_MAJOR EQUAL @EMBREE_VERSION_MAJOR@ AND PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
  set(PACKAGE_VERSION_COMPATIBLE 1)
endif()

View file

@ -0,0 +1,112 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Install-tree embree-config.cmake template (configured with @@ substitution):
# exposes the installed library paths, version, and the full build-time
# feature configuration to consumers of find_package(embree).
SET(EMBREE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/@EMBREE_RELATIVE_ROOT_DIR@")
GET_FILENAME_COMPONENT(EMBREE_ROOT_DIR "${EMBREE_ROOT_DIR}" ABSOLUTE)
SET(EMBREE_INCLUDE_DIRS "${EMBREE_ROOT_DIR}/@CMAKE_INSTALL_INCLUDEDIR@")
SET(EMBREE_LIBRARY "${EMBREE_ROOT_DIR}/@CMAKE_INSTALL_LIBDIR@/@EMBREE_LIBRARY_FULLNAME@")
SET(EMBREE_LIBRARIES ${EMBREE_LIBRARY})
# Version information.
SET(EMBREE_VERSION @EMBREE_VERSION@)
SET(EMBREE_VERSION_MAJOR @EMBREE_VERSION_MAJOR@)
SET(EMBREE_VERSION_MINOR @EMBREE_VERSION_MINOR@)
SET(EMBREE_VERSION_PATCH @EMBREE_VERSION_PATCH@)
SET(EMBREE_VERSION_NOTE "@EMBREE_VERSION_NOTE@")
# Instruction sets the installed library was built for.
SET(EMBREE_MAX_ISA @EMBREE_MAX_ISA@)
SET(EMBREE_ISA_SSE2 @EMBREE_ISA_SSE2@)
SET(EMBREE_ISA_SSE42 @EMBREE_ISA_SSE42@)
SET(EMBREE_ISA_AVX @EMBREE_ISA_AVX@)
SET(EMBREE_ISA_AVX2 @EMBREE_ISA_AVX2@)
SET(EMBREE_ISA_AVX512 @EMBREE_ISA_AVX512@)
SET(EMBREE_ISA_AVX512SKX @EMBREE_ISA_AVX512@) # just for compatibility
SET(EMBREE_ISA_NEON @EMBREE_ISA_NEON@)
SET(EMBREE_ISA_NEON2X @EMBREE_ISA_NEON2X@)
# Build-time feature configuration, mirrored for consumers.
SET(EMBREE_BUILD_TYPE @CMAKE_BUILD_TYPE@)
SET(EMBREE_ISPC_SUPPORT @EMBREE_ISPC_SUPPORT@)
SET(EMBREE_STATIC_LIB @EMBREE_STATIC_LIB@)
SET(EMBREE_SYCL_SUPPORT @EMBREE_SYCL_SUPPORT@)
SET(EMBREE_SYCL_GEOMETRY_CALLBACK @EMBREE_SYCL_GEOMETRY_CALLBACK@)
SET(EMBREE_TUTORIALS @EMBREE_TUTORIALS@)
SET(EMBREE_RAY_MASK @EMBREE_RAY_MASK@)
SET(EMBREE_STAT_COUNTERS @EMBREE_STAT_COUNTERS@)
SET(EMBREE_BACKFACE_CULLING @EMBREE_BACKFACE_CULLING@)
SET(EMBREE_FILTER_FUNCTION @EMBREE_FILTER_FUNCTION@)
SET(EMBREE_IGNORE_INVALID_RAYS @EMBREE_IGNORE_INVALID_RAYS@)
SET(EMBREE_TASKING_SYSTEM @EMBREE_TASKING_SYSTEM@)
SET(EMBREE_TBB_COMPONENT @EMBREE_TBB_COMPONENT@)
SET(EMBREE_COMPACT_POLYS @EMBREE_COMPACT_POLYS@)
SET(EMBREE_GEOMETRY_TRIANGLE @EMBREE_GEOMETRY_TRIANGLE@)
SET(EMBREE_GEOMETRY_QUAD @EMBREE_GEOMETRY_QUAD@)
SET(EMBREE_GEOMETRY_CURVE @EMBREE_GEOMETRY_CURVE@)
SET(EMBREE_GEOMETRY_SUBDIVISION @EMBREE_GEOMETRY_SUBDIVISION@)
SET(EMBREE_GEOMETRY_USER @EMBREE_GEOMETRY_USER@)
SET(EMBREE_GEOMETRY_INSTANCE @EMBREE_GEOMETRY_INSTANCE@)
SET(EMBREE_GEOMETRY_INSTANCE_ARRAY @EMBREE_GEOMETRY_INSTANCE_ARRAY@)
SET(EMBREE_GEOMETRY_GRID @EMBREE_GEOMETRY_GRID@)
SET(EMBREE_GEOMETRY_POINT @EMBREE_GEOMETRY_POINT@)
SET(EMBREE_RAY_PACKETS @EMBREE_RAY_PACKETS@)
SET(EMBREE_MAX_INSTANCE_LEVEL_COUNT @EMBREE_MAX_INSTANCE_LEVEL_COUNT@)
SET(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR @EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR@)
SET(EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE @EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE@)
SET(EMBREE_MIN_WIDTH @EMBREE_MIN_WIDTH@)
# A static Embree built against TBB needs TBB resolved in the consumer too.
IF (EMBREE_STATIC_LIB AND (EMBREE_TASKING_SYSTEM STREQUAL "TBB"))
INCLUDE(CMakeFindDependencyMacro)
FIND_DEPENDENCY(TBB)
ENDIF()
# Static builds export their internal helper targets and the per-ISA kernel
# libraries, which must all be imported before embree-targets.cmake.
IF (EMBREE_STATIC_LIB)
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/sys-targets.cmake")
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/math-targets.cmake")
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/simd-targets.cmake")
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/lexers-targets.cmake")
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/tasking-targets.cmake")
IF (EMBREE_ISA_SSE42)
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_sse42-targets.cmake")
ENDIF()
IF (EMBREE_ISA_AVX)
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_avx-targets.cmake")
ENDIF()
IF (EMBREE_ISA_AVX2)
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_avx2-targets.cmake")
ENDIF()
IF (EMBREE_ISA_AVX512)
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_avx512-targets.cmake")
ENDIF()
ENDIF()
# SYCL-enabled installs additionally export the GPU-related targets.
IF (EMBREE_SYCL_SUPPORT)
SET(EMBREE_SYCL_AOT_DEVICES @EMBREE_SYCL_AOT_DEVICES@)
SET(EMBREE_SYCL_LARGEGRF @EMBREE_SYCL_LARGEGRF@)
SET(EMBREE_SYCL_RT_VALIDATION_API @EMBREE_SYCL_RT_VALIDATION_API@)
IF (EMBREE_SYCL_RT_VALIDATION_API)
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_rthwif_sycl-targets.cmake")
ENDIF()
IF (EMBREE_STATIC_LIB)
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_rthwif-targets.cmake")
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/ze_wrapper-targets.cmake")
ENDIF()
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_sycl-targets.cmake")
ENDIF()
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree-targets.cmake")
# Testing configuration recorded at build time.
SET(EMBREE_TESTING_ONLY_SYCL_TESTS @EMBREE_TESTING_ONLY_SYCL_TESTS@)
SET(EMBREE_TESTING_INTENSITY @EMBREE_TESTING_INTENSITY@)
SET(EMBREE_TESTING_MEMCHECK @EMBREE_TESTING_MEMCHECK@)
SET(EMBREE_TESTING_BENCHMARK @EMBREE_TESTING_BENCHMARK@)

View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<!-- macOS code-signing entitlements: the single key below turns OFF
     hardened-runtime library validation for the signed binary.
     NOTE(review): presumably needed so the executable can load dylibs
     signed by a different team (or unsigned) - confirm this is intended,
     as it weakens the hardened runtime. -->
<dict>
<key>com.apple.security.cs.disable-library-validation</key>
<true/>
</dict>
</plist>

View file

@ -0,0 +1,105 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0

# Compiler/linker flag setup for a GCC-compatible toolchain.
# NOTE(review): this script mutates the global CMAKE_CXX_FLAGS* and
# CMAKE_*_LINKER_FLAGS strings directly; the final flag order is exactly
# the order of the SET() calls below, so statements must not be reordered.

# Sets ${VAR} to VALUE only when ${VAR} is currently unset/false, so the
# user can pre-seed any FLAGS_* variable from the command line or cache.
MACRO(_SET_IF_EMPTY VAR VALUE)
IF(NOT ${VAR})
SET(${VAR} "${VALUE}")
ENDIF()
ENDMACRO()

# Per-ISA compile flags.  On ARM the x86 SIMD feature macros are defined
# by hand instead of using -m switches (presumably consumed by an
# SSE-to-NEON translation layer - confirm); on x86 the real -m switches
# are used unless the caller already provided FLAGS_* values.
IF (EMBREE_ARM)
SET(FLAGS_SSE2 "-D__SSE__ -D__SSE2__")
SET(FLAGS_SSE42 "-D__SSE4_2__ -D__SSE4_1__")
SET(FLAGS_AVX "-D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
SET(FLAGS_AVX2 "-D__AVX2__ -D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
ELSE ()
_SET_IF_EMPTY(FLAGS_SSE2 "-msse2")
_SET_IF_EMPTY(FLAGS_SSE42 "-msse4.2")
_SET_IF_EMPTY(FLAGS_AVX "-mavx")
_SET_IF_EMPTY(FLAGS_AVX2 "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
_SET_IF_EMPTY(FLAGS_AVX512 "-mavx512f -mavx512dq -mavx512cd -mavx512bw -mavx512vl -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mprefer-vector-width=256")
ENDIF ()

# By default, discard whatever CMAKE_CXX_FLAGS the user/toolchain set.
OPTION(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)
IF (EMBREE_IGNORE_CMAKE_CXX_FLAGS)
SET(CMAKE_CXX_FLAGS "")
ENDIF()

# --- flags common to all build configurations ---
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
IF (EMBREE_ARM)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsigned-char") # treat 'char' as 'signed char' (also added unconditionally below; the duplicate is harmless)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flax-vector-conversions") # allow lax vector type conversions
ENDIF (EMBREE_ARM)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") # enables most warnings
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") # enables string format vulnerability warnings
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-class-memaccess") # disables clearing an object of type XXX with no trivial copy-assignment; use assignment or value-initialization instead (GCC-specific warning flag)
# these prevent compile to optimize away security checks
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-overflow") # assume that signed overflow occurs
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-delete-null-pointer-checks") # keep all checks for NULL pointers
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fwrapv") # this option instructs the compiler to assume that signed arithmetic overflow wraps around.
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsigned-char") # treat char as signed on all processors, including ARM
# Position-independent code/executable handling (not applicable on Windows).
IF (NOT WIN32)
IF (NOT APPLE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIE") # enables support for more secure position independent execution
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
ENDIF ()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # enables C++11 features
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") # makes all symbols hidden by default
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") # makes all inline symbols hidden by default
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") # disables strict aliasing rules
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-tree-vectorize") # disable auto vectorizer
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORTIFY_SOURCE=2") # perform extra security checks for some standard library calls
IF (EMBREE_STACK_PROTECTOR)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector") # protects against return address overrides
ENDIF()
# Opt a single source file out of stack protection (used for files where
# the canary check itself would be a problem).  Only meaningful when
# EMBREE_STACK_PROTECTOR is enabled.
MACRO(DISABLE_STACK_PROTECTOR_FOR_FILE file)
IF (EMBREE_STACK_PROTECTOR)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "-fno-stack-protector")
ENDIF()
ENDMACRO()

# --- per-configuration flags (each config is rebuilt from scratch) ---
SET(CMAKE_CXX_FLAGS_DEBUG "")
IF (EMBREE_ARM)
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsigned-char") # treat 'char' as 'signed char'
ENDIF (EMBREE_ARM)
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") # generate debug information
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") # enable assertions
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DTBB_USE_DEBUG") # configure TBB in debug mode
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O3") # enable full optimizations (Debug here means "assertions on", not "unoptimized")
SET(CMAKE_CXX_FLAGS_RELEASE "")
IF (EMBREE_ARM)
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fsigned-char") # treat 'char' as 'signed char'
ENDIF (EMBREE_ARM)
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG") # disable assertions
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "")
IF (EMBREE_ARM)
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fsigned-char") # treat 'char' as 'signed char'
ENDIF (EMBREE_ARM)
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g") # generate debug information
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -DNDEBUG") # disable assertions
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") # enable full optimizations

# --- platform-specific linker hardening ---
IF (APPLE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.7") # makes sure code runs on older MacOSX versions
# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # link against libc++ which supports C++11 features
ELSE(APPLE)
IF (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") # issues link error for undefined symbols in shared library
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security
ENDIF ()
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") # enables position independent execution for executable
ENDIF(APPLE)

View file

@ -0,0 +1,25 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0

# When dependency bundling is requested, copy the TBB runtime that the
# imported TBB::<component> target resolves to into the install tree.
# Emits a configure-time error if the imported target does not exist.
if (EMBREE_INSTALL_DEPENDENCIES)
  if (TARGET TBB::${EMBREE_TBB_COMPONENT})
    get_target_property(LIB_PATH TBB::${EMBREE_TBB_COMPONENT} IMPORTED_LOCATION_RELEASE)
    if(WIN32)
      # Runtime DLL goes next to the example binaries; the import library
      # goes into the library directory.
      install(FILES "${LIB_PATH}" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
      get_target_property(IMPLIB_PATH TBB::${EMBREE_TBB_COMPONENT} IMPORTED_IMPLIB_RELEASE)
      install(FILES "${IMPLIB_PATH}" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
      # tbb.dll probes for tbbmalloc.dll at load time, so ship it as well
      # even though nothing here links against it directly.
      get_filename_component(LIB_FOLDER ${LIB_PATH} DIRECTORY)
      file(GLOB TBB_MALLOC_FILES LIST_DIRECTORIES false "${LIB_FOLDER}/tbbmalloc.dll" "${LIB_FOLDER}/tbbmalloc?.dll")
      install(FILES "${TBB_MALLOC_FILES}" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
    else()
      # POSIX: collect every libtbb.* / libtbbmalloc.so* sitting next to
      # the release library and install them alongside our libraries.
      get_filename_component(LIB_DIR "${LIB_PATH}" DIRECTORY)
      file(GLOB LIB_FILES ${LIB_DIR}/libtbb.* ${LIB_DIR}/libtbbmalloc.so*)
      install(FILES ${LIB_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
    endif()
  else()
    message(SEND_ERROR "Target TBB::${EMBREE_TBB_COMPONENT} not found during install.")
  endif()
endif()

View file

@ -0,0 +1,176 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0

# Compiler/linker flag setup for the Intel C++ compiler (icl on Windows,
# icc elsewhere).  NOTE(review): this script mutates the global
# CMAKE_CXX_FLAGS* / linker-flag strings directly; flag order follows
# statement order, so do not reorder.

# Sets ${VAR} to VALUE only when ${VAR} is currently unset/false, so the
# user can pre-seed any FLAGS_* variable from the command line or cache.
MACRO(_SET_IF_EMPTY VAR VALUE)
IF(NOT ${VAR})
SET(${VAR} "${VALUE}")
ENDIF()
ENDMACRO()

# --- Windows (icl, MSVC-style driver) ---
IF (WIN32)
_SET_IF_EMPTY(FLAGS_SSE2 "/QxSSE2")
_SET_IF_EMPTY(FLAGS_SSE42 "/QxSSE4.2")
_SET_IF_EMPTY(FLAGS_AVX "/arch:AVX")
_SET_IF_EMPTY(FLAGS_AVX2 "/QxCORE-AVX2")
_SET_IF_EMPTY(FLAGS_AVX512 "/QxCORE-AVX512")
SET(COMMON_CXX_FLAGS "")
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /EHsc") # catch C++ exceptions only and extern "C" functions never throw a C++ exception
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /MP") # compile source files in parallel
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /GR") # enable runtime type information (on by default)
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /Qvec-") # disable auto vectorizer
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /Qfast-transcendentals-") # disable fast transcendentals, prevents sin(x),cos(x) -> sincos(x) optimization
IF (EMBREE_STACK_PROTECTOR)
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /GS") # protects against return address overrides
ELSE()
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /GS-") # do not protect against return address overrides
ENDIF()
# Opt a single source file out of stack protection; only meaningful when
# EMBREE_STACK_PROTECTOR is enabled.
MACRO(DISABLE_STACK_PROTECTOR_FOR_FILE file)
IF (EMBREE_STACK_PROTECTOR)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "/GS-")
ENDIF()
ENDMACRO()
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /Qdiag-disable:11074 ") # remark #11074: Inlining inhibited by limit max-size
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /Qdiag-disable:11075 ") # remark #11075: To get full report use -Qopt-report:4 -Qopt-report-phase ipo
# Per-configuration flags (Debug is optimized here; "Debug" means
# assertions and TBB debug mode, not unoptimized code).
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COMMON_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /DDEBUG") # enables assertions
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /DTBB_USE_DEBUG") # configures TBB in debug mode
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Oi") # inline intrinsic functions
SET(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /DEBUG") # generate debug information
SET(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} /DEBUG") # generate debug information
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COMMON_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi") # inline intrinsic functions
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Gy") # package individual functions
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Qinline-max-total-size-") # no size limit when performing inlining
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Qinline-factor=150") # increase default inline factors by 2x
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${COMMON_CXX_FLAGS}")
# SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /DTBB_USE_DEBUG") # configures TBB in debug mode
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Oi") # inline intrinsic functions
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Gy") # package individual functions
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Qinline-max-total-size-") # no size limit when performing inlining
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Qinline-factor=150") # increase default inline factors by 2x
SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /DEBUG") # generate debug information
SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /DEBUG") # generate debug information
# Linker hardening flags shared by executables and shared libraries.
SET(SECURE_LINKER_FLAGS "")
SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /NXCompat") # compatible with data execution prevention (on by default)
SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /DynamicBase") # random rebase of executable at load time
IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /SafeSEH") # invoke known exception handlers (Win32 only, x64 exception handlers are safe by design)
ENDIF()
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SECURE_LINKER_FLAGS}")
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${SECURE_LINKER_FLAGS}")
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /IGNORE:4217") # locally defined symbol XXX imported in function YYY (happens as the ISPC API layer uses exported library functions)
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /IGNORE:4049") # warning LNK4049: locally defined symbol _rtcOccluded1M imported
INCLUDE(msvc_post)
# remove libmmd dependency
IF (NOT EMBREE_STATIC_RUNTIME)
# Use the default math library instead of libmmd[d]
STRING(APPEND CMAKE_EXE_LINKER_FLAGS_DEBUG " /nodefaultlib:libmmdd.lib")
STRING(APPEND CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO " /nodefaultlib:libmmd.lib")
STRING(APPEND CMAKE_EXE_LINKER_FLAGS_RELEASE " /nodefaultlib:libmmd.lib")
STRING(APPEND CMAKE_SHARED_LINKER_FLAGS_DEBUG " /nodefaultlib:libmmdd.lib")
STRING(APPEND CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO " /nodefaultlib:libmmd.lib")
STRING(APPEND CMAKE_SHARED_LINKER_FLAGS_RELEASE " /nodefaultlib:libmmd.lib")
# Link the static version of SVML
string(APPEND CMAKE_EXE_LINKER_FLAGS " /defaultlib:svml_dispmt.lib")
string(APPEND CMAKE_SHARED_LINKER_FLAGS " /defaultlib:svml_dispmt.lib")
ENDIF()

# --- non-Windows (icc, GCC-style driver) ---
ELSE()
IF (APPLE)
_SET_IF_EMPTY(FLAGS_SSE2 "-xssse3") # on MacOSX ICC does not support SSE2
ELSE()
_SET_IF_EMPTY(FLAGS_SSE2 "-xsse2")
ENDIF()
_SET_IF_EMPTY(FLAGS_SSE42 "-xsse4.2")
_SET_IF_EMPTY(FLAGS_AVX "-xAVX")
_SET_IF_EMPTY(FLAGS_AVX2 "-xCORE-AVX2")
_SET_IF_EMPTY(FLAGS_AVX512 "-xCORE-AVX512")
# By default, discard whatever CMAKE_CXX_FLAGS the user/toolchain set.
OPTION(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)
IF (EMBREE_IGNORE_CMAKE_CXX_FLAGS)
SET(CMAKE_CXX_FLAGS "")
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") # enables most warnings
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") # enables string format vulnerability warnings
# these prevent compile to optimize away security checks
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-overflow") # assume that signed overflow occurs
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-delete-null-pointer-checks") # keep all checks for NULL pointers
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fwrapv") # this option instructs the compiler to assume that signed arithmetic overflow wraps around.
IF (NOT APPLE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIE") # enables support for more secure position independent execution
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftls-model=local-dynamic") # otherwise ICC2019 cannot compile code with -fPIE enabled
ENDIF()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") # generate position independent code suitable for shared libraries
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # enables C++11 features
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") # makes all symbols hidden by default
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") # makes all inline symbols hidden by default
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -no-ansi-alias") # disables strict aliasing rules
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -no-vec") # disable auto vectorizer
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fasm-blocks") # enable assembly blocks
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORTIFY_SOURCE=2") # perform extra security checks for some standard library calls
IF (EMBREE_STACK_PROTECTOR)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector") # protects against return address overrides
ENDIF()
# Opt a single source file out of stack protection; only meaningful when
# EMBREE_STACK_PROTECTOR is enabled.
MACRO(DISABLE_STACK_PROTECTOR_FOR_FILE file)
IF (EMBREE_STACK_PROTECTOR)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "-fno-stack-protector")
ENDIF()
ENDMACRO()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict") # enable restrict keyword
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -no-inline-max-total-size") # no size limit when performing inlining
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -no-inline-max-per-compile") # no maximal number of inlinings per compilation unit
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -inline-factor=150") # increase default inline factors by 2x
IF(NOT CMAKE_CXX_COMPILER_WRAPPER STREQUAL "CrayPrgEnv")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-intel") # links intel runtime statically
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -no-intel-extensions") # disables some intel extensions which cause symbols to be exported
ENDIF()
# Per-configuration flags (each config rebuilt from scratch; Debug is
# optimized - "Debug" means assertions on, not unoptimized code).
SET(CMAKE_CXX_FLAGS_DEBUG "")
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") # generate debug information
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") # enable assertions
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DTBB_USE_DEBUG") # configure TBB in debug mode
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O3") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELEASE "")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG") # disable assertions
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "")
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g") # generate debug information
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -DNDEBUG") # disable assertions
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") # enable full optimizations
# Platform-specific linker hardening.
IF (APPLE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.7") # makes sure code runs on older MacOSX versions
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # link against libc++ which supports C++11 features
ELSE(APPLE)
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") # issues link error for undefined symbols in shared library
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") # enables position independent execution for executable
ENDIF(APPLE)
ENDIF()

View file

@ -0,0 +1,211 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# ##################################################################
# add macro INCLUDE_DIRECTORIES_ISPC() that allows to specify search
# paths for ISPC sources
# ##################################################################
SET(ISPC_INCLUDE_DIR "")
# Appends its arguments to the ISPC include-path list consumed by
# ISPC_COMPILE() below (directory-scope analogue of include_directories()).
MACRO (INCLUDE_DIRECTORIES_ISPC)
SET(ISPC_INCLUDE_DIR ${ISPC_INCLUDE_DIR} ${ARGN})
ENDMACRO ()
IF (EMBREE_ISPC_SUPPORT)
# Known-working ISPC versions, newest first; the LAST entry is the
# minimum version accepted by the check further down.
SET(ISPC_VERSION_WORKING "1.9.1" "1.9.0" "1.8.3" "1.8.2")
LIST(GET ISPC_VERSION_WORKING -1 ISPC_VERSION_REQUIRED)
# --- locate the ISPC executable unless the user provided one ---
IF (NOT EMBREE_ISPC_EXECUTABLE)
# try sibling folder as hint for path of ISPC
IF (APPLE)
SET(ISPC_DIR_SUFFIX "osx")
ELSEIF(WIN32)
SET(ISPC_DIR_SUFFIX "windows")
IF (MSVC14)
LIST(APPEND ISPC_DIR_SUFFIX "windows-vs2015")
ELSE()
LIST(APPEND ISPC_DIR_SUFFIX "windows-vs2013")
ENDIF()
ELSE()
SET(ISPC_DIR_SUFFIX "linux")
ENDIF()
# Build hint paths of the form ../ispc-v<version>-<platform> relative
# to the project source directory.
FOREACH(ver ${ISPC_VERSION_WORKING})
FOREACH(suffix ${ISPC_DIR_SUFFIX})
LIST(APPEND ISPC_DIR_HINT "${PROJECT_SOURCE_DIR}/../ispc-v${ver}-${suffix}")
ENDFOREACH()
ENDFOREACH()
FIND_PROGRAM(EMBREE_ISPC_EXECUTABLE ispc PATHS ${ISPC_DIR_HINT} DOC "Path to the ISPC executable.")
IF (NOT EMBREE_ISPC_EXECUTABLE)
MESSAGE(FATAL_ERROR "Intel SPMD Compiler (ISPC) not found. Disable EMBREE_ISPC_SUPPORT or install ISPC.")
ENDIF()
ENDIF()
# check ISPC version
EXECUTE_PROCESS(COMMAND ${EMBREE_ISPC_EXECUTABLE} --version
OUTPUT_VARIABLE ISPC_OUTPUT
RESULT_VARIABLE ISPC_RESULT)
IF (NOT ${ISPC_RESULT} STREQUAL "0")
MESSAGE(FATAL_ERROR "Error executing ISPC executable '${EMBREE_ISPC_EXECUTABLE}': ${ISPC_RESULT}")
ELSE()
MESSAGE(STATUS "Found working Intel SPMD Compiler (ISPC): ${EMBREE_ISPC_EXECUTABLE}")
ENDIF()
# Extract the dotted version number from the --version banner and
# enforce the minimum derived from ISPC_VERSION_WORKING above.
STRING(REGEX MATCH "([0-9]+[.][0-9]+[.][0-9]+)" DUMMY "${ISPC_OUTPUT}")
SET(ISPC_VERSION ${CMAKE_MATCH_1})
IF (ISPC_VERSION VERSION_LESS ISPC_VERSION_REQUIRED)
MESSAGE(FATAL_ERROR "ISPC ${ISPC_VERSION} is too old. You need at least ISPC ${ISPC_VERSION_REQUIRED}.")
ENDIF()
GET_FILENAME_COMPONENT(ISPC_DIR ${EMBREE_ISPC_EXECUTABLE} PATH)
SET(EMBREE_ISPC_ADDRESSING 32 CACHE STRING "32vs64 bit addressing in ispc")
SET_PROPERTY(CACHE EMBREE_ISPC_ADDRESSING PROPERTY STRINGS 32 64)
MARK_AS_ADVANCED(EMBREE_ISPC_ADDRESSING)
# Compiles each .ispc file in ARGN into object file(s) plus a generated
# <name>_ispc.h header.  Inputs: ISPC_TARGETS, ISPC_DEFINITIONS,
# ISPC_INCLUDE_DIR (directory-scope variables).  Output: ISPC_OBJECTS,
# the list of object files to add to a target.
MACRO (ISPC_COMPILE)
SET(ISPC_ADDITIONAL_ARGS "")
SET(ISPC_TARGET_EXT ${CMAKE_CXX_OUTPUT_EXTENSION})
STRING(REPLACE ";" "," ISPC_TARGET_ARGS "${ISPC_TARGETS}")
# Pick the ISPC --arch value from pointer size / processor.
IF (CMAKE_SIZEOF_VOID_P EQUAL 8)
IF (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm64|aarch64")
SET(ISPC_ARCHITECTURE "aarch64")
ELSE()
SET(ISPC_ARCHITECTURE "x86-64")
ENDIF()
ELSE()
SET(ISPC_ARCHITECTURE "x86")
ENDIF()
SET(ISPC_TARGET_DIR "${CMAKE_CURRENT_BINARY_DIR}")
# Turn the include-dir list into alternating "-I;<dir>" arguments.
IF(ISPC_INCLUDE_DIR)
STRING(REPLACE ";" ";-I;" ISPC_INCLUDE_DIR_PARMS "${ISPC_INCLUDE_DIR}")
SET(ISPC_INCLUDE_DIR_PARMS "-I" ${ISPC_INCLUDE_DIR_PARMS})
ENDIF()
# NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators,
# which is presumably why WIN32 is special-cased to -O3 here.
IF (WIN32 OR "${CMAKE_BUILD_TYPE}" STREQUAL "Release")
SET(ISPC_OPT_FLAGS -O3)
ELSE()
SET(ISPC_OPT_FLAGS -O2)
ENDIF()
IF (WIN32)
SET(ISPC_ADDITIONAL_ARGS ${ISPC_ADDITIONAL_ARGS} --dllexport)
ELSE()
SET(ISPC_ADDITIONAL_ARGS ${ISPC_ADDITIONAL_ARGS} --pic)
ENDIF()
SET(ISPC_OBJECTS "")
FOREACH(src ${ARGN})
GET_FILENAME_COMPONENT(fname ${src} NAME_WE)
GET_FILENAME_COMPONENT(dir ${src} PATH)
SET(outdir "${ISPC_TARGET_DIR}/${dir}")
SET(input "${CMAKE_CURRENT_SOURCE_DIR}/${src}")
# Re-read the dependency file (-MMM output) from a previous build so
# header changes retrigger compilation; only deps that still exist
# are kept.
SET(deps "")
IF (EXISTS ${outdir}/${fname}.dev.idep)
FILE(READ ${outdir}/${fname}.dev.idep contents)
STRING(REPLACE " " ";" contents "${contents}")
STRING(REPLACE ";" "\\\\;" contents "${contents}")
STRING(REPLACE "\n" ";" contents "${contents}")
FOREACH(dep ${contents})
IF (EXISTS ${dep})
SET(deps ${deps} ${dep})
ENDIF (EXISTS ${dep})
ENDFOREACH(dep ${contents})
ENDIF ()
SET(results "${outdir}/${fname}.dev${ISPC_TARGET_EXT}")
# if we have multiple targets add additional object files
LIST(LENGTH ISPC_TARGETS NUM_TARGETS)
IF (NUM_TARGETS GREATER 1)
FOREACH(target ${ISPC_TARGETS})
IF (${target} STREQUAL "avx512skx-i32x16")
SET(target "avx512skx")
ENDIF()
SET(results ${results} "${outdir}/${fname}.dev_${target}${ISPC_TARGET_EXT}")
ENDFOREACH()
ENDIF()
# One build rule per source: emits the object file(s), the _ispc.h
# header, and the .idep dependency file consumed on the next configure.
ADD_CUSTOM_COMMAND(
OUTPUT ${results} "${ISPC_TARGET_DIR}/${fname}_ispc.h"
COMMAND ${CMAKE_COMMAND} -E make_directory ${outdir}
COMMAND ${EMBREE_ISPC_EXECUTABLE}
-I "${CMAKE_CURRENT_SOURCE_DIR}"
${ISPC_INCLUDE_DIR_PARMS}
${ISPC_DEFINITIONS}
--arch=${ISPC_ARCHITECTURE}
--addressing=${EMBREE_ISPC_ADDRESSING}
${ISPC_OPT_FLAGS}
--target=${ISPC_TARGET_ARGS}
--woff
--opt=fast-math
${ISPC_ADDITIONAL_ARGS}
-h "${ISPC_TARGET_DIR}/${fname}_ispc.h"
-MMM ${outdir}/${fname}.dev.idep
-o ${outdir}/${fname}.dev${ISPC_TARGET_EXT}
${input}
DEPENDS ${input} ${deps}
COMMENT "Building ISPC object ${outdir}/${fname}.dev${ISPC_TARGET_EXT}"
)
SET(ISPC_OBJECTS ${ISPC_OBJECTS} ${results})
ENDFOREACH()
ENDMACRO()
# add_executable() wrapper: splits ARGN into .ispc and other sources,
# compiles the former via ISPC_COMPILE, links everything together.
MACRO (ADD_EMBREE_ISPC_EXECUTABLE name)
SET(ISPC_SOURCES "")
SET(OTHER_SOURCES "")
FOREACH(src ${ARGN})
GET_FILENAME_COMPONENT(ext ${src} EXT)
IF (ext STREQUAL ".ispc")
SET(ISPC_SOURCES ${ISPC_SOURCES} ${src})
ELSE ()
SET(OTHER_SOURCES ${OTHER_SOURCES} ${src})
ENDIF ()
ENDFOREACH()
ISPC_COMPILE(${ISPC_SOURCES})
ADD_EXECUTABLE(${name} ${ISPC_OBJECTS} ${OTHER_SOURCES})
ENDMACRO()
# add_library() wrapper with the same .ispc handling as above.
MACRO (ADD_ISPC_LIBRARY name type)
SET(ISPC_SOURCES "")
SET(OTHER_SOURCES "")
FOREACH(src ${ARGN})
GET_FILENAME_COMPONENT(ext ${src} EXT)
IF (ext STREQUAL ".ispc")
SET(ISPC_SOURCES ${ISPC_SOURCES} ${src})
ELSE ()
SET(OTHER_SOURCES ${OTHER_SOURCES} ${src})
ENDIF ()
ENDFOREACH()
ISPC_COMPILE(${ISPC_SOURCES})
ADD_LIBRARY(${name} ${type} ${ISPC_OBJECTS} ${OTHER_SOURCES})
ENDMACRO()
ELSE (EMBREE_ISPC_SUPPORT)
# Fallback when ISPC support is disabled: same signature, but .ispc
# sources are silently dropped and only the other sources are built.
MACRO (ADD_ISPC_LIBRARY name type)
SET(ISPC_SOURCES "")
SET(OTHER_SOURCES "")
FOREACH(src ${ARGN})
GET_FILENAME_COMPONENT(ext ${src} EXT)
IF (ext STREQUAL ".ispc")
SET(ISPC_SOURCES ${ISPC_SOURCES} ${src})
ELSE ()
SET(OTHER_SOURCES ${OTHER_SOURCES} ${src})
ENDIF ()
ENDFOREACH()
ADD_LIBRARY(${name} ${type} ${OTHER_SOURCES})
ENDMACRO()
ENDIF (EMBREE_ISPC_SUPPORT)

View file

@ -0,0 +1,60 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0

# Compiler/linker flag setup for MSVC (cl.exe).  This script mutates the
# global CMAKE_CXX_FLAGS_* / linker-flag strings directly; the final flag
# order follows statement order, so do not reorder.

# Per-ISA compile flags.  MSVC has no /arch switch below AVX, so the SSE
# feature macros are defined manually; each tier builds on the previous.
SET(FLAGS_SSE2 "/D__SSE__ /D__SSE2__")
SET(FLAGS_SSE42 "${FLAGS_SSE2} /D__SSE3__ /D__SSSE3__ /D__SSE4_1__ /D__SSE4_2__")
SET(FLAGS_AVX "${FLAGS_SSE42} /arch:AVX")
SET(FLAGS_AVX2 "${FLAGS_SSE42} /arch:AVX2")

# Flags common to all build configurations.
SET(COMMON_CXX_FLAGS "")
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /EHsc") # catch C++ exceptions only and extern "C" functions never throw a C++ exception
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /MP") # compile source files in parallel
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /GR") # enable runtime type information (on by default)
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /Gy") # package individual functions
IF (EMBREE_STACK_PROTECTOR)
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /GS") # protects against return address overrides
ELSE()
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /GS-") # do not protect against return address overrides
ENDIF()
# Opt a single source file out of stack protection; only meaningful when
# EMBREE_STACK_PROTECTOR is enabled.
MACRO(DISABLE_STACK_PROTECTOR_FOR_FILE file)
IF (EMBREE_STACK_PROTECTOR)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "/GS-")
ENDIF()
ENDMACRO()
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEPENDENTLOADFLAG:0x2000")
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DEPENDENTLOADFLAG:0x2000")

# Per-configuration flags.  Debug is optimized here ("Debug" means
# assertions and TBB debug mode, not unoptimized code).
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COMMON_CXX_FLAGS}")
# Quote the input: an unquoted empty variable would drop the 4th argument
# and make STRING(REPLACE) fail with "requires at least four arguments".
STRING(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") # disable native runtime checks
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /DDEBUG") # enables assertions
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /DTBB_USE_DEBUG") # configures TBB in debug mode
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Ox") # enable full optimizations
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Oi") # inline intrinsic functions
SET(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /DEBUG") # generate debug information
SET(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} /DEBUG") # generate debug information
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COMMON_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi") # inline intrinsic functions
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${COMMON_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox") # enable full optimizations
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Oi") # inline intrinsic functions
SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /DEBUG") # generate debug information
SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /DEBUG") # generate debug information

# Linker hardening flags shared by executables and shared libraries.
SET(SECURE_LINKER_FLAGS "")
SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /NXCompat") # compatible with data execution prevention (on by default)
SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /DynamicBase") # random rebase of executable at load time
IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /SafeSEH") # invoke known exception handlers (Win32 only, x64 exception handlers are safe by design)
ENDIF()
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SECURE_LINKER_FLAGS}")
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${SECURE_LINKER_FLAGS}")
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /IGNORE:4217") # locally defined symbol XXX imported in function YYY (happens as the ISPC API layer uses exported library functions)
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /IGNORE:4049") # warning LNK4049: locally defined symbol _rtcOccluded1M imported

# Shared post-processing (static runtime + RelWithDebInfo assertions).
INCLUDE(msvc_post)

View file

@ -0,0 +1,13 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0

# Post-processing shared by the MSVC and Intel(Windows) flag setups:
# optionally switch to the static C/C++ runtime and make RelWithDebInfo
# keep assertions enabled.
#
# Fix: the flag variables are now quoted in STRING(REPLACE).  Unquoted,
# an empty variable (e.g. a user passing -DCMAKE_CXX_FLAGS_RELEASE="")
# drops the 4th argument entirely and configuration aborts with
# "string sub-command REPLACE requires at least four arguments".

# optionally use static runtime library
OPTION(EMBREE_STATIC_RUNTIME "Use the static version of the C/C++ runtime library." OFF)
IF (EMBREE_STATIC_RUNTIME)
STRING(REPLACE "/MDd" "/MTd" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
STRING(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
STRING(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
ENDIF()

# remove define NDEBUG and instead set define DEBUG for config RelWithDebInfo
STRING(REPLACE "NDEBUG" "DEBUG" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")

View file

@ -0,0 +1,359 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
INCLUDE(GNUInstallDirs)
# on non-ZIP unix installs, binaries go into a versioned subdirectory
IF (NOT EMBREE_ZIP_MODE AND NOT WIN32 AND NOT APPLE)
SET(CMAKE_INSTALL_BINDIR "${CMAKE_INSTALL_BINDIR}/embree${EMBREE_VERSION_MAJOR}")
SET(CMAKE_INSTALL_FULL_BINDIR "${CMAKE_INSTALL_FULL_BINDIR}/embree${EMBREE_VERSION_MAJOR}")
ENDIF()
# use full absolute path as install name
IF (NOT EMBREE_ZIP_MODE)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_FULL_LIBDIR}")
ELSE()
# ZIP mode produces a relocatable archive, so use a loader-relative RPATH
IF(APPLE)
SET(CMAKE_INSTALL_RPATH "@loader_path/../${CMAKE_INSTALL_LIBDIR}")
ELSE()
SET(CMAKE_INSTALL_RPATH "$ORIGIN/../${CMAKE_INSTALL_LIBDIR}")
ENDIF()
ENDIF()
##############################################################
# Install SYCL specific files
##############################################################

# SYCL library
# copies the SYCL runtime pieces shipped next to the DPC++ compiler into the
# install tree, so the installed package runs without a compiler installation
IF (EMBREE_SYCL_SUPPORT AND EMBREE_INSTALL_DEPENDENCIES)
GET_FILENAME_COMPONENT(DPCPP_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH)
IF (WIN32)
# the "?" globs also pick up versioned names (e.g. sycl7.lib / sycl7.dll)
FILE(GLOB LIB_SYCL_LIB_FILES LIST_DIRECTORIES FALSE
"${DPCPP_COMPILER_DIR}/../lib/sycl.lib"
"${DPCPP_COMPILER_DIR}/../lib/sycl?.lib")
INSTALL(FILES ${LIB_SYCL_LIB_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
FILE(GLOB LIB_SYCL_DLL_FILES LIST_DIRECTORIES FALSE
"${DPCPP_COMPILER_DIR}/../bin/sycl.dll"
"${DPCPP_COMPILER_DIR}/../bin/sycl?.dll")
INSTALL(FILES ${LIB_SYCL_DLL_FILES} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
INSTALL(FILES "${DPCPP_COMPILER_DIR}/pi_level_zero.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
INSTALL(FILES "${DPCPP_COMPILER_DIR}/win_proxy_loader.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib OPTIONAL)
# the following DLLs moved between compiler releases, hence the EXISTS probes
IF (EXISTS "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/svml_dispmd.dll")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/svml_dispmd.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/libmmd.dll")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/libmmd.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/libmmd.dll")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/libmmd.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/pi_win_proxy_loader.dll")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/pi_win_proxy_loader.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
ENDIF()
ELSE()
# linux: pick up libsycl.so plus any versioned aliases
FILE(GLOB LIB_SYCL_FILES LIST_DIRECTORIES FALSE
"${DPCPP_COMPILER_DIR}/../lib/libsycl.so"
"${DPCPP_COMPILER_DIR}/../lib/libsycl.so.?"
"${DPCPP_COMPILER_DIR}/../lib/libsycl.so.?.?"
"${DPCPP_COMPILER_DIR}/../lib/libsycl.so.?.?.?"
"${DPCPP_COMPILER_DIR}/../lib/libsycl.so.?.?.?-?")
INSTALL(FILES ${LIB_SYCL_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../lib/libpi_level_zero.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
IF (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libsvml.so")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libsvml.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libirng.so")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libirng.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libimf.so")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libimf.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so")
FILE(GLOB LIB_SYCL_FILES LIST_DIRECTORIES FALSE
"${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so"
"${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so.?")
INSTALL(FILES ${LIB_SYCL_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
ENDIF()
ENDIF()
ENDIF()
##############################################################
# Install MSVC runtime
##############################################################
IF (WIN32)
IF(SYCL_ONEAPI_ICX AND EMBREE_INSTALL_DEPENDENCIES)
GET_FILENAME_COMPONENT(DPCPP_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH)
IF (EXISTS "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/libmmd.dll")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/libmmd.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
ENDIF()
# NOTE(review): the doubled slash below looks accidental (harmless for
# EXISTS, but presumably meant to be a single "/") — confirm and normalize
IF (EXISTS "${DPCPP_COMPILER_DIR}//libmmd.dll")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/libmmd.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/svml_dispmd.dll")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../redist/intel64_win/compiler/svml_dispmd.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
ENDIF()
ENDIF()
# install only the msvcp/vcruntime DLLs from the system runtime set
SET(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP TRUE)
SET(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS TRUE)
INCLUDE(InstallRequiredSystemLibraries)
LIST(FILTER CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS INCLUDE REGEX ".*msvcp[0-9]+\.dll|.*vcruntime[0-9]+\.dll|.*vcruntime[0-9]+_[0-9]+\.dll")
INSTALL(FILES ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib)
ELSE()
# non-windows ICX builds: install the intel compiler runtime libraries
IF(SYCL_ONEAPI_ICX AND EMBREE_INSTALL_DEPENDENCIES)
GET_FILENAME_COMPONENT(DPCPP_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH)
IF (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libsvml.so")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libsvml.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libirng.so")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libirng.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libimf.so")
INSTALL(FILES "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libimf.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
ENDIF()
IF (EXISTS "${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so")
FILE(GLOB LIB_SYCL_FILES LIST_DIRECTORIES FALSE
"${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so"
"${DPCPP_COMPILER_DIR}/../compiler/lib/intel64_lin/libintlc.so.?")
INSTALL(FILES ${LIB_SYCL_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib)
ENDIF()
ENDIF()
ENDIF()
##############################################################
# Install Headers
##############################################################
INSTALL(DIRECTORY include/embree4 DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" COMPONENT devel)
IF (NOT WIN32)
INSTALL(DIRECTORY man/man3 DESTINATION "${CMAKE_INSTALL_MANDIR}" COMPONENT devel)
ENDIF()
##############################################################
# Install Models
##############################################################
IF (EMBREE_TUTORIALS)
INSTALL(DIRECTORY tutorials/models DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
ENDIF()
##############################################################
# Install Documentation
##############################################################
INSTALL(FILES "${PROJECT_SOURCE_DIR}/LICENSE.txt" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
INSTALL(FILES "${PROJECT_SOURCE_DIR}/CHANGELOG.md" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
INSTALL(FILES "${PROJECT_SOURCE_DIR}/README.md" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
INSTALL(FILES "${PROJECT_SOURCE_DIR}/readme.pdf" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
INSTALL(FILES "${PROJECT_SOURCE_DIR}/third-party-programs.txt" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
INSTALL(FILES "${PROJECT_SOURCE_DIR}/third-party-programs-TBB.txt" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
INSTALL(FILES "${PROJECT_SOURCE_DIR}/third-party-programs-OIDN.txt" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
INSTALL(FILES "${PROJECT_SOURCE_DIR}/third-party-programs-DPCPP.txt" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
INSTALL(FILES "${PROJECT_SOURCE_DIR}/third-party-programs-oneAPI-DPCPP.txt" DESTINATION "${CMAKE_INSTALL_DOCDIR}" COMPONENT lib)
##############################################################
# Install scripts to set embree paths
##############################################################
IF (EMBREE_ZIP_MODE)
# windows intentionally ships no environment scripts
IF (WIN32)
ELSEIF(APPLE)
CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/scripts/install_macosx/embree-vars.sh" embree-vars.sh @ONLY)
CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/scripts/install_macosx/embree-vars.csh" embree-vars.csh @ONLY)
INSTALL(FILES "${PROJECT_BINARY_DIR}/embree-vars.sh" DESTINATION "." COMPONENT lib)
INSTALL(FILES "${PROJECT_BINARY_DIR}/embree-vars.csh" DESTINATION "." COMPONENT lib)
ELSE()
CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/scripts/install_linux/embree-vars.sh" embree-vars.sh @ONLY)
CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/scripts/install_linux/embree-vars.csh" embree-vars.csh @ONLY)
INSTALL(FILES "${PROJECT_BINARY_DIR}/embree-vars.sh" DESTINATION "." COMPONENT lib)
INSTALL(FILES "${PROJECT_BINARY_DIR}/embree-vars.csh" DESTINATION "." COMPONENT lib)
ENDIF()
ENDIF()
##############################################################
# Install Embree CMake Configuration
##############################################################
# non-ZIP installs encode the full version in the config, ZIP only the major
IF (NOT EMBREE_ZIP_MODE)
SET(EMBREE_CONFIG_VERSION ${EMBREE_VERSION})
ELSE()
SET(EMBREE_CONFIG_VERSION ${EMBREE_VERSION_MAJOR})
ENDIF()
# why does this have to be so complicated...
# compute the platform-dependent library file name referenced from the
# generated embree-config.cmake
IF (EMBREE_STATIC_LIB)
SET(EMBREE_LIBRARY_FULLNAME ${CMAKE_STATIC_LIBRARY_PREFIX}${EMBREE_LIBRARY_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
ELSE()
IF (WIN32)
SET(EMBREE_LIBRARY_FULLNAME ${CMAKE_IMPORT_LIBRARY_PREFIX}${EMBREE_LIBRARY_NAME}${CMAKE_IMPORT_LIBRARY_SUFFIX})
ELSEIF (APPLE)
SET(EMBREE_LIBRARY_FULLNAME ${CMAKE_SHARED_LIBRARY_PREFIX}${EMBREE_LIBRARY_NAME}.${EMBREE_CONFIG_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX})
ELSE()
SET(EMBREE_LIBRARY_FULLNAME ${CMAKE_SHARED_LIBRARY_PREFIX}${EMBREE_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${EMBREE_CONFIG_VERSION})
ENDIF()
ENDIF()
#IF (WIN32 OR EMBREE_ZIP_MODE)
# for local "installs" and on Windows we want the cmake config files placed
# in the install root, such that users can point the CMake variable
# embree_DIR just to the install folder
# SET(EMBREE_CMAKECONFIG_DIR ".")
# SET(EMBREE_CMAKEEXPORT_DIR "cmake")
# SET(EMBREE_RELATIVE_ROOT_DIR ".")
#ELSE()
SET(EMBREE_CMAKECONFIG_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/embree-${EMBREE_VERSION}")
SET(EMBREE_CMAKEEXPORT_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/embree-${EMBREE_VERSION}")
IF (WIN32)
SET(EMBREE_RELATIVE_ROOT_DIR "../../../")
ELSE()
# derive the ../.. chain leading from the config dir back to the install root
FILE(RELATIVE_PATH EMBREE_RELATIVE_ROOT_DIR "/${EMBREE_CMAKECONFIG_DIR}" "/")
ENDIF()
#ENDIF()
CONFIGURE_FILE(common/cmake/embree-config.cmake embree-config-install.cmake @ONLY)
CONFIGURE_FILE(common/cmake/embree-config-version.cmake embree-config-version.cmake @ONLY)
# create a config file for the build directory
CONFIGURE_FILE(common/cmake/embree-config-builddir.cmake embree-config.cmake @ONLY)
INSTALL(FILES "${PROJECT_BINARY_DIR}/embree-config-install.cmake" DESTINATION "${EMBREE_CMAKECONFIG_DIR}" RENAME "embree-config.cmake" COMPONENT devel)
INSTALL(FILES "${PROJECT_BINARY_DIR}/embree-config-version.cmake" DESTINATION "${EMBREE_CMAKECONFIG_DIR}" COMPONENT devel)
##############################################################
# CPack specific stuff
##############################################################
SET(CPACK_PACKAGE_NAME "Intel(R) Embree Ray Tracing Kernels")
SET(CPACK_PACKAGE_FILE_NAME "embree-${EMBREE_VERSION}${EMBREE_VERSION_NOTE}")
IF (EMBREE_SYCL_SUPPORT)
SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.sycl")
SET(EMBREE_VERSION_SUFFIX)
ENDIF()
#SET(CPACK_PACKAGE_ICON "${PROJECT_SOURCE_DIR}/embree-doc/images/icon.png")
#SET(CPACK_PACKAGE_RELOCATABLE TRUE)
SET(CPACK_STRIP_FILES TRUE)
SET(CPACK_PACKAGE_VERSION_MAJOR ${EMBREE_VERSION_MAJOR})
SET(CPACK_PACKAGE_VERSION_MINOR ${EMBREE_VERSION_MINOR})
SET(CPACK_PACKAGE_VERSION_PATCH ${EMBREE_VERSION_PATCH})
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Intel(R) Embree implements high performance ray tracing kernels including acceleration structure construction and traversal.")
SET(CPACK_PACKAGE_VENDOR "Intel Corporation")
SET(CPACK_PACKAGE_CONTACT embree_support@intel.com)
# one archive per component group (lib/devel/examples in "embree",
# testing in "embree-testing")
SET(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
SET(CPACK_COMPONENTS_GROUPING ONE_PER_GROUP)
SET(CPACK_COMPONENT_LIB_DISPLAY_NAME "Library")
SET(CPACK_COMPONENT_LIB_DESCRIPTION "The Embree library including documentation.")
SET(CPACK_COMPONENT_LIB_GROUP "embree")
SET(CPACK_COMPONENT_DEVEL_DISPLAY_NAME "Development")
SET(CPACK_COMPONENT_DEVEL_DESCRIPTION "Header Files for C and ISPC required to develop applications with Embree.")
SET(CPACK_COMPONENT_DEVEL_GROUP "embree")
SET(CPACK_COMPONENT_EXAMPLES_DISPLAY_NAME "Examples")
SET(CPACK_COMPONENT_EXAMPLES_DESCRIPTION "Tutorials demonstrating how to use Embree.")
SET(CPACK_COMPONENT_EXAMPLES_GROUP "embree")
SET(CPACK_COMPONENT_TESTING_DISPLAY_NAME "Testing")
SET(CPACK_COMPONENT_TESTING_DESCRIPTION "Models and reference images for tests")
SET(CPACK_COMPONENT_TESTING_GROUP "embree-testing")
# dependencies between components
#SET(CPACK_COMPONENT_DEVEL_DEPENDS lib)
#SET(CPACK_COMPONENT_EXAMPLES_DEPENDS lib)
#SET(CPACK_COMPONENT_LIB_REQUIRED ON) # always install the libs
# point to readme and license files
SET(CPACK_RESOURCE_FILE_README "${PROJECT_SOURCE_DIR}/README.md")
SET(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE.txt")
# Windows specific settings
IF(WIN32)
IF (CMAKE_SIZEOF_VOID_P EQUAL 8)
SET(ARCH x64)
SET(CPACK_PACKAGE_NAME "${CPACK_PACKAGE_NAME} x64")
ELSE()
SET(ARCH win32)
SET(CPACK_PACKAGE_NAME "${CPACK_PACKAGE_NAME} Win32")
ENDIF()
SET(CPACK_GENERATOR ZIP)
SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.${ARCH}.windows")
SET(PACKAGE_BASE_NAME "${CPACK_PACKAGE_FILE_NAME}")
SET(PACKAGE_EXT "zip")
# MacOSX specific settings
ELSEIF(APPLE)
CONFIGURE_FILE(README.md README.txt)
SET(CPACK_RESOURCE_FILE_README "${PROJECT_BINARY_DIR}/README.txt")
SET(CPACK_GENERATOR ZIP)
IF (EMBREE_ARM)
SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.arm64.macosx")
ELSE()
SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.macosx")
ENDIF()
SET(PACKAGE_BASE_NAME "${CPACK_PACKAGE_FILE_NAME}")
SET(PACKAGE_EXT "zip")
# notarization helper for macOS packages (signing script provided by the repo)
add_custom_target(
post_package_notarize "${PROJECT_SOURCE_DIR}/scripts/package_post_build_notarize_macosx.sh" ${PACKAGE_BASE_NAME} ${EMBREE_SIGN_FILE}
)
# Linux specific settings
ELSE()
SET(CPACK_GENERATOR TGZ)
SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.linux")
SET(PACKAGE_BASE_NAME "${CPACK_PACKAGE_FILE_NAME}")
SET(PACKAGE_EXT "tar.gz")
IF (EMBREE_SYCL_SUPPORT)
SET(EMBREE_VERSION_SYCL_SUFFIX ".sycl")
ENDIF()
ENDIF()
# "build": builds the package target and post-processes the archive
add_custom_target(
build ${CMAKE_COMMAND} --build . --config ${CMAKE_BUILD_TYPE} --target package -j8
COMMAND ${CMAKE_COMMAND} -DPACKAGE_BASENAME=${PACKAGE_BASE_NAME} -DPACKAGE_EXT=${PACKAGE_EXT} -P ${PROJECT_SOURCE_DIR}/scripts/package_build.cmake
)
# "test_package": unpacks the archive, configures and runs the installed tests
add_custom_target(
test_package ${CMAKE_COMMAND} -DWHAT="UNPACK" -DPACKAGE_BASENAME=${PACKAGE_BASE_NAME} -DPACKAGE_EXT=${PACKAGE_EXT} -P ${PROJECT_SOURCE_DIR}/scripts/package_test.cmake
COMMAND cd embree_install/testing && ${CMAKE_COMMAND} -B build -DEMBREE_TESTING_INTENSITY=${EMBREE_TESTING_INTENSITY}
COMMAND ctest --test-dir ${CMAKE_CURRENT_BINARY_DIR}/embree_install/testing/build -VV -C ${CMAKE_BUILD_TYPE} --output-log ctest.output
COMMAND ${CMAKE_COMMAND} -DWHAT="CHECK" -P ${PROJECT_SOURCE_DIR}/scripts/package_test.cmake
)
if(WIN32)
set(INTEGRATE_BINARY "./build/Release/test.exe")
else()
set(INTEGRATE_BINARY "./build/test")
endif()
# "test_integration": builds and runs the release integration test project
# against the unpacked install tree
add_custom_target(
test_integration ${CMAKE_COMMAND} -DWHAT="UNPACK" -DPACKAGE_BASENAME=${PACKAGE_BASE_NAME} -DPACKAGE_EXT=${PACKAGE_EXT} -P ${PROJECT_SOURCE_DIR}/scripts/package_test.cmake
COMMAND cd ${PROJECT_SOURCE_DIR}/tests/integration/test_embree_release && ${CMAKE_COMMAND} -B build --preset ${EMBREE_TESTING_INTEGRATION_PRESET} -Dembree_DIR="${CMAKE_CURRENT_BINARY_DIR}/embree_install/lib/cmake/embree-${EMBREE_VERSION}"
COMMAND cd ${PROJECT_SOURCE_DIR}/tests/integration/test_embree_release && ${CMAKE_COMMAND} --build build --config Release
COMMAND cd ${PROJECT_SOURCE_DIR}/tests/integration/test_embree_release && ${INTEGRATE_BINARY}
)

View file

@ -0,0 +1,4 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0

# post-install scriptlet: refresh the dynamic linker cache so the freshly
# installed shared libraries are resolvable immediately
/sbin/ldconfig

View file

@ -0,0 +1,561 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
if (BUILD_TESTING OR EMBREE_TESTING_INSTALL_TESTS)
INCLUDE(CTest)
if (EMBREE_TESTING_INSTALL_TESTS)
# start with an empty generated registration file; ADD_EMBREE_TEST_ECS
# appends one call per test so the installed package can re-register them
SET(EMBREE_INSTALL_CTESTTESTFILE "${CMAKE_CURRENT_BINARY_DIR}/embree-addtests.cmake")
file(WRITE "${EMBREE_INSTALL_CTESTTESTFILE}" "")
endif()
# MY_PROJECT_BINARY_DIR: working directory in which test executables are run
if (NOT EMBREE_TESTING_PACKAGE_TEST_PROJECT)
IF (WIN32)
IF("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC")
# MSVC generators place binaries in a per-configuration subfolder
SET(MY_PROJECT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
ELSE()
SET(MY_PROJECT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
ENDIF()
ELSE()
SET(MY_PROJECT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
ENDIF()
else()
# package test project: executables come from the unpacked install tree
SET(MY_PROJECT_BINARY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${EMBREE_INSTALL_BINDIR}")
endif()
# cache options controlling which tests get registered
if (NOT EMBREE_TESTING_PACKAGE_TEST_PROJECT)
SET(EMBREE_TESTING_INTENSITY 1 CACHE STRING "Intensity of testing (0 = no testing, 1 = verify and tutorials, 2 = light testing, 3 = intensive testing, 4 = very intensive testing.")
SET_PROPERTY(CACHE EMBREE_TESTING_INTENSITY PROPERTY STRINGS 0 1 2 3 4)
SET(EMBREE_TESTING_ONLY_SYCL_TESTS OFF CACHE BOOL "Run only tests with the sycl support.")
SET(EMBREE_TESTING_MEMCHECK OFF CACHE BOOL "Turns on memory checking for some tests.")
SET(EMBREE_TESTING_BENCHMARK OFF CACHE BOOL "Turns benchmarking on.")
SET(EMBREE_TESTING_BENCHMARK_DATABASE "${PROJECT_BINARY_DIR}" CACHE PATH "Path to database for benchmarking.")
SET(EMBREE_TESTING_PACKAGE OFF CACHE BOOL "Packages release as test.")
SET(EMBREE_TESTING_KLOCWORK OFF CACHE BOOL "Runs Kocwork as test.")
SET(EMBREE_TESTING_SDE OFF CACHE STRING "Uses SDE to run tests for specified CPU.")
SET_PROPERTY(CACHE EMBREE_TESTING_SDE PROPERTY STRINGS OFF pnr nhm wsm snb ivb hsw bdw knl skl skx cnl)
endif()
# Applies the given ctest properties (forwarded via ARGN) to every variant
# (plain C++, _ispc, _sycl) of ${testname} that has actually been registered.
# When tests are installed, the call is also recorded in the generated
# registration file so the installed test package applies the same properties.
FUNCTION (SET_EMBREE_TEST_PROPERTIES testname)
  SET(test_targets "${testname}")
  list(APPEND test_targets "${testname}_ispc" "${testname}_sycl")
  foreach(t ${test_targets})
    # EMBREE_TEST_<variant>_DEFINED is an internal cache flag maintained by
    # the ADD_EMBREE_GENERIC_* macros
    if (EMBREE_TEST_${t}_DEFINED)
      SET_TESTS_PROPERTIES(${t} ${ARGN})
    endif()
  endforeach()
  if (EMBREE_TESTING_INSTALL_TESTS)
    file(APPEND "${EMBREE_INSTALL_CTESTTESTFILE}" "SET_EMBREE_TEST_PROPERTIES(${testname} ${ARGN}) \n")
  endif()
ENDFUNCTION()
# Registers test <testname> running <executable> (extra args forwarded via
# ARGN) in MY_PROJECT_BINARY_DIR, and records its existence in the internal
# cache flag EMBREE_TEST_<testname>_DEFINED (read by SET_EMBREE_TEST_PROPERTIES).
MACRO (ADD_EMBREE_GENERIC_TEST testname executable)
ADD_TEST(NAME ${testname}
WORKING_DIRECTORY "${MY_PROJECT_BINARY_DIR}"
COMMAND ${executable} ${ARGN})
SET(testnamedef EMBREE_TEST_${testname}_DEFINED)
SET(${testnamedef} "1" CACHE INTERNAL "")
ENDMACRO()
# C++ variant: same as ADD_EMBREE_GENERIC_TEST, but skipped entirely when only
# SYCL tests are requested (SYCL support on and EMBREE_TESTING_ONLY_SYCL_TESTS).
MACRO (ADD_EMBREE_GENERIC_CPP_TEST testname executable)
if((NOT ${EMBREE_SYCL_SUPPORT}) OR (NOT ${EMBREE_TESTING_ONLY_SYCL_TESTS}))
ADD_TEST(NAME ${testname}
WORKING_DIRECTORY "${MY_PROJECT_BINARY_DIR}"
COMMAND ${executable} ${ARGN})
SET(testnamedef EMBREE_TEST_${testname}_DEFINED)
SET(${testnamedef} "1" CACHE INTERNAL "")
endif()
ENDMACRO()
# ISPC variant: registers <testname>_ispc running <executable>_ispc.
# Only added when ISPC support and ray packets are enabled and we are not in
# SYCL-only test mode.
MACRO (ADD_EMBREE_GENERIC_ISPC_TEST testname executable)
if((NOT ${EMBREE_SYCL_SUPPORT}) OR (NOT ${EMBREE_TESTING_ONLY_SYCL_TESTS}))
IF (EMBREE_ISPC_SUPPORT AND EMBREE_RAY_PACKETS)
ADD_TEST(NAME ${testname}_ispc
WORKING_DIRECTORY "${MY_PROJECT_BINARY_DIR}"
COMMAND ${executable}_ispc ${ARGN})
SET(testnamedef EMBREE_TEST_${testname}_ispc_DEFINED)
SET(${testnamedef} "1" CACHE INTERNAL "")
ENDIF()
endif()
ENDMACRO()
# SYCL variant: registers <testname>_sycl running <executable>_sycl with a
# 50 second timeout. Only added when SYCL support is enabled.
MACRO (ADD_EMBREE_GENERIC_SYCL_TEST testname executable)
IF (EMBREE_SYCL_SUPPORT)
ADD_TEST(NAME ${testname}_sycl
WORKING_DIRECTORY ${MY_PROJECT_BINARY_DIR}
COMMAND ${executable}_sycl ${ARGN})
SET(testnamedef EMBREE_TEST_${testname}_sycl_DEFINED)
SET(${testnamedef} 1 CACHE INTERNAL "")
SET_TESTS_PROPERTIES(${testname}_sycl PROPERTIES TIMEOUT 50)
ENDIF()
ENDMACRO()
# Checks if the current cmake configuration is compatible with <condition>
# condition must be a triple "CMAKE_VARIABLE_NAME op VALUE"
# supported operators for op are: ==, !=, <, <=, >, >=
# On return, ${out} is 1 in the caller's scope when the condition holds, 0 otherwise.
# Note: <, <=, >, >= use CMake's LESS/GREATER family, i.e. numeric comparison.
FUNCTION (EMBREE_TESTING_CHECK_OPTION out condition)
  # parse condition into a list of whitespace-separated tokens
  string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" tokens "${condition}")
  LIST(LENGTH tokens token_count)
  IF (NOT ${token_count} EQUAL 3)
    # typo fix: message previously read "embree_opitons"
    message(FATAL_ERROR "illegal embree_options condition: ${condition}")
  ENDIF()
  # we require every condition to follow the scheme "variable_name op value"
  LIST(GET tokens 0 option)
  LIST(GET tokens 1 comp)
  LIST(GET tokens 2 value)
  # quoted so STRING(STRIP) stays well-formed even for unusual token content
  STRING(STRIP "${option}" option)
  STRING(STRIP "${comp}" comp)
  STRING(STRIP "${value}" value)
  SET(${out} 0 PARENT_SCOPE)
  # ${${option}} dereferences the named configuration variable
  if ("${comp}" STREQUAL "==")
    if ("${${option}}" STREQUAL "${value}")
      SET(${out} 1 PARENT_SCOPE)
    endif()
  elseif ("${comp}" STREQUAL "!=")
    if (NOT ("${${option}}" STREQUAL "${value}"))
      SET(${out} 1 PARENT_SCOPE)
    endif()
  elseif ("${comp}" STREQUAL ">")
    if ("${${option}}" GREATER "${value}")
      SET(${out} 1 PARENT_SCOPE)
    endif()
  elseif ("${comp}" STREQUAL ">=")
    if ("${${option}}" GREATER_EQUAL "${value}")
      SET(${out} 1 PARENT_SCOPE)
    endif()
  elseif ("${comp}" STREQUAL "<")
    if ("${${option}}" LESS "${value}")
      SET(${out} 1 PARENT_SCOPE)
    endif()
  elseif ("${comp}" STREQUAL "<=")
    if ("${${option}}" LESS_EQUAL "${value}")
      SET(${out} 1 PARENT_SCOPE)
    endif()
  else()
    message(FATAL_ERROR "Could not parse embree_option condition: ${condition}")
  endif()
ENDFUNCTION()
# Checks multiple options from a list with EMBREE_TESTING_CHECK_OPTION;
# ${out} becomes 1 only when every condition in ${conditions} holds,
# 0 as soon as the first condition fails.
FUNCTION (EMBREE_TESTING_CHECK_OPTIONS_LIST out conditions)
  SET(result 1)
  FOREACH (condition ${conditions})
    EMBREE_TESTING_CHECK_OPTION(single ${condition})
    IF (single EQUAL 0)
      SET(result 0)
      BREAK()
    ENDIF()
  ENDFOREACH()
  SET(${out} ${result} PARENT_SCOPE)
ENDFUNCTION()
# looks for ifile in multiple possible locations and outputs a file with absolute path in ofile
# - ifile: file name, absolute or relative
# - ofile: name of the variable receiving the absolute path ("" when not found)
# - errmsgflavor: human-readable description used in the fatal error message;
#   pass an empty string to make a missing file non-fatal
FUNCTION (EMBREE_FIND_TEST_FILE ifile ofile errmsgflavor)
  if (EXISTS "${ifile}") # abs path, use get_filename_component because it could also be relative to cwd
    get_filename_component(absifile "${ifile}" ABSOLUTE)
    SET(${ofile} ${absifile} PARENT_SCOPE)
  elseif(EXISTS "${PROJECT_SOURCE_DIR}/tests/${ifile}") # testing dir
    set(${ofile} "${PROJECT_SOURCE_DIR}/tests/${ifile}" PARENT_SCOPE)
  elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${ifile}") # relative to source folder
    set(${ofile} "${CMAKE_CURRENT_SOURCE_DIR}/${ifile}" PARENT_SCOPE)
  else()
    set(${ofile} "" PARENT_SCOPE)
    if (errmsgflavor)
      # bug fix: the first "looked for" line referenced ${inputfile}, a
      # variable that is not defined in this function; report ${ifile}
      message(FATAL_ERROR
        "Could not find ${errmsgflavor} \"${ifile}\"\n"
        "looked for:\n"
        " ${ifile}\n"
        " ${PROJECT_SOURCE_DIR}/tests/${ifile}\n"
        " ${CMAKE_CURRENT_SOURCE_DIR}/${ifile}\n"
      )
    endif()
  endif()
ENDFUNCTION()
# Extracts from ${args} the values that follow ${keyword}, up to the next
# known keyword (or the end of the list), and returns them in ${sublist}.
# Returns an empty list when ${keyword} does not occur in ${args}.
FUNCTION (EMBREE_ADD_TEST_PARSE_SUBLIST args keyword sublist)
  SET(myargs ${args})
  SET(mysublist "")
  SET(keywords "ECS;XML;NO_REFERENCE;REFERENCE;REFERENCE_SUFFIX;INTENSITY;CONDITION_FILE;CONDITION;ARGS;NO_CPP;NO_ISPC;NO_SYCL;GEN_REFERENCE;")
  # bug fix: search the local copy (myargs); the old code searched "nargs",
  # a variable that only happened to exist in the caller's scope
  list(FIND myargs ${keyword} istart)
  if (NOT(istart EQUAL -1))
    # shrink iend to the position of the next keyword after istart
    list(LENGTH myargs iend)
    foreach(k ${keywords})
      list(FIND myargs ${k} i)
      if (NOT(i EQUAL -1) AND (i GREATER istart) AND (i LESS iend))
        SET(iend ${i})
      endif()
    endforeach()
    # collect everything strictly between the keyword and the next keyword
    MATH(EXPR i "${istart}+1")
    while (i LESS iend)
      list(GET myargs ${i} elem)
      list(APPEND mysublist ${elem})
      MATH(EXPR i "${i}+1")
    endwhile()
  endif()
  SET(${sublist} ${mysublist} PARENT_SCOPE)
ENDFUNCTION()
# Sets ${value} to ON when ${keyword} occurs in the argument list ${args},
# OFF otherwise.
FUNCTION (EMBREE_ADD_TEST_PARSE_FLAG args keyword value)
  SET(myargs ${args})
  SET(${value} OFF PARENT_SCOPE)
  # bug fix: search the local copy (myargs); the old code searched "nargs",
  # a variable that only happened to exist in the caller's scope
  list(FIND myargs ${keyword} i)
  if (NOT(i EQUAL -1))
    SET(${value} ON PARENT_SCOPE)
  endif()
ENDFUNCTION()
# ADD_EMBREE_TEST_ECS(testname exename [ECS <file> | XML <file>] [NO_REFERENCE | REFERENCE <path> | REFERENCE_SUFFIX <suffix>] [INTENSITY <i>] [CONDITION <conds>] [ARGS <args>] [GEN_REFERENCE])
# [ECS <file> | XML <inputfile> | OPTIONS <inputfile>]
# - looks for file and calls the test command with either "-c <inputfile>.ecs" or "-i <inputfile>.xml"
#
# [NO_REFERENCE | REFERENCE <path> | REFERENCE_SUFFIX <suffix>]
# - if no reference argument is specified, a reference will be expected in the same folder as *.ecs with name *.ecs.exename.exr
# - NO_REFERENCE: don't look for a reference (no --compare in test command)
# - REFERENCE <path>: use the reference located in <path>. Same rules apply as for finding ecs files: absolute or relative to CMAKE_CURRENT_SOURCE_DIR path, must not be located outside the embree root dir.
# - REFERENCE_SUFFIX <suffix>: use the default reference location and name with a suffix before the last file extension, e.g. *.ecs.exename<suffix>.exr
# - if this argument is not specified, looks for
# 1. <inputfile>.exename.exr, or
# 2. <testname>.exr, if no <inputfile> was given
#
# [INTENSITY <i>]
# - default i = 1
# - sets the intensity level for the test, test is only run if ${EMBREE_TESTING_INTENSITY} GREATER_EQUAL i
# - could be done with an *.embree_options file, but this is more flexible, e.g. you can easier share reference images without specifying an absolute path
# - DOES NOT override EMBREE_TESTING_INTENSITY, if specified in *.embree_options
#
# [CONDITION_FILE <file>]
# - file containing additional conditions
# - conditions are specified linewise in form of: EMBREE_OPTION op VALUE, where EMBREE_OPTION is a cmake variable used during embree configuration and op is one of ==, !=, <, <=, >, >=
# - if this argument is not specified, looks for
# 1. <inputfile>.embree_options, or
# 2. <testname>.embree_options, if no <inputfile> was given
#
# [CONDITION <conds>]
# - cmake list of additional conditions, specified the same way as in an embree_options file
#
# [ARGS <args>]
# - additional arguments for the test command
#
# [GEN_REFERENCE]
# - writes the reference to the expected location
# - could also be done with the ARGS parameter, but this way we don't have to deal with paths
#
# EXAMPLES
#
# all optional arguments default,
# - looks for points.ecs.embree_options to filter out test by configured options, if not found no restrictions
# - runs for EMBREE_TESTING_INTENSITY >= 1
# - reference will be expected in the same folder as points.ecs with name points.ecs.embree_viewer.exr
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "models/xxx/points.ecs") # 1)
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "${PROJECT_SOURCE_DIR}/models/xxx/points.ecs") # 2) same as 1) but with absolute path to ecs
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "points.ecs") # 3) same as 1) but looks for points.ecs in the ${CMAKE_CURRENT_SOURCE_DIR} i.e. <embree_root>/tutorials/viewer/points.ecs
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "models/xxx/points.ecs" ARGS --coherent INTENSITY 2) # 4) same as 1) but runs only at EMBREE_TESTING_INTENSITY >= 2
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "models/xxx/points.ecs" REFERENCE_SUFFIX "_quads" ARGS
# --coherent
# --convert-triangles-to-quads INTENSITY 2) # 5) same as 4) but expects reference name points.ecr.embree_viewer_quads.ecs
#ADD_EMBREE_TEST_ECS(viewer_points embree_viewer "models/xxx/points.ecs" REFERENCE "special.exr" ARGS
# --coherent
# --convert-triangles-to-quads INTENSITY 2) # 5) same as 4) but expects reference <embree-root>/tutorials/viewer/special.exr
## 1) - 5) all share the same base input arguments defined in points.ecs as well as the points.ecs.embree_options
#ADD_EMBREE_TEST_ECS(point_geometry embree_point_geometry) # 6) no ecs file, uses point_geometry.embree_options if exists, expects reference ${CMAKE_CURRENT_SOURCE_DIR}/point_geometry.exr
#ADD_EMBREE_TEST_ECS(verify verify NO_REFERENCE) # 7) no ecs file, uses verify.embree_options if exists, no reference
#ADD_EMBREE_TEST_ECS(verify_memcheck verify NO_REFERENCE CONDITIONS "EMBREE_TESTING_MEMCHECK == ON") # 8) same as 7) but with extra condition
# ADD_EMBREE_TEST_ECS(testname executable [ECS <file> | XML <file>]
#   [NO_REFERENCE | REFERENCE <path> | REFERENCE_SUFFIX <suffix>]
#   [INTENSITY <i>] [CONDITION_FILE <file>] [CONDITION <conds>] [ARGS <args>]
#   [MEMCHECK] [NO_CPP] [NO_ISPC] [NO_SYCL] [NO_POSTFIX] [GEN_REFERENCE])
# Registers the cpp/ispc/sycl variants of a tutorial-based test; the comment
# block above documents every option in detail.
FUNCTION (ADD_EMBREE_TEST_ECS testname executable)
  # honor the global test-name filter, if one was configured
  if (EMBREE_TESTING_FILTER_TESTNAMES)
    list(FIND EMBREE_TESTING_FILTER_TESTNAMES "${testname}" i)
    if (${i} EQUAL -1)
      return()
    endif()
  endif()

  SET(nargs ${ARGN})

  # disable everything: reset the internal "defined" cache flags so stale
  # flags from a previous configure run do not survive
  SET(testnamedef EMBREE_TEST_${testname}_DEFINED)
  SET(${testnamedef} "0" CACHE INTERNAL "")
  SET(testnamedef EMBREE_TEST_${testname}_ispc_DEFINED)
  SET(${testnamedef} "0" CACHE INTERNAL "")
  SET(testnamedef EMBREE_TEST_${testname}_sycl_DEFINED)
  SET(${testnamedef} "0" CACHE INTERNAL "")

  # parsing input file
  # ECS mode -> single parameter with filename after ECS keyword
  list(FIND nargs "ECS" i)
  if (NOT(i EQUAL -1))
    SET(inputtype "-c")
    MATH(EXPR i "${i}+1")
    list(GET nargs ${i} out)
    EMBREE_FIND_TEST_FILE("${out}" inputfile "test file")
  endif()

  # XML mode -> single parameter with filename after XML keyword
  list(FIND nargs "XML" i)
  if (NOT(i EQUAL -1))
    SET(inputtype "-i")
    MATH(EXPR i "${i}+1")
    list(GET nargs ${i} out)
    EMBREE_FIND_TEST_FILE("${out}" inputfile "test file")
  endif()

  # no filetype keyword -> look for <testname>.ecs/xml
  # does not need to be specified
  if (NOT inputtype)
    EMBREE_FIND_TEST_FILE("${testname}.ecs" inputfile "")
    if(inputfile)
      SET(inputtype "-c")
    else()
      # bug fix: this used to probe "${testname}.ecs" a second time, which
      # both wrongly flipped inputtype of a found ECS file to "-i" and made
      # implicit XML inputs undiscoverable
      EMBREE_FIND_TEST_FILE("${testname}.xml" inputfile "")
      if(inputfile)
        SET(inputtype "-i")
      endif()
    endif()
  endif()

  # parsing reference image
  # no reference image mode -> no additional parameter to parse
  list(FIND nargs "NO_REFERENCE" i)
  if (NOT(i EQUAL -1))
    SET(no_reference ON)
  endif()

  # reference suffix mode -> single parameter with suffix to default reference file name
  list(FIND nargs "REFERENCE_SUFFIX" i)
  if (NOT(i EQUAL -1))
    MATH(EXPR i "${i}+1")
    list(GET nargs ${i} out)
    EMBREE_FIND_TEST_FILE("${inputfile}.${executable}${out}.exr" referencefile "reference image")
  endif()

  # reference mode -> single parameter with absolute path to reference image
  list(FIND nargs "REFERENCE" i)
  if (NOT(i EQUAL -1))
    MATH(EXPR i "${i}+1")
    list(GET nargs ${i} out)
    EMBREE_FIND_TEST_FILE("${out}" referencefile "reference image")
  endif()

  # no reference keyword -> look for <testname>.exr and <inputfile>.<executable>.exr respectively
  if ((NOT no_reference) AND (NOT referencefile))
    if (NOT inputfile)
      EMBREE_FIND_TEST_FILE("${testname}.exr" referencefile "reference image")
    else()
      EMBREE_FIND_TEST_FILE("${inputfile}.${executable}.exr" referencefile "reference image")
    endif()
  endif()

  # parsing intensity
  # single integer parameter
  SET(intensity 1)
  list(FIND nargs "INTENSITY" i)
  if (NOT(i EQUAL -1))
    MATH(EXPR i "${i}+1")
    list(GET nargs ${i} intensity)
  endif()

  # parsing condition
  SET(conditions "")
  SET(conditionsfile)
  list(FIND nargs "CONDITION_FILE" i)
  # condition file -> single parameter with path of conditions file
  if (NOT(i EQUAL -1))
    # bug fix: advance past the keyword; the old code read the keyword
    # element itself instead of the filename that follows it
    MATH(EXPR i "${i}+1")
    list(GET nargs ${i} conditionsfile)
    EMBREE_FIND_TEST_FILE("${conditionsfile}" conditionsfile "")
  # no <inputfile> specified -> look for <testname>.embree_options
  elseif (NOT inputtype)
    EMBREE_FIND_TEST_FILE("${testname}.embree_options" conditionsfile "")
  # <inputfile> specified -> look for <inputfile>.embree_options
  else()
    EMBREE_FIND_TEST_FILE("${inputfile}.embree_options" conditionsfile "")
  endif()
  if (conditionsfile)
    # one condition per line
    file(READ "${conditionsfile}" lines)
    string(REGEX REPLACE "\n" ";" conditions "${lines}")
  endif()

  # parsing additional conditions
  EMBREE_ADD_TEST_PARSE_SUBLIST("${nargs}" "CONDITION" extra_conditions)
  list(APPEND conditions ${extra_conditions})

  # parsing extra args and flags
  EMBREE_ADD_TEST_PARSE_SUBLIST("${nargs}" "ARGS" extraargs)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "MEMCHECK" memcheck)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "NO_CPP" no_cpp)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "NO_ISPC" no_ispc)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "NO_SYCL" no_sycl)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "NO_POSTFIX" no_postfix)
  EMBREE_ADD_TEST_PARSE_FLAG("${nargs}" "GEN_REFERENCE" gen_reference)

  # add the test, if the conditions are met
  SET(matchconditions ON)
  if (conditions)
    EMBREE_TESTING_CHECK_OPTIONS_LIST(out "${conditions}")
    if (out EQUAL 0)
      SET(matchconditions OFF)
    endif()
  endif()

  # LESS_EQUAL not supported on all CI runners
  if (matchconditions AND ((intensity LESS EMBREE_TESTING_INTENSITY) OR (intensity EQUAL EMBREE_TESTING_INTENSITY)))
    # assemble the command line for the test executable
    SET(args "")
    if (inputfile)
      list(APPEND args "${inputtype} ${inputfile}")
    endif()
    if (NOT no_reference)
      list(APPEND args "--compare ${referencefile}")
    endif()
    if (gen_reference)
      list(APPEND args "-o ${referencefile}")
    endif()
    foreach(a ${extraargs})
      list(APPEND args "${a}")
    endforeach()
    if (memcheck)
      # run the executable under the configured memory checker instead
      list(PREPEND args "${MY_PROJECT_BINARY_DIR}/${executable}")
      list(PREPEND args "${EMBREE_MEMORYCHECK_COMMAND_OPTIONS}")
      if (no_postfix)
        ADD_EMBREE_GENERIC_TEST(${testname} ${EMBREE_MEMORYCHECK_COMMAND} ${args})
      else()
        if((NOT ${EMBREE_SYCL_SUPPORT}) OR (NOT ${EMBREE_TESTING_ONLY_SYCL_TESTS}))
          if (NOT(no_cpp))
            ADD_EMBREE_GENERIC_CPP_TEST(${testname} ${EMBREE_MEMORYCHECK_COMMAND} ${args})
          endif()
          if (NOT(no_ispc))
            ADD_EMBREE_GENERIC_ISPC_TEST(${testname} ${EMBREE_MEMORYCHECK_COMMAND} ${args})
          endif()
        endif()
        if (NOT(no_sycl))
          ADD_EMBREE_GENERIC_SYCL_TEST(${testname} ${EMBREE_MEMORYCHECK_COMMAND} ${args})
        endif()
      endif()
    else()
      if (no_postfix)
        ADD_EMBREE_GENERIC_TEST(${testname} ${executable} ${args})
      else()
        if (NOT(no_cpp))
          ADD_EMBREE_GENERIC_CPP_TEST(${testname} ${executable} ${args})
        endif()
        if (NOT(no_ispc))
          ADD_EMBREE_GENERIC_ISPC_TEST(${testname} ${executable} ${args})
        endif()
        if (NOT(no_sycl))
          ADD_EMBREE_GENERIC_SYCL_TEST(${testname} ${executable} ${args})
        endif()
      endif()
    endif()
  endif()

  # when installing tests, copy input/condition/reference files into the
  # install tree and record an equivalent ADD_EMBREE_TEST_ECS call in the
  # generated registration file
  if (EMBREE_TESTING_INSTALL_TESTS)
    if (inputfile)
      get_filename_component(inputpath ${inputfile} DIRECTORY)
      STRING(REPLACE "${PROJECT_SOURCE_DIR}/" "" inputpath "${inputpath}")
      INSTALL(FILES "${inputfile}"
        DESTINATION "${CMAKE_INSTALL_TESTDIR}/${inputpath}"
        COMPONENT testing)
    endif()
    if (conditionsfile)
      get_filename_component(conditionspath ${conditionsfile} DIRECTORY)
      STRING(REPLACE "${PROJECT_SOURCE_DIR}/" "" conditionspath "${conditionspath}")
      INSTALL(FILES "${conditionsfile}"
        DESTINATION "${CMAKE_INSTALL_TESTDIR}/${conditionspath}"
        COMPONENT testing)
    endif()
    if (referencefile)
      get_filename_component(referencepath ${referencefile} DIRECTORY)
      STRING(REPLACE "${PROJECT_SOURCE_DIR}/" "" referencepath "${referencepath}")
      INSTALL(FILES "${referencefile}"
        DESTINATION "${CMAKE_INSTALL_TESTDIR}/${referencepath}"
        COMPONENT testing)
    endif()
    # re-create the call with project-relative paths
    SET(testcall "ADD_EMBREE_TEST_ECS(${testname} ${executable}")
    if (inputfile)
      STRING(REPLACE "${PROJECT_SOURCE_DIR}/" "" inputfile "${inputfile}")
      if (inputtype STREQUAL "-c")
        SET(testcall "${testcall} \n ECS ${inputfile}")
      elseif (inputtype STREQUAL "-i")
        SET(testcall "${testcall} \n XML ${inputfile}")
      endif()
    endif()
    if (no_reference)
      SET(testcall "${testcall} \n NO_REFERENCE")
    else()
      STRING(REPLACE "${PROJECT_SOURCE_DIR}/" "" referencefile "${referencefile}")
      SET(testcall "${testcall} \n REFERENCE ${referencefile}")
    endif()
    SET(testcall "${testcall} \n INTENSITY ${intensity}")
    if (${memcheck})
      SET(testcall "${testcall} \n MEMCHECK")
    endif()
    if (${no_cpp})
      SET(testcall "${testcall} \n NO_CPP")
    endif()
    if (${no_ispc})
      SET(testcall "${testcall} \n NO_ISPC")
    endif()
    if (${no_sycl})
      SET(testcall "${testcall} \n NO_SYCL")
    endif()
    if (${no_postfix})
      SET(testcall "${testcall} \n NO_POSTFIX")
    endif()
    STRING(REPLACE "${PROJECT_SOURCE_DIR}/" "" conditionsfile "${conditionsfile}")
    SET(testcall "${testcall} \n CONDITION_FILE ${conditionsfile}")
    SET(testcall "${testcall} \n CONDITION ")
    foreach(c ${conditions})
      SET(testcall "${testcall} \"${c}\"")
    endforeach()
    SET(testcall "${testcall} \n ARGS ")
    foreach(a ${extraargs})
      SET(testcall "${testcall} ${a}")
    endforeach()
    SET(testcall "${testcall})\n\n")
    file(APPEND "${EMBREE_INSTALL_CTESTTESTFILE}" "${testcall}")
  endif()
ENDFUNCTION()
else()
# testing disabled: provide no-op stand-ins so callers can invoke these
# unconditionally without guarding on BUILD_TESTING themselves
FUNCTION(ADD_EMBREE_TEST_ECS testname executable)
ENDFUNCTION()
FUNCTION(SET_EMBREE_TEST_PROPERTIES testname)
ENDFUNCTION()
endif()

View file

@ -0,0 +1,38 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# additional parameters (beyond the name) are treated as additional dependencies
# if ADDITIONAL_LIBRARIES is set these will be included during linking
# Builds the plain C++ tutorial executable embree_<TUTORIAL_NAME> from
# <name>.cpp + <name>_device.cpp; any extra arguments become additional
# sources. Links ${ADDITIONAL_LIBRARIES} when the caller has set it.
macro (ADD_TUTORIAL TUTORIAL_NAME)
  add_executable(embree_${TUTORIAL_NAME} ../../kernels/embree.rc ${TUTORIAL_NAME}.cpp ${TUTORIAL_NAME}_device.cpp ${ARGN})
  target_link_libraries(embree_${TUTORIAL_NAME} embree image tutorial noise ${ADDITIONAL_LIBRARIES})
  set_property(TARGET embree_${TUTORIAL_NAME} PROPERTY FOLDER tutorials/single)
  set_property(TARGET embree_${TUTORIAL_NAME} APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}")
  install(TARGETS embree_${TUTORIAL_NAME} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
  sign_target(embree_${TUTORIAL_NAME})
endmacro ()
# SYCL variant of ADD_TUTORIAL: builds embree_<TUTORIAL_NAME>_sycl with the
# SYCL compile/link flags; does nothing unless EMBREE_SYCL_SUPPORT is on.
macro (ADD_TUTORIAL_SYCL TUTORIAL_NAME)
  if (EMBREE_SYCL_SUPPORT)
    add_executable(embree_${TUTORIAL_NAME}_sycl ${TUTORIAL_NAME}.cpp ${TUTORIAL_NAME}_device.cpp ${ARGN})
    target_link_libraries(embree_${TUTORIAL_NAME}_sycl embree image tutorial_sycl noise ${ADDITIONAL_LIBRARIES})
    target_compile_definitions(embree_${TUTORIAL_NAME}_sycl PUBLIC EMBREE_SYCL_TUTORIAL)
    set_property(TARGET embree_${TUTORIAL_NAME}_sycl PROPERTY FOLDER tutorials/sycl)
    set_property(TARGET embree_${TUTORIAL_NAME}_sycl APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST} ${CMAKE_CXX_FLAGS_SYCL}")
    set_property(TARGET embree_${TUTORIAL_NAME}_sycl APPEND PROPERTY LINK_FLAGS "${CMAKE_LINK_FLAGS_SYCL}")
    install(TARGETS embree_${TUTORIAL_NAME}_sycl DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT examples)
    sign_target(embree_${TUTORIAL_NAME}_sycl)
  endif()
endmacro ()
# ISPC variant of ADD_TUTORIAL: builds embree_<TUTORIAL_NAME>_ispc from the
# .cpp host part plus the .ispc device part; no-op without EMBREE_ISPC_SUPPORT.
macro (ADD_TUTORIAL_ISPC TUTORIAL_NAME)
  if (EMBREE_ISPC_SUPPORT)
    add_embree_ispc_executable(embree_${TUTORIAL_NAME}_ispc ../../kernels/embree.rc ${TUTORIAL_NAME}.cpp ${TUTORIAL_NAME}_device.ispc ${ARGN})
    target_link_libraries(embree_${TUTORIAL_NAME}_ispc embree image tutorial_ispc noise noise_ispc)
    set_property(TARGET embree_${TUTORIAL_NAME}_ispc PROPERTY FOLDER tutorials/ispc)
    set_property(TARGET embree_${TUTORIAL_NAME}_ispc APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}")
    install(TARGETS embree_${TUTORIAL_NAME}_ispc DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT examples)
    sign_target(embree_${TUTORIAL_NAME}_ispc)
  endif()
endmacro ()

View file

@ -0,0 +1,24 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Uninstall script template: removes every file recorded in the build tree's
# install_manifest.txt. Configured via configure_file (@VAR@ substitution).
if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
  message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
endif()
file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
string(REGEX REPLACE "\n" ";" files "${files}")
foreach(file ${files})
  message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
  if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
    # execute_process replaces the long-deprecated EXEC_PROGRAM command;
    # output is still captured so the removal stays quiet on success
    execute_process(
      COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
      OUTPUT_VARIABLE rm_out
      RESULT_VARIABLE rm_retval
    )
    if(NOT "${rm_retval}" STREQUAL "0")
      message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
    endif()
  else()
    message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
  endif()
endforeach()

View file

@ -0,0 +1,17 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Static lexer library: string/token stream tokenizers used by the parsers.
add_library(lexers STATIC
  stringstream.cpp
  tokenstream.cpp
)
target_link_libraries(lexers sys math)
set_property(TARGET lexers PROPERTY FOLDER common)
set_property(TARGET lexers APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}")
if (EMBREE_STATIC_LIB)
  # static builds ship the archive plus its CMake export for consumers
  install(TARGETS lexers EXPORT lexers-targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel)
  install(EXPORT lexers-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel)
endif()

View file

@ -0,0 +1,101 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "stringstream.h"
#include "../sys/filename.h"
#include "../math/vec2.h"
#include "../math/vec3.h"
#include "../math/col3.h"
#include "../math/color.h"
namespace embree
{
/*! helper for simple command-line parsing: pulls tokens from an underlying
 *  string stream and converts them into primitive and small vector types */
class ParseStream : public Stream<std::string>
{
public:
  /* wrap an existing token stream */
  ParseStream (const Ref<Stream<std::string> >& cin) : cin(cin) {}

  /* wrap a raw character stream through a StringStream tokenizer */
  ParseStream (const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
               const std::string& endl = "", bool multiLine = false)
    : cin(new StringStream(cin,seps,endl,multiLine)) {}

public:
  ParseLocation location() { return cin->loc(); }

  /* pull the next raw token */
  std::string next() { return cin->get(); }

  /* consume one token and require it to equal 'next' */
  void force(const std::string& next) {
    const std::string token = getString();
    if (token != next)
      THROW_RUNTIME_ERROR("token \""+next+"\" expected but token \""+token+"\" found");
  }

  std::string getString() { return get(); }

  FileName getFileName() { return FileName(get()); }

  /* note: atoi/atof silently yield 0 for malformed tokens */
  int getInt () { return atoi(get().c_str()); }

  Vec2i getVec2i() {
    const int a = atoi(get().c_str());
    const int b = atoi(get().c_str());
    return Vec2i(a,b);
  }

  Vec3ia getVec3ia() {
    const int a = atoi(get().c_str());
    const int b = atoi(get().c_str());
    const int c = atoi(get().c_str());
    return Vec3ia(a,b,c);
  }

  float getFloat() { return (float)atof(get().c_str()); }

  Vec2f getVec2f() {
    const float a = (float)atof(get().c_str());
    const float b = (float)atof(get().c_str());
    return Vec2f(a,b);
  }

  Vec3f getVec3f() {
    const float a = (float)atof(get().c_str());
    const float b = (float)atof(get().c_str());
    const float c = (float)atof(get().c_str());
    return Vec3f(a,b,c);
  }

  Vec3fa getVec3fa() {
    const float a = (float)atof(get().c_str());
    const float b = (float)atof(get().c_str());
    const float c = (float)atof(get().c_str());
    return Vec3fa(a,b,c);
  }

  Col3f getCol3f() {
    const float a = (float)atof(get().c_str());
    const float b = (float)atof(get().c_str());
    const float c = (float)atof(get().c_str());
    return Col3f(a,b,c);
  }

  Color getColor() {
    const float r = (float)atof(get().c_str());
    const float g = (float)atof(get().c_str());
    const float b = (float)atof(get().c_str());
    return Color(r,g,b);
  }

private:
  Ref<Stream<std::string> > cin; // underlying token stream
};
}

View file

@ -0,0 +1,215 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include "../sys/ref.h"
#include "../sys/filename.h"
#include "../sys/estring.h"
#include <vector>
#include <iostream>
#include <cstdio>
#include <string.h>
namespace embree
{
/*! stores the location (file name / line / column) of a stream element in
 *  the source; used to build human-readable parse error messages */
class ParseLocation
{
public:
  // default: unknown location (negative line/column suppress output in str())
  ParseLocation () : lineNumber(-1), colNumber(-1) {}
  // charNumber is accepted for call-site symmetry but not stored
  ParseLocation (std::shared_ptr<std::string> fileName, ssize_t lineNumber, ssize_t colNumber, ssize_t /*charNumber*/)
    : fileName(fileName), lineNumber(lineNumber), colNumber(colNumber) {}

  /* renders e.g. "scene.xml line 3 character 7"; "unknown" without a file name */
  std::string str() const
  {
    std::string str = "unknown";
    if (fileName) str = *fileName;
    if (lineNumber >= 0) str += " line " + toString(lineNumber);
    if (lineNumber >= 0 && colNumber >= 0) str += " character " + toString(colNumber);
    return str;
  }

private:
  std::shared_ptr<std::string> fileName; /// name of the file (or stream) the token is from
  ssize_t lineNumber;                    /// the line number the token is from
  ssize_t colNumber;                     /// the character number in the current line
};
/*! a stream class templated over the stream elements; keeps a fixed-size
 *  ring buffer of (value,location) pairs so that already-consumed elements
 *  can be pushed back via unget() */
template<typename T> class Stream : public RefCount
{
  enum { BUF_SIZE = 1024 }; // ring capacity: consumed history + read-ahead

private:
  /*! produces the next element of the underlying source */
  virtual T next() = 0;
  /*! location of the next element in the underlying source */
  virtual ParseLocation location() = 0;

  /*! reads the next element with its location; the location is sampled
   *  BEFORE next() advances the source — order matters here */
  __forceinline std::pair<T,ParseLocation> nextHelper() {
    ParseLocation l = location();
    T v = next();
    return std::pair<T,ParseLocation>(v,l);
  }
  /*! appends to the ring, discarding the oldest consumed element when full */
  __forceinline void push_back(const std::pair<T,ParseLocation>& v) {
    if (past+future == BUF_SIZE) pop_front();
    size_t end = (start+past+future++)%BUF_SIZE;
    buffer[end] = v;
  }
  /*! drops the oldest consumed element from the history */
  __forceinline void pop_front() {
    if (past == 0) THROW_RUNTIME_ERROR("stream buffer empty");
    start = (start+1)%BUF_SIZE; past--;
  }

public:
  Stream () : start(0), past(0), future(0), buffer(BUF_SIZE) {}
  virtual ~Stream() {}

public:
  /*! location of the next unconsumed element (reads ahead if necessary) */
  const ParseLocation& loc() {
    if (future == 0) push_back(nextHelper());
    return buffer[(start+past)%BUF_SIZE].second;
  }
  /*! consumes and returns the next element */
  T get() {
    if (future == 0) push_back(nextHelper());
    T t = buffer[(start+past)%BUF_SIZE].first;
    past++; future--;
    return t;
  }
  /*! returns the next element without consuming it */
  const T& peek() {
    if (future == 0) push_back(nextHelper());
    return buffer[(start+past)%BUF_SIZE].first;
  }
  /*! pushes the last n consumed elements back; throws when n exceeds the
   *  buffered history */
  const T& unget(size_t n = 1) {
    if (past < n) THROW_RUNTIME_ERROR ("cannot unget that many items");
    past -= n; future += n;
    return peek();
  }
  /*! consumes the next element without returning it */
  void drop() {
    if (future == 0) push_back(nextHelper());
    past++; future--;
  }

private:
  size_t start,past,future; // ring start index, #history elements, #read-ahead elements
  std::vector<std::pair<T,ParseLocation> > buffer;
};
/*! wraps a std::istream as a character stream with location tracking */
class StdStream : public Stream<int>
{
public:
  StdStream (std::istream& cin, const std::string& name = "std::stream")
    : cin(cin), lineNumber(1), colNumber(0), charNumber(0),
      name(std::shared_ptr<std::string>(new std::string(name))) {}

  ~StdStream() {}

  ParseLocation location() {
    return ParseLocation(name,lineNumber,colNumber,charNumber);
  }

  /*! reads one character and updates the line/column/char counters;
   *  '\r' is not counted toward the column */
  int next() {
    const int c = cin.get();
    charNumber++;
    if (c == '\n') { lineNumber++; colNumber = 0; }
    else if (c != '\r') colNumber++;
    return c;
  }

private:
  std::istream& cin;                  // wrapped input stream
  ssize_t lineNumber;                 // current line (1-based)
  ssize_t colNumber;                  // column within the current line
  ssize_t charNumber;                 // absolute character index
  std::shared_ptr<std::string> name;  // stream name reported in locations
};
/*! creates a character stream from a file, tracking line/column/char */
class FileStream : public Stream<int>
{
public:
  /*! opens fileName; throws when the file cannot be opened.
   *  fix: the previous code called ifs.close() on the freshly
   *  default-constructed stream, which sets failbit on a stream that was
   *  never open and only worked because C++11 open() clears the state on
   *  success — the spurious close is removed */
  FileStream (const FileName& fileName)
    : lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(fileName.str())))
  {
    ifs.open(fileName.str());
    if (!ifs.is_open()) THROW_RUNTIME_ERROR("cannot open file " + fileName.str());
  }

  /* std::ifstream closes itself on destruction; nothing to do here */
  ~FileStream() {}

public:
  ParseLocation location() {
    return ParseLocation(name,lineNumber,colNumber,charNumber);
  }

  /*! reads one character (EOF at end) and updates the counters;
   *  '\r' is not counted toward the column */
  int next() {
    int c = ifs.get();
    if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
    charNumber++;
    return c;
  }

private:
  std::ifstream ifs;
  ssize_t lineNumber;                /// the line number the token is from
  ssize_t colNumber;                 /// the character number in the current line
  ssize_t charNumber;                /// the character in the file
  std::shared_ptr<std::string> name; /// name of buffer
};
/*! creates a character stream from a NUL-terminated C string */
class StrStream : public Stream<int>
{
public:
  StrStream (const char* str)
    : str(str), lineNumber(1), colNumber(0), charNumber(0) {}

public:
  /* no file name is associated with an in-memory string */
  ParseLocation location() {
    return ParseLocation(std::shared_ptr<std::string>(),lineNumber,colNumber,charNumber);
  }

  /*! returns the next character, or EOF at the terminating NUL (without
   *  advancing past it); '\r' is not counted toward the column */
  int next() {
    const int c = str[charNumber];
    if (c == 0) return EOF;
    charNumber++;
    if (c == '\n') { lineNumber++; colNumber = 0; }
    else if (c != '\r') colNumber++;
    return c;
  }

private:
  const char* str;    // source characters (not owned)
  ssize_t lineNumber; // current line (1-based)
  ssize_t colNumber;  // column within the current line
  ssize_t charNumber; // read position within str
};
/*! creates a character stream from a command line: argv[1..argc-1] joined
 *  by single spaces, followed by EOF */
class CommandLineStream : public Stream<int>
{
public:
  CommandLineStream (int argc, char** argv, const std::string& name = "command line")
    : i(0), j(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name)))
  {
    /* account for the program name (argv[0], capped at 1024 chars) plus one
       separator. fixes: the loop variable previously shadowed member 'i',
       and argv[0][i] was dereferenced before the i<1024 bound was checked */
    if (argc > 0) {
      for (size_t n=0; n<1024 && argv[0][n]; n++) charNumber++;
      charNumber++;
    }
    for (ssize_t k=1; k<argc; k++) args.push_back(argv[k]);
  }

  ~CommandLineStream() {}

public:
  /* command lines have no meaningful line number; column = char index */
  ParseLocation location() {
    return ParseLocation(name,0,charNumber,charNumber);
  }

  /*! emits the arguments' characters, a single ' ' between arguments,
   *  and EOF once all arguments are exhausted */
  int next() {
    if (i == args.size()) return EOF;
    if (j == args[i].size()) { i++; j=0; charNumber++; return ' '; }
    charNumber++;
    return args[i][j++];
  }

private:
  size_t i,j;                        // current argument / offset within it
  std::vector<std::string> args;     // argv[1..argc-1]
  ssize_t charNumber;                /// the character in the file
  std::shared_ptr<std::string> name; /// name of buffer
};
}

View file

@ -0,0 +1,39 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "stream.h"
namespace embree
{
/* filters a character stream, stripping everything from a line-comment
   marker up to (but not including) the end of the line */
class LineCommentFilter : public Stream<int>
{
public:
  LineCommentFilter (const FileName& fileName, const std::string& lineComment)
    : cin(new FileStream(fileName)), lineComment(lineComment) {}
  LineCommentFilter (Ref<Stream<int> > cin, const std::string& lineComment)
    : cin(cin), lineComment(lineComment) {}

  ParseLocation location() { return cin->loc(); }

  int next()
  {
    /* does the comment marker start at the current position? */
    bool atComment = true;
    for (size_t j=0; j<lineComment.size(); j++) {
      if (cin->peek() != lineComment[j]) { cin->unget(j); atComment = false; break; }
      cin->get();
    }
    /* if so, discard all characters up to the end of the line (or file);
       the terminating '\n'/EOF itself is what gets returned below */
    if (atComment)
      while (cin->peek() != '\n' && cin->peek() != EOF) cin->get();
    return cin->get();
  }

private:
  Ref<Stream<int> > cin;   // wrapped character stream
  std::string lineComment; // marker that starts a comment (e.g. "//" or "#")
};
}

View file

@ -0,0 +1,48 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "stringstream.h"
namespace embree
{
/* characters accepted inside tokens (letters, digits, common punctuation);
   used to build isValidCharMap in the StringStream constructor */
static const std::string stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";
/* builds a 256-entry lookup table: map[b] is true iff byte b occurs in chrs */
static void createCharMap(bool map[256], const std::string& chrs) {
  for (int i = 0; i < 256; i++) map[i] = false;
  for (char ch : chrs) map[uint8_t(ch)] = true;
}
/* simple tokenizer: splits the character stream 'cin' at the characters in
   'seps'; when 'endl' is non-empty that token is emitted at each newline;
   'multiLine' enables '\'-continued lines */
StringStream::StringStream(const Ref<Stream<int> >& cin, const std::string& seps, const std::string& endl, bool multiLine)
  : cin(cin), endl(endl), multiLine(multiLine)
{
  createCharMap(isSepMap,seps);              // separator lookup table
  createCharMap(isValidCharMap,stringChars); // accepted token characters
}
/* returns the next token: skips separators (handling the optional endl token
   and '\'-line-continuations), then collects characters until the next
   separator; throws on characters outside the valid set */
std::string StringStream::next()
{
  /* skip separators */
  while (cin->peek() != EOF) {
    // report end-of-line as its own token when configured
    if (endl != "" && cin->peek() == '\n') { cin->drop(); return endl; }
    // a '\' directly before '\n' joins the two lines; a lone '\' is put back
    if (multiLine && cin->peek() == '\\') {
      cin->drop();
      if (cin->peek() == '\n') { cin->drop(); continue; }
      cin->unget();
    }
    if (!isSeparator(cin->peek())) break;
    cin->drop();
  }

  /* parse everything until the next separator */
  std::vector<char> str; str.reserve(64);
  while (cin->peek() != EOF && !isSeparator(cin->peek())) {
    int c = cin->get();
    if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
    str.push_back((char)c);
  }
  str.push_back(0); // NUL-terminate before constructing the result
  return std::string(str.data());
}
}

View file

@ -0,0 +1,29 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "stream.h"
namespace embree
{
/*! simple tokenizer that produces a string stream from a character stream */
class StringStream : public Stream<std::string>
{
public:
  /*! seps: separator characters; endl: token emitted at '\n' (disabled when
   *  empty); multiLine: allow '\'-continued lines */
  StringStream(const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
               const std::string& endl = "", bool multiLine = false);
public:
  ParseLocation location() { return cin->loc(); }
  std::string next();
private:
  /* O(1) character classification via the precomputed 256-entry tables */
  __forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
  __forceinline bool isValidChar(unsigned int c) const { return c<256 && isValidCharMap[c]; }
private:
  Ref<Stream<int> > cin;    /*! source character stream */
  bool isSepMap[256];       /*! map for fast classification of separators */
  bool isValidCharMap[256]; /*! map for valid characters */
  std::string endl;         /*! the token of the end of line */
  bool multiLine;           /*! whether to parse lines wrapped with \ */
};
}

View file

@ -0,0 +1,181 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "tokenstream.h"
#include "../math/emath.h"
namespace embree
{
/* shorthands for common sets of characters, usable as the 'alpha'/'seps'
   constructor arguments */
const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";
const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const std::string TokenStream::numbers = "0123456789";
const std::string TokenStream::separators = "\n\t\r ";
// characters permitted inside quoted strings (letters, digits, punctuation)
const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";
/* builds a 256-entry lookup table: map[b] is true iff byte b occurs in chrs */
static void createCharMap(bool map[256], const std::string& chrs) {
  for (int i = 0; i < 256; i++) map[i] = false;
  for (char ch : chrs) map[uint8_t(ch)] = true;
}
/* build full tokenizer that takes list of valid characters and keywords */
TokenStream::TokenStream(const Ref<Stream<int> >& cin,            //< stream to read from
                         const std::string& alpha,                //< valid characters for identifiers
                         const std::string& seps,                 //< characters that act as separators
                         const std::vector<std::string>& symbols) //< symbols
  : cin(cin), symbols(symbols)
{
  /* precompute 256-entry tables for O(1) character classification */
  createCharMap(isAlphaMap,alpha);
  createCharMap(isSepMap,seps);
  createCharMap(isStringCharMap,stringChars);
}
/* appends an optionally signed run of decimal digits to str_o; on failure
   everything consumed (including a lone sign) is pushed back and str_o is
   left untouched */
bool TokenStream::decDigits(std::string& str_o)
{
  std::string digits;
  bool found = false;
  if (cin->peek() == '+' || cin->peek() == '-') digits += (char)cin->get();
  while (isDigit(cin->peek())) { found = true; digits += (char)cin->get(); }
  if (found) str_o += digits;
  else cin->unget(digits.size());
  return found;
}
/* like decDigits but without accepting a leading sign */
bool TokenStream::decDigits1(std::string& str_o)
{
  std::string digits;
  bool found = false;
  while (isDigit(cin->peek())) { found = true; digits += (char)cin->get(); }
  if (found) str_o += digits;
  else cin->unget(digits.size());
  return found;
}
/* consumes 'symbol' when the stream starts with it; otherwise pushes back
   whatever was matched so far and returns false */
bool TokenStream::trySymbol(const std::string& symbol)
{
  for (size_t pos = 0; pos < symbol.size(); pos++) {
    if (cin->peek() != symbol[pos]) { cin->unget(pos); return false; }
    cin->drop();
  }
  return true;
}
/* tries the registered symbols in order; first match wins */
bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)
{
  for (const std::string& sym : symbols) {
    if (trySymbol(sym)) {
      token = Token(sym,Token::TY_SYMBOL,loc);
      return true;
    }
  }
  return false;
}
/* parses a floating point number: nan, +inf, -inf,
   [sign]digits.[digits][(e|E)[sign]digits], [sign]digits(e|E)[sign]digits,
   or .digits[(e|E)[sign]digits]; a bare integer is left for tryInt.
   On failure all consumed characters are pushed back. */
bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)
{
  bool ok = false;
  std::string str;
  /* special values; fix: these tokens were previously constructed without
     'loc', so nan/inf literals lost their parse location in diagnostics */
  if (trySymbol("nan")) {
    token = Token(float(nan),loc);
    return true;
  }
  if (trySymbol("+inf")) {
    token = Token(float(pos_inf),loc);
    return true;
  }
  if (trySymbol("-inf")) {
    token = Token(float(neg_inf),loc);
    return true;
  }
  if (decDigits(str))
  {
    if (cin->peek() == '.') {
      str += (char)cin->get();
      decDigits(str); // fractional digits are optional ("1." is valid)
      if (cin->peek() == 'e' || cin->peek() == 'E') {
        str += (char)cin->get();
        if (decDigits(str)) ok = true; // 1.[2]E2
      }
      else ok = true; // 1.[2]
    }
    else if (cin->peek() == 'e' || cin->peek() == 'E') {
      str += (char)cin->get();
      if (decDigits(str)) ok = true; // 1E2
    }
    /* a bare digit run stays ok==false so tryInt gets a chance */
  }
  else
  {
    if (cin->peek() == '.') {
      str += (char)cin->get();
      if (decDigits(str)) {
        if (cin->peek() == 'e' || cin->peek() == 'E') {
          str += (char)cin->get();
          if (decDigits(str)) ok = true; // .3E2
        }
        else ok = true; // .3
      }
    }
  }
  if (ok) {
    token = Token((float)atof(str.c_str()),loc);
  }
  else cin->unget(str.size());
  return ok;
}
/* parses an optionally signed integer token */
bool TokenStream::tryInt(Token& token, const ParseLocation& loc) {
  std::string digits;
  if (!decDigits(digits)) return false;
  token = Token(atoi(digits.c_str()),loc);
  return true;
}
/* parses a double-quoted string; throws on any character outside the
   stringChars set (EOF inside a string falls into that error path, since
   isStringChar(EOF) is false) */
bool TokenStream::tryString(Token& token, const ParseLocation& loc)
{
  if (cin->peek() != '\"') return false;
  cin->drop();
  std::string value;
  while (cin->peek() != '\"') {
    const int c = cin->get();
    if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str());
    value += (char)c;
  }
  cin->drop();
  token = Token(value,Token::TY_STRING,loc);
  return true;
}
/* parses an identifier: one alpha character followed by alphanumerics */
bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)
{
  if (!isAlpha(cin->peek())) return false;
  std::string ident;
  ident += (char)cin->get();
  while (isAlphaNum(cin->peek())) ident += (char)cin->get();
  token = Token(ident,Token::TY_IDENTIFIER,loc);
  return true;
}
/* consumes the run of separator characters before the next token */
void TokenStream::skipSeparators()
{
  for (;;) {
    const int c = cin->peek();
    if (c == EOF || !isSeparator(c)) break;
    cin->drop();
  }
}
/* produces the next token. The order of the attempts matters: symbols go
   first so registered operators win, and floats are tried before ints so
   e.g. "1.5" is not split into Int(1) and ".5". */
Token TokenStream::next()
{
  Token token;
  skipSeparators();                               // ignore whitespace between tokens
  ParseLocation loc = cin->loc();                 // location where the token starts
  if (trySymbols   (token,loc)) return token;     /**< try to parse a symbol */
  if (tryFloat     (token,loc)) return token;     /**< try to parse float */
  if (tryInt       (token,loc)) return token;     /**< try to parse integer */
  if (tryString    (token,loc)) return token;     /**< try to parse string */
  if (tryIdentifier(token,loc)) return token;     /**< try to parse identifier */
  if (cin->peek() == EOF  )     return Token(loc); /**< return EOF token */
  return Token((char)cin->get(),loc);             /**< return invalid character token */
}
}

View file

@ -0,0 +1,164 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "stream.h"
#include <string>
#include <vector>
namespace embree
{
/*! token class: tagged value (EOF / char / int / float, plus string-backed
 *  identifier / string / symbol) carrying the location it was parsed from */
class Token
{
public:
  enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL };

  Token (        const ParseLocation& loc = ParseLocation()) : ty(TY_EOF  ),       loc(loc) {}
  Token (char c, const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {}
  Token (int i,  const ParseLocation& loc = ParseLocation()) : ty(TY_INT  ), i(i), loc(loc) {}
  Token (float f,const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {}
  Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty), str(str), loc(loc) {}

  /* named factory helpers (location defaults to "unknown") */
  static Token Eof()                { return Token(); }
  static Token Sym(std::string str) { return Token(str,TY_SYMBOL); }
  static Token Str(std::string str) { return Token(str,TY_STRING); }
  static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); }

  /* typed accessors: throw (message includes the location) on a type mismatch */
  char Char() const {
    if (ty == TY_CHAR) return c;
    THROW_RUNTIME_ERROR(loc.str()+": character expected");
  }
  int Int() const {
    if (ty == TY_INT) return i;
    THROW_RUNTIME_ERROR(loc.str()+": integer expected");
  }
  /* by default an int token is implicitly converted to float */
  float Float(bool cast = true) const {
    if (ty == TY_FLOAT) return f;
    if (ty == TY_INT && cast) return (float)i;
    THROW_RUNTIME_ERROR(loc.str()+": float expected");
  }
  std::string Identifier() const {
    if (ty == TY_IDENTIFIER) return str;
    THROW_RUNTIME_ERROR(loc.str()+": identifier expected");
  }
  std::string String() const {
    if (ty == TY_STRING) return str;
    THROW_RUNTIME_ERROR(loc.str()+": string expected");
  }
  std::string Symbol() const {
    if (ty == TY_SYMBOL) return str;
    THROW_RUNTIME_ERROR(loc.str()+": symbol expected");
  }
  const ParseLocation& Location() const { return loc; }

  /* equality compares type and payload; the location is ignored, and two
     EOF tokens compare equal */
  friend bool operator==(const Token& a, const Token& b)
  {
    if (a.ty != b.ty) return false;
    if (a.ty == TY_CHAR) return a.c == b.c;
    if (a.ty == TY_INT) return a.i == b.i;
    if (a.ty == TY_FLOAT) return a.f == b.f;
    if (a.ty == TY_IDENTIFIER) return a.str == b.str;
    if (a.ty == TY_STRING) return a.str == b.str;
    if (a.ty == TY_SYMBOL) return a.str == b.str;
    return true;
  }
  friend bool operator!=(const Token& a, const Token& b) {
    return !(a == b);
  }
  /* ordering: by type tag first, then by payload (for sorted containers) */
  friend bool operator <( const Token& a, const Token& b ) {
    if (a.ty != b.ty) return (int)a.ty < (int)b.ty;
    if (a.ty == TY_CHAR) return a.c < b.c;
    if (a.ty == TY_INT) return a.i < b.i;
    if (a.ty == TY_FLOAT) return a.f < b.f;
    if (a.ty == TY_IDENTIFIER) return a.str < b.str;
    if (a.ty == TY_STRING) return a.str < b.str;
    if (a.ty == TY_SYMBOL) return a.str < b.str;
    return false;
  }
  friend std::ostream& operator<<(std::ostream& cout, const Token& t)
  {
    if (t.ty == TY_EOF) return cout << "eof";
    if (t.ty == TY_CHAR) return cout << "Char(" << t.c << ")";
    if (t.ty == TY_INT) return cout << "Int(" << t.i << ")";
    if (t.ty == TY_FLOAT) return cout << "Float(" << t.f << ")";
    if (t.ty == TY_IDENTIFIER) return cout << "Id(" << t.str << ")";
    if (t.ty == TY_STRING) return cout << "String(" << t.str << ")";
    if (t.ty == TY_SYMBOL) return cout << "Symbol(" << t.str << ")";
    return cout << "unknown";
  }

private:
  Type ty;           //< the type of the token
  union {
    char c;          //< data for char tokens
    int i;           //< data for int tokens
    float f;         //< data for float tokens
  };
  std::string str;   //< data for string and identifier tokens
  ParseLocation loc; //< the location the token is from
};
/*! full tokenizer: turns a character stream into a Token stream given the
 *  identifier alphabet, separator set, and an ordered symbol list */
class TokenStream : public Stream<Token>
{
public:
  /*! shorthands for common sets of characters */
  static const std::string alpha;
  static const std::string ALPHA;
  static const std::string numbers;
  static const std::string separators;
  static const std::string stringChars;

public:
  TokenStream(const Ref<Stream<int> >& cin,
              const std::string& alpha, //< valid characters for identifiers
              const std::string& seps,  //< characters that act as separators
              const std::vector<std::string>& symbols = std::vector<std::string>()); //< symbols

public:
  ParseLocation location() { return cin->loc(); }
  Token next();
  bool trySymbol(const std::string& symbol);

private:
  /* the try* helpers each either produce a token and return true, or
     restore the stream to its previous position and return false */
  void skipSeparators();
  bool decDigits(std::string& str);
  bool decDigits1(std::string& str);
  bool trySymbols(Token& token, const ParseLocation& loc);
  bool tryFloat(Token& token, const ParseLocation& loc);
  bool tryInt(Token& token, const ParseLocation& loc);
  bool tryString(Token& token, const ParseLocation& loc);
  bool tryIdentifier(Token& token, const ParseLocation& loc);

  Ref<Stream<int> > cin;        // source character stream
  bool isSepMap[256];           // lookup tables built by createCharMap
  bool isAlphaMap[256];
  bool isStringCharMap[256];
  std::vector<std::string> symbols; // symbols, tried in order

  /*! checks if a character is a separator */
  __forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
  /*! checks if a character is a number */
  __forceinline bool isDigit(unsigned int c) const { return c >= '0' && c <= '9'; }
  /*! checks if a character is valid inside a string */
  __forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; }
  /*! checks if a character is legal for an identifier */
  __forceinline bool isAlpha(unsigned int c) const { return c<256 && isAlphaMap[c]; }
  __forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); }
};
}

View file

@ -0,0 +1,12 @@
## Copyright 2009-2021 Intel Corporation
## SPDX-License-Identifier: Apache-2.0
# Static math support library (constants.cpp only; the rest is header-only).
add_library(math STATIC constants.cpp)
set_property(TARGET math PROPERTY FOLDER common)
set_property(TARGET math APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}")
if (EMBREE_STATIC_LIB)
  # static builds ship the archive plus its CMake export for consumers
  install(TARGETS math EXPORT math-targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel)
  install(EXPORT math-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel)
endif()

View file

@ -0,0 +1,361 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "linearspace2.h"
#include "linearspace3.h"
#include "quaternion.h"
#include "bbox.h"
#include "vec4.h"
namespace embree
{
#define VectorT typename L::Vector
#define ScalarT typename L::Vector::Scalar
////////////////////////////////////////////////////////////////////////////////
// Affine Space
////////////////////////////////////////////////////////////////////////////////
template<typename L>
struct AffineSpaceT
{
  L l;       /*< linear part of affine space (rotation/scale/shear) */
  VectorT p; /*< affine part of affine space (translation) */

  ////////////////////////////////////////////////////////////////////////////////
  // Constructors, Assignment, Cast, Copy Operations
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline AffineSpaceT           ( )                           { }
  __forceinline AffineSpaceT           ( const AffineSpaceT& other ) { l = other.l; p = other.p; }
  // from a linear transform only: the translation is set to zero
  __forceinline AffineSpaceT           ( const L           & other ) { l = other  ; p = VectorT(zero); }
  __forceinline AffineSpaceT& operator=( const AffineSpaceT& other ) { l = other.l; p = other.p; return *this; }

  // from basis vectors vx,vy,vz plus origin p
  __forceinline AffineSpaceT( const VectorT& vx, const VectorT& vy, const VectorT& vz, const VectorT& p ) : l(vx,vy,vz), p(p) {}
  __forceinline AffineSpaceT( const L& l, const VectorT& p ) : l(l), p(p) {}
  // conversion between affine spaces over different linear-space types
  template<typename L1> __forceinline AffineSpaceT( const AffineSpaceT<L1>& s ) : l(s.l), p(s.p) {}

  ////////////////////////////////////////////////////////////////////////////////
  // Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline AffineSpaceT( ZeroTy ) : l(zero), p(zero) {}
  // identity transform
  __forceinline AffineSpaceT( OneTy )  : l(one),  p(zero) {}

  /*! return matrix for scaling */
  static __forceinline AffineSpaceT scale(const VectorT& s) { return L::scale(s); }

  /*! return matrix for translation */
  static __forceinline AffineSpaceT translate(const VectorT& p) { return AffineSpaceT(one,p); }

  /*! return matrix for rotation, only in 2D */
  static __forceinline AffineSpaceT rotate(const ScalarT& r) { return L::rotate(r); }

  /*! return matrix for rotation around arbitrary point (2D) or axis (3D) */
  static __forceinline AffineSpaceT rotate(const VectorT& u, const ScalarT& r) { return L::rotate(u,r); }

  /*! return matrix for rotation around arbitrary axis and point, only in 3D;
   *  composed as translate-to-origin, rotate, translate-back */
  static __forceinline AffineSpaceT rotate(const VectorT& p, const VectorT& u, const ScalarT& r) { return translate(+p) * rotate(u,r) * translate(-p); }

  /*! return matrix for looking at given point, only in 3D;
   *  Z points from eye toward 'point'; U,V complete the frame from 'up' */
  static __forceinline AffineSpaceT lookat(const VectorT& eye, const VectorT& point, const VectorT& up) {
    VectorT Z = normalize(point-eye);
    VectorT U = normalize(cross(up,Z));
    VectorT V = normalize(cross(Z,U));
    return AffineSpaceT(L(U,V,Z),eye);
  }
};
// template specialization to get correct identity matrix for type AffineSpace3fa
// (the LinearSpace3ff translation stores a fourth component, set here to 1)
template<>
__forceinline AffineSpaceT<LinearSpace3ff>::AffineSpaceT( OneTy ) : l(one), p(0.f, 0.f, 0.f, 1.f) {}
////////////////////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////////////////////

template<typename L> __forceinline AffineSpaceT<L> operator -( const AffineSpaceT<L>& a ) { return AffineSpaceT<L>(-a.l,-a.p); }
template<typename L> __forceinline AffineSpaceT<L> operator +( const AffineSpaceT<L>& a ) { return AffineSpaceT<L>(+a.l,+a.p); }
/* inverse of an affine transform: (l,p)^-1 = (l^-1, -(l^-1 * p)) */
template<typename L> __forceinline AffineSpaceT<L>        rcp( const AffineSpaceT<L>& a ) { L il = rcp(a.l); return AffineSpaceT<L>(il,-(il*a.p)); }
////////////////////////////////////////////////////////////////////////////////
// Binary Operators
////////////////////////////////////////////////////////////////////////////////

template<typename L> __forceinline const AffineSpaceT<L> operator +( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l+b.l,a.p+b.p); }
template<typename L> __forceinline const AffineSpaceT<L> operator -( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l-b.l,a.p-b.p); }

template<typename L> __forceinline const AffineSpaceT<L> operator *( const ScalarT        & a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a*b.l,a*b.p); }
/* composition: (a*b)(x) == a(b(x)) */
template<typename L> __forceinline const AffineSpaceT<L> operator *( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l*b.l,a.l*b.p+a.p); }
/* division composes with the inverse */
template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a * rcp(b); }
template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const ScalarT        & b ) { return a * rcp(b); }

template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a * b; }
template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const ScalarT        & b ) { return a = a * b; }
template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a / b; }
template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const ScalarT        & b ) { return a = a / b; }

/* points get the translation applied (via fused multiply-add); vectors and
   normals are transformed by the linear part only */
template<typename L> __forceinline VectorT xfmPoint (const AffineSpaceT<L>& m, const VectorT& p) { return madd(VectorT(p.x),m.l.vx,madd(VectorT(p.y),m.l.vy,madd(VectorT(p.z),m.l.vz,m.p))); }
template<typename L> __forceinline VectorT xfmVector(const AffineSpaceT<L>& m, const VectorT& v) { return xfmVector(m.l,v); }
template<typename L> __forceinline VectorT xfmNormal(const AffineSpaceT<L>& m, const VectorT& n) { return xfmNormal(m.l,n); }
/* transforms an axis-aligned box: maps all 8 corners and returns their
   bounding box (a transformed AABB is generally not axis-aligned, so the
   hull of the corners is taken) */
__forceinline const BBox<Vec3fa> xfmBounds(const AffineSpaceT<LinearSpace3<Vec3fa> >& m, const BBox<Vec3fa>& b)
{
  BBox3fa dst = empty;
  for (int corner = 0; corner < 8; corner++) {
    const Vec3fa q((corner & 4) ? b.upper.x : b.lower.x,
                   (corner & 2) ? b.upper.y : b.lower.y,
                   (corner & 1) ? b.upper.z : b.lower.z);
    dst.extend(xfmPoint(m,q));
  }
  return dst;
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
/* two affine spaces are equal iff both the linear part and the origin match */
template<typename L> __forceinline bool operator ==( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a.l == b.l && a.p == b.p; }
template<typename L> __forceinline bool operator !=( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a.l != b.l || a.p != b.p; }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
/* lane-wise select between two affine spaces (for SIMD scalar types) */
template<typename L> __forceinline AffineSpaceT<L> select ( const typename L::Vector::Scalar::Bool& s, const AffineSpaceT<L>& t, const AffineSpaceT<L>& f ) {
return AffineSpaceT<L>(select(s,t.l,f.l),select(s,t.p,f.p));
}
////////////////////////////////////////////////////////////////////////////////
// Output Operators
////////////////////////////////////////////////////////////////////////////////
/* prints an affine space as "{ l = <linear part>, p = <origin> }" */
template<typename L> static embree_ostream operator<<(embree_ostream cout, const AffineSpaceT<L>& m) {
return cout << "{ l = " << m.l << ", p = " << m.p << " }";
}
////////////////////////////////////////////////////////////////////////////////
// Template Instantiations
////////////////////////////////////////////////////////////////////////////////
typedef AffineSpaceT<LinearSpace2f> AffineSpace2f;
typedef AffineSpaceT<LinearSpace3f> AffineSpace3f;
typedef AffineSpaceT<LinearSpace3fa> AffineSpace3fa;
typedef AffineSpaceT<LinearSpace3fx> AffineSpace3fx;
typedef AffineSpaceT<LinearSpace3ff> AffineSpace3ff;
typedef AffineSpaceT<Quaternion3f > OrthonormalSpace3f;
/* SIMD variants: N transforms stored in structure-of-arrays layout */
template<int N> using AffineSpace3vf = AffineSpaceT<LinearSpace3<Vec3<vfloat<N>>>>;
typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<4>>>> AffineSpace3vf4;
typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<8>>>> AffineSpace3vf8;
typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<16>>>> AffineSpace3vf16;
/* Vec4-based variants carry an extra component per column (used below to
   store a quaternion alongside the matrix) */
template<int N> using AffineSpace3vff = AffineSpaceT<LinearSpace3<Vec4<vfloat<N>>>>;
typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<4>>>> AffineSpace3vfa4;
typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<8>>>> AffineSpace3vfa8;
typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<16>>>> AffineSpace3vfa16;
//////////////////////////////////////////////////////////////////////////////
/// Interpolation
//////////////////////////////////////////////////////////////////////////////
/* componentwise linear interpolation of linear part and origin; note this is
   not a rigid-motion interpolation (use slerp below for that) */
template<typename T, typename R>
__forceinline AffineSpaceT<T> lerp(const AffineSpaceT<T>& M0,
const AffineSpaceT<T>& M1,
const R& t)
{
return AffineSpaceT<T>(lerp(M0.l,M1.l,t),lerp(M0.p,M1.p,t));
}
// slerp interprets the 16 floats of the matrix M = D * R * S as components of
// three matrices (D, R, S) that are interpolated individually.
// Packing convention (see quaternionDecomposition below): the rotation
// quaternion is stored in the .w lanes (p.w = r, l.vx.w = i, l.vy.w = j,
// l.vz.w = k), the translation D in the strictly-lower slots of the linear
// part (l.vx.y, l.vx.z, l.vy.z), and the remaining entries form the
// scale/skew matrix S.
template<typename T> __forceinline AffineSpaceT<LinearSpace3<Vec3<T>>>
slerp(const AffineSpaceT<LinearSpace3<Vec4<T>>>& M0,
const AffineSpaceT<LinearSpace3<Vec4<T>>>& M1,
const T& t)
{
// spherical interpolation of the two packed rotations
QuaternionT<T> q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
QuaternionT<T> q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
QuaternionT<T> q = slerp(q0, q1, t);
// linear interpolation of scale/skew and translation (still packed in S)
AffineSpaceT<LinearSpace3<Vec3<T>>> S = lerp(M0, M1, t);
// unpack the translation from S into D and zero those slots in S
AffineSpaceT<LinearSpace3<Vec3<T>>> D(one);
D.p.x = S.l.vx.y;
D.p.y = S.l.vx.z;
D.p.z = S.l.vy.z;
S.l.vx.y = 0;
S.l.vx.z = 0;
S.l.vy.z = 0;
AffineSpaceT<LinearSpace3<Vec3<T>>> R = LinearSpace3<Vec3<T>>(q);
return D * R * S;
}
// this is a specialized version for Vec3fa because that does
// not play along nicely with the other templated Vec3/Vec4 types
// (same algorithm as the template above: slerp the packed quaternion,
// lerp scale/skew and translation, then recompose D * R * S)
__forceinline AffineSpace3fa slerp(const AffineSpace3ff& M0,
const AffineSpace3ff& M1,
const float& t)
{
Quaternion3f q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
Quaternion3f q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
Quaternion3f q = slerp(q0, q1, t);
AffineSpace3fa S = lerp(M0, M1, t);
AffineSpace3fa D(one);
// unpack translation from the packed slots of S
D.p.x = S.l.vx.y;
D.p.y = S.l.vx.z;
D.p.z = S.l.vy.z;
S.l.vx.y = 0;
S.l.vx.z = 0;
S.l.vy.z = 0;
AffineSpace3fa R = LinearSpace3fa(q);
return D * R * S;
}
/*! converts a packed quaternion decomposition (see quaternionDecomposition
 *  below for the slot layout) back into a plain affine transform D * R * M,
 *  where D is the translation, R the rotation from the packed quaternion,
 *  and M the remaining scale/skew part. */
__forceinline AffineSpace3fa quaternionDecompositionToAffineSpace(const AffineSpace3ff& qd)
{
// compute affine transform from quaternion decomposition
Quaternion3f q(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
AffineSpace3fa M = qd;
AffineSpace3fa D(one);
// translation is packed into the strictly-lower slots of the linear part
D.p.x = M.l.vx.y;
D.p.y = M.l.vx.z;
D.p.z = M.l.vy.z;
M.l.vx.y = 0;
M.l.vx.z = 0;
M.l.vy.z = 0;
AffineSpace3fa R = LinearSpace3fa(q);
return D * R * M;
}
/*! unpacks a quaternion decomposition into its three components:
 *  \param qd packed decomposition (quaternion in .w lanes, translation in
 *            the strictly-lower linear slots, scale/skew in the rest)
 *  \param T  output translation
 *  \param q  output rotation quaternion
 *  \param S  output scale/skew transform (packed slots zeroed) */
__forceinline void quaternionDecomposition(const AffineSpace3ff& qd, Vec3fa& T, Quaternion3f& q, AffineSpace3fa& S)
{
q = Quaternion3f(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
S = qd;
T.x = qd.l.vx.y;
T.y = qd.l.vx.z;
T.z = qd.l.vy.z;
S.l.vx.y = 0;
S.l.vx.z = 0;
S.l.vy.z = 0;
}
/*! packs translation T, rotation q, and scale/skew S into one AffineSpace:
 *  quaternion components go into the .w lanes, the translation into the
 *  strictly-lower slots of the linear part (inverse of the overload above).
 *  NOTE(review): the local is AffineSpace3ff but the declared return type is
 *  AffineSpace3fx — relies on implicit conversion between the two; confirm
 *  this is intentional. */
__forceinline AffineSpace3fx quaternionDecomposition(Vec3fa const& T, Quaternion3f const& q, AffineSpace3fa const& S)
{
AffineSpace3ff M = S;
M.l.vx.w = q.i;
M.l.vy.w = q.j;
M.l.vz.w = q.k;
M.p.w = q.r;
M.l.vx.y = T.x;
M.l.vx.z = T.y;
M.l.vy.z = T.z;
return M;
}
/*! plain named-field view of a quaternion decomposition; defaults form the
 *  identity transform (unit scale, no skew/shift, identity quaternion,
 *  zero translation). 16-byte aligned: 16 floats, matching an AffineSpace3ff. */
struct __aligned(16) QuaternionDecomposition
{
float scale_x = 1.f;
float scale_y = 1.f;
float scale_z = 1.f;
float skew_xy = 0.f;
float skew_xz = 0.f;
float skew_yz = 0.f;
float shift_x = 0.f;
float shift_y = 0.f;
float shift_z = 0.f;
// identity rotation: r=1, i=j=k=0
float quaternion_r = 1.f;
float quaternion_i = 0.f;
float quaternion_j = 0.f;
float quaternion_k = 0.f;
float translation_x = 0.f;
float translation_y = 0.f;
float translation_z = 0.f;
};
/*! unpacks a packed AffineSpace3ff into the named-field struct above.
 *  Slot mapping: diagonal -> scale, upper-triangular linear entries -> skew,
 *  origin xyz -> shift, strictly-lower linear entries -> translation,
 *  .w lanes -> quaternion. */
__forceinline QuaternionDecomposition quaternionDecomposition(AffineSpace3ff const& M)
{
QuaternionDecomposition qd;
qd.scale_x = M.l.vx.x;
qd.scale_y = M.l.vy.y;
qd.scale_z = M.l.vz.z;
qd.shift_x = M.p.x;
qd.shift_y = M.p.y;
qd.shift_z = M.p.z;
qd.translation_x = M.l.vx.y;
qd.translation_y = M.l.vx.z;
qd.translation_z = M.l.vy.z;
qd.skew_xy = M.l.vy.x;
qd.skew_xz = M.l.vz.x;
qd.skew_yz = M.l.vz.y;
qd.quaternion_r = M.p.w;
qd.quaternion_i = M.l.vx.w;
qd.quaternion_j = M.l.vy.w;
qd.quaternion_k = M.l.vz.w;
return qd;
}
////////////////////////////////////////////////////////////////////////////////
/*
 * ! Template Specialization for 2D: return matrix for rotation around point
 * (rotation around an arbitrary vector is not meaningful in 2D)
 */
/* rotates by angle r about point p: translate p to the origin, rotate, translate back */
template<> __forceinline
AffineSpace2f AffineSpace2f::rotate(const Vec2f& p, const float& r) {
return translate(+p)*AffineSpace2f(LinearSpace2f::rotate(r))*translate(-p);
}
////////////////////////////////////////////////////////////////////////////////
// Similarity Transform
//
// Checks if M is a similarity transformation, i.e. if there exists a factor D
// such that for all x,y: distance(Mx, My) = D * distance(x, y)
////////////////////////////////////////////////////////////////////////////////
/*! \param M transform to test
 *  \param D optional output: the uniform scale factor (0 if M is not a similarity)
 *  \return true iff the columns of M.l are pairwise orthogonal and of equal
 *          length, within an absolute tolerance of 1e-5. */
__forceinline bool similarityTransform(const AffineSpace3fa& M, float* D)
{
if (D) *D = 0.f;
// columns must be pairwise orthogonal
if (abs(dot(M.l.vx, M.l.vy)) > 1e-5f) return false;
if (abs(dot(M.l.vx, M.l.vz)) > 1e-5f) return false;
if (abs(dot(M.l.vy, M.l.vz)) > 1e-5f) return false;
// squared lengths of the columns must agree
const float D_x = dot(M.l.vx, M.l.vx);
const float D_y = dot(M.l.vy, M.l.vy);
const float D_z = dot(M.l.vz, M.l.vz);
if (abs(D_x - D_y) > 1e-5f ||
abs(D_x - D_z) > 1e-5f ||
abs(D_y - D_z) > 1e-5f)
return false;
if (D) *D = sqrtf(D_x);
return true;
}
/*! stores an AffineSpace3fa to memory that may lack the alignment Vec3fa
 *  normally requires, one column at a time via unaligned stores. */
__forceinline void AffineSpace3fa_store_unaligned(const AffineSpace3fa &source, AffineSpace3fa* ptr)
{
Vec3fa::storeu(&ptr->l.vx, source.l.vx);
Vec3fa::storeu(&ptr->l.vy, source.l.vy);
Vec3fa::storeu(&ptr->l.vz, source.l.vz);
Vec3fa::storeu(&ptr->p, source.p);
}
/*! loads an AffineSpace3fa from memory that may lack the alignment Vec3fa
 *  normally requires, one column at a time via unaligned loads.
 *  \param ptr source transform (read-only; now const-qualified so callers
 *             can pass pointers to const data — non-const pointers still bind)
 *  \return the loaded transform */
__forceinline AffineSpace3fa AffineSpace3fa_load_unaligned(const AffineSpace3fa* ptr)
{
AffineSpace3fa space;
space.l.vx = Vec3fa::loadu(&ptr->l.vx);
space.l.vy = Vec3fa::loadu(&ptr->l.vy);
space.l.vz = Vec3fa::loadu(&ptr->l.vz);
space.p = Vec3fa::loadu(&ptr->p);
return space;
}
#undef VectorT
#undef ScalarT
}

View file

@ -0,0 +1,336 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec2.h"
#include "vec3.h"
namespace embree
{
namespace internal {
/* halves a value; generic form divides, float/double specializations
   multiply by 0.5 instead (exact, and avoids a division) */
template <typename T> __forceinline T divideByTwo(const T& v) { return v / T(2); }
template <> __forceinline float divideByTwo<float>(const float& v) { return v * 0.5f; }
template <> __forceinline double divideByTwo<double>(const double& v) { return v * 0.5; }
} // namespace internal
/*! axis-aligned bounding box over any vector (or scalar) type T,
 *  represented by its lower and upper corners */
template<typename T>
struct BBox
{
T lower, upper;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline BBox ( ) { }
template<typename T1>
__forceinline BBox ( const BBox<T1>& other ) : lower(other.lower), upper(other.upper) {}
__forceinline BBox& operator=( const BBox& other ) { lower = other.lower; upper = other.upper; return *this; }
// point box: both corners at v
__forceinline BBox ( const T& v ) : lower(v), upper(v) {}
__forceinline BBox ( const T& lower, const T& upper ) : lower(lower), upper(upper) {}
////////////////////////////////////////////////////////////////////////////////
/// Extending Bounds
////////////////////////////////////////////////////////////////////////////////
__forceinline const BBox& extend(const BBox& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; }
__forceinline const BBox& extend(const T & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; }
/*! tests if box is empty (some component of lower exceeds upper);
    generic form requires T::N components — specialized below for float and SSE types */
__forceinline bool empty() const { for (int i=0; i<T::N; i++) if (lower[i] > upper[i]) return true; return false; }
/*! computes the size of the box */
__forceinline T size() const { return upper - lower; }
/*! computes the center of the box */
__forceinline T center() const { return internal::divideByTwo<T>(lower+upper); }
/*! computes twice the center of the box (avoids the halving) */
__forceinline T center2() const { return lower+upper; }
/*! merges two boxes */
__forceinline static const BBox merge (const BBox& a, const BBox& b) {
return BBox(min(a.lower, b.lower), max(a.upper, b.upper));
}
/*! intersects two boxes */
__forceinline static const BBox intersect (const BBox& a, const BBox& b) {
return BBox(max(a.lower, b.lower), min(a.upper, b.upper));
}
/*! enlarge box by some scaling factor */
__forceinline BBox enlarge_by(const float a) const {
return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper));
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
// empty/false/neg_inf all construct the inverted (empty) box; full/true/pos_inf the all-encompassing one
__forceinline BBox( EmptyTy ) : lower(pos_inf), upper(neg_inf) {}
__forceinline BBox( FullTy ) : lower(neg_inf), upper(pos_inf) {}
__forceinline BBox( FalseTy ) : lower(pos_inf), upper(neg_inf) {}
__forceinline BBox( TrueTy ) : lower(neg_inf), upper(pos_inf) {}
__forceinline BBox( NegInfTy ): lower(pos_inf), upper(neg_inf) {}
__forceinline BBox( PosInfTy ): lower(neg_inf), upper(pos_inf) {}
};
/* scalar specialization: no component loop needed */
template<> __forceinline bool BBox<float>::empty() const {
return lower > upper;
}
#if defined(__SSE__) || defined(__ARM_NEON)
/* SIMD specializations: one masked compare instead of a component loop */
template<> __forceinline bool BBox<Vec3fa>::empty() const {
return !all(le_mask(lower,upper));
}
template<> __forceinline bool BBox<Vec3fx>::empty() const {
return !all(le_mask(lower,upper));
}
#endif
/*! tests if box is finite (all corner components within +/-FLT_LARGE) */
__forceinline bool isvalid( const BBox<Vec3fa>& v ) {
return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)));
}
/*! tests if box is finite and non-empty*/
__forceinline bool isvalid_non_empty( const BBox<Vec3fa>& v ) {
return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)) & le_mask(v.lower,v.upper));
}
/*! tests if box has finite entries */
__forceinline bool is_finite( const BBox<Vec3fa>& b) {
return is_finite(b.lower) && is_finite(b.upper);
}
/*! test if point contained in box (inclusive on both corners) */
__forceinline bool inside ( const BBox<Vec3fa>& b, const Vec3fa& p ) { return all(ge_mask(p,b.lower) & le_mask(p,b.upper)); }
/*! computes the center of the box (center2 returns twice the center, avoiding the halving) */
template<typename T> __forceinline const T center2(const BBox<T>& box) { return box.lower + box.upper; }
template<typename T> __forceinline const T center (const BBox<T>& box) { return internal::divideByTwo<T>(center2(box)); }
/*! computes the volume of a bounding box */
__forceinline float volume ( const BBox<Vec3fa>& b ) { return reduce_mul(b.size()); }
/*! like volume(), but returns 0 for empty boxes (whose size would be negative) */
__forceinline float safeVolume( const BBox<Vec3fa>& b ) { if (b.empty()) return 0.0f; else return volume(b); }
/*! computes the volume of a bounding box */
__forceinline float volume( const BBox<Vec3f>& b ) { return reduce_mul(b.size()); }
/*! computes the surface area of a bounding box */
template<typename T> __forceinline const T area( const BBox<Vec2<T> >& b ) { const Vec2<T> d = b.size(); return d.x*d.y; }
template<typename T> __forceinline const T halfArea( const BBox<Vec3<T> >& b ) { return halfArea(b.size()); }
template<typename T> __forceinline const T area( const BBox<Vec3<T> >& b ) { return T(2)*halfArea(b); }
__forceinline float halfArea( const BBox<Vec3fa>& b ) { return halfArea(b.size()); }
__forceinline float area( const BBox<Vec3fa>& b ) { return 2.0f*halfArea(b); }
__forceinline float halfArea( const BBox<Vec3fx>& b ) { return halfArea(b.size()); }
__forceinline float area( const BBox<Vec3fx>& b ) { return 2.0f*halfArea(b); }
/*! like area(), but returns 0 for empty boxes */
template<typename Vec> __forceinline float safeArea( const BBox<Vec>& b ) { if (b.empty()) return 0.0f; else return area(b); }
template<typename T> __forceinline float expectedApproxHalfArea(const BBox<T>& box) {
return halfArea(box);
}
/*! merges bounding boxes and points (componentwise min of lowers, max of uppers) */
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const T& b ) { return BBox<T>(min(a.lower, b ), max(a.upper, b )); }
template<typename T> __forceinline const BBox<T> merge( const T& a, const BBox<T>& b ) { return BBox<T>(min(a , b.lower), max(a , b.upper)); }
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(min(a.lower, b.lower), max(a.upper, b.upper)); }
/*! Merges three boxes. */
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return merge(a,merge(b,c)); }
/*! Merges four boxes. */
template<typename T> __forceinline BBox<T> merge(const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d) {
return merge(merge(a,b),merge(c,d));
}
/*! Comparison Operators */
template<typename T> __forceinline bool operator==( const BBox<T>& a, const BBox<T>& b ) { return a.lower == b.lower && a.upper == b.upper; }
template<typename T> __forceinline bool operator!=( const BBox<T>& a, const BBox<T>& b ) { return a.lower != b.lower || a.upper != b.upper; }
/*! scaling (applied to both corners; a negative scale would invert the box) */
template<typename T> __forceinline BBox<T> operator *( const float& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
template<typename T> __forceinline BBox<T> operator *( const T& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
/*! translations (Minkowski-style componentwise sums/differences of the corners) */
template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower+b.lower,a.upper+b.upper); }
template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower-b.lower,a.upper-b.upper); }
template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower+b ,a.upper+b ); }
template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower-b ,a.upper-b ); }
/*! extension: grows the box by b in every direction */
template<typename T> __forceinline BBox<T> enlarge(const BBox<T>& a, const T& b) { return BBox<T>(a.lower-b, a.upper+b); }
/*! intersect bounding boxes */
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(max(a.lower, b.lower), min(a.upper, b.upper)); }
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return intersect(a,intersect(b,c)); }
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d ) { return intersect(intersect(a,b),intersect(c,d)); }
/*! subtracts box b from box a along the split axis implied by the corners:
 *  c receives the part of a below b, d the part of a above b
 *  (either output may come back empty) */
template<typename T> __forceinline void subtract(const BBox<T>& a, const BBox<T>& b, BBox<T>& c, BBox<T>& d)
{
c = BBox<T>(a.lower, min(a.upper, b.lower));
d = BBox<T>(max(a.lower, b.upper), a.upper);
}
/*! tests if bounding boxes (and points) are disjoint (empty intersection) */
template<typename T> __inline bool disjoint( const BBox<T>& a, const BBox<T>& b ) { return intersect(a,b).empty(); }
template<typename T> __inline bool disjoint( const BBox<T>& a, const T& b ) { return disjoint(a,BBox<T>(b)); }
template<typename T> __inline bool disjoint( const T& a, const BBox<T>& b ) { return disjoint(BBox<T>(a),b); }
/*! tests if bounding boxes (and points) are conjoint (non-empty intersection) */
template<typename T> __inline bool conjoint( const BBox<T>& a, const BBox<T>& b ) { return !intersect(a,b).empty(); }
template<typename T> __inline bool conjoint( const BBox<T>& a, const T& b ) { return conjoint(a,BBox<T>(b)); }
template<typename T> __inline bool conjoint( const T& a, const BBox<T>& b ) { return conjoint(BBox<T>(a),b); }
/*! subset relation: true iff a lies fully inside b (generic form needs T::N;
    SSE types are specialized below with masked compares) */
template<typename T> __inline bool subset( const BBox<T>& a, const BBox<T>& b )
{
for ( size_t i = 0; i < T::N; i++ ) if ( a.lower[i] < b.lower[i] ) return false;
for ( size_t i = 0; i < T::N; i++ ) if ( a.upper[i] > b.upper[i] ) return false;
return true;
}
template<> __inline bool subset( const BBox<Vec3fa>& a, const BBox<Vec3fa>& b ) {
return all(ge_mask(a.lower,b.lower)) && all(le_mask(a.upper,b.upper));
}
template<> __inline bool subset( const BBox<Vec3fx>& a, const BBox<Vec3fx>& b ) {
return all(ge_mask(a.lower,b.lower)) && all(le_mask(a.upper,b.upper));
}
/*! blending: interpolates both corners independently */
template<typename T>
__forceinline BBox<T> lerp(const BBox<T>& b0, const BBox<T>& b1, const float t) {
return BBox<T>(lerp(b0.lower,b1.lower,t),lerp(b0.upper,b1.upper,t));
}
/*! output operator: prints a box as "[lower; upper]" */
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const BBox<T>& box) {
return cout << "[" << box.lower << "; " << box.upper << "]";
}
/*! default template instantiations */
typedef BBox<float> BBox1f;
typedef BBox<Vec2f> BBox2f;
typedef BBox<Vec2fa> BBox2fa;
typedef BBox<Vec3f> BBox3f;
typedef BBox<Vec3fa> BBox3fa;
typedef BBox<Vec3fx> BBox3fx;
typedef BBox<Vec3ff> BBox3ff;
}
////////////////////////////////////////////////////////////////////////////////
/// SSE / AVX / MIC specializations
////////////////////////////////////////////////////////////////////////////////
#if defined (__SSE__) || defined(__ARM_NEON)
#include "../simd/sse.h"
#endif
#if defined (__AVX__)
#include "../simd/avx.h"
#endif
#if defined(__AVX512F__)
#include "../simd/avx512.h"
#endif
namespace embree
{
/*! transposes N boxes from array-of-structures (BBox3fa[]) into one
 *  structure-of-arrays box of N-wide SIMD vectors */
template<int N>
__forceinline BBox<Vec3<vfloat<N>>> transpose(const BBox3fa* bounds);
template<>
__forceinline BBox<Vec3<vfloat4>> transpose<4>(const BBox3fa* bounds)
{
BBox<Vec3<vfloat4>> dest;
// gather the 4 lower corners into x/y/z SIMD lanes
transpose((vfloat4&)bounds[0].lower,
(vfloat4&)bounds[1].lower,
(vfloat4&)bounds[2].lower,
(vfloat4&)bounds[3].lower,
dest.lower.x,
dest.lower.y,
dest.lower.z);
// and likewise the 4 upper corners
transpose((vfloat4&)bounds[0].upper,
(vfloat4&)bounds[1].upper,
(vfloat4&)bounds[2].upper,
(vfloat4&)bounds[3].upper,
dest.upper.x,
dest.upper.y,
dest.upper.z);
return dest;
}
#if defined(__AVX__)
/*! 8-wide variant: transposes eight BBox3fa into one 8-wide SoA box
 *  (the 8-input transpose overload produces vfloat8 outputs) */
template<>
__forceinline BBox<Vec3<vfloat8>> transpose<8>(const BBox3fa* bounds)
{
BBox<Vec3<vfloat8>> dest;
transpose((vfloat4&)bounds[0].lower,
(vfloat4&)bounds[1].lower,
(vfloat4&)bounds[2].lower,
(vfloat4&)bounds[3].lower,
(vfloat4&)bounds[4].lower,
(vfloat4&)bounds[5].lower,
(vfloat4&)bounds[6].lower,
(vfloat4&)bounds[7].lower,
dest.lower.x,
dest.lower.y,
dest.lower.z);
transpose((vfloat4&)bounds[0].upper,
(vfloat4&)bounds[1].upper,
(vfloat4&)bounds[2].upper,
(vfloat4&)bounds[3].upper,
(vfloat4&)bounds[4].upper,
(vfloat4&)bounds[5].upper,
(vfloat4&)bounds[6].upper,
(vfloat4&)bounds[7].upper,
dest.upper.x,
dest.upper.y,
dest.upper.z);
return dest;
}
#endif
/*! merges N boxes from an array into a single enclosing box */
template<int N>
__forceinline BBox3fa merge(const BBox3fa* bounds);
template<>
__forceinline BBox3fa merge<4>(const BBox3fa* bounds)
{
// pairwise min/max tree over the 4 boxes
const Vec3fa lower = min(min(bounds[0].lower,bounds[1].lower),
min(bounds[2].lower,bounds[3].lower));
const Vec3fa upper = max(max(bounds[0].upper,bounds[1].upper),
max(bounds[2].upper,bounds[3].upper));
return BBox3fa(lower,upper);
}
#if defined(__AVX__)
/*! 8-box variant of merge: pairwise min/max tree over all 8 boxes */
template<>
__forceinline BBox3fa merge<8>(const BBox3fa* bounds)
{
const Vec3fa lower = min(min(min(bounds[0].lower,bounds[1].lower),min(bounds[2].lower,bounds[3].lower)),
min(min(bounds[4].lower,bounds[5].lower),min(bounds[6].lower,bounds[7].lower)));
const Vec3fa upper = max(max(max(bounds[0].upper,bounds[1].upper),max(bounds[2].upper,bounds[3].upper)),
max(max(bounds[4].upper,bounds[5].upper),max(bounds[6].upper,bounds[7].upper)));
return BBox3fa(lower,upper);
}
#endif
}

View file

@ -0,0 +1,47 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// RGB Color Class
////////////////////////////////////////////////////////////////////////////////
/*! generic 3-component RGB color */
template<typename T> struct Col3
{
T r, g, b;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Col3 ( ) { }
__forceinline Col3 ( const Col3& other ) { r = other.r; g = other.g; b = other.b; }
__forceinline Col3& operator=( const Col3& other ) { r = other.r; g = other.g; b = other.b; return *this; }
// broadcast one value to all three channels (explicit to avoid accidental conversion)
__forceinline explicit Col3 (const T& v) : r(v), g(v), b(v) {}
__forceinline Col3 (const T& r, const T& g, const T& b) : r(r), g(g), b(b) {}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Col3 (ZeroTy) : r(zero) , g(zero) , b(zero) {}
__forceinline Col3 (OneTy) : r(one) , g(one) , b(one) {}
__forceinline Col3 (PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf) {}
__forceinline Col3 (NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf) {}
};
/*! output operator: prints a color as "(r, g, b)" */
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Col3<T>& a) {
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
}
/*! default template instantiations */
typedef Col3<unsigned char> Col3uc;
typedef Col3<float > Col3f;
}

View file

@ -0,0 +1,47 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// RGBA Color Class
////////////////////////////////////////////////////////////////////////////////
/*! generic 4-component RGBA color */
template<typename T> struct Col4
{
T r, g, b, a;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Col4 ( ) { }
__forceinline Col4 ( const Col4& other ) { r = other.r; g = other.g; b = other.b; a = other.a; }
__forceinline Col4& operator=( const Col4& other ) { r = other.r; g = other.g; b = other.b; a = other.a; return *this; }
// broadcast one value to all four channels (explicit to avoid accidental conversion)
__forceinline explicit Col4 (const T& v) : r(v), g(v), b(v), a(v) {}
__forceinline Col4 (const T& r, const T& g, const T& b, const T& a) : r(r), g(g), b(b), a(a) {}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Col4 (ZeroTy) : r(zero) , g(zero) , b(zero) , a(zero) {}
__forceinline Col4 (OneTy) : r(one) , g(one) , b(one) , a(one) {}
__forceinline Col4 (PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf), a(pos_inf) {}
__forceinline Col4 (NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf), a(neg_inf) {}
};
/*! output operator: prints a color as "(r, g, b, a)" */
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Col4<T>& a) {
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ", " << a.a << ")";
}
/*! default template instantiations */
typedef Col4<unsigned char> Col4uc;
typedef Col4<float > Col4f;
}

View file

@ -0,0 +1,268 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "color_sycl.h"
#else
#include "constants.h"
#include "col3.h"
#include "col4.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE RGBA Color Class
////////////////////////////////////////////////////////////////////////////////
/*! RGBA color stored in one SSE register; _mm_set_ps(a,b,g,r) puts
 *  r in lane 0, g in lane 1, b in lane 2, a in lane 3 */
struct Color4
{
union {
__m128 m128;
struct { float r,g,b,a; };
};
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Color4 () {}
__forceinline Color4 ( const __m128 a ) : m128(a) {}
__forceinline explicit Color4 (const float v) : m128(_mm_set1_ps(v)) {}
__forceinline Color4 (const float r, const float g, const float b, const float a) : m128(_mm_set_ps(a,b,g,r)) {}
// from 8-bit RGB: scales channels by 1/255; alpha becomes 255*(1/255) = 1.0
__forceinline explicit Color4 ( const Col3uc& other ) { m128 = _mm_mul_ps(_mm_set_ps(255.0f,other.b,other.g,other.r),_mm_set1_ps(one_over_255)); }
__forceinline explicit Color4 ( const Col3f& other ) { m128 = _mm_set_ps(1.0f,other.b,other.g,other.r); }
__forceinline explicit Color4 ( const Col4uc& other ) { m128 = _mm_mul_ps(_mm_set_ps(other.a,other.b,other.g,other.r),_mm_set1_ps(one_over_255)); }
__forceinline explicit Color4 ( const Col4f& other ) { m128 = _mm_set_ps(other.a,other.b,other.g,other.r); }
__forceinline Color4 ( const Color4& other ) : m128(other.m128) {}
__forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; }
__forceinline operator const __m128&() const { return m128; }
__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
// conversion to 8-bit: clamp then scale to 0..255 (clamp() presumably defaults to [0,1] — confirm)
__forceinline void set(Col3uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
}
__forceinline void set(Col4uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
d.a = (unsigned char)(s[3]);
}
__forceinline void set(float &f) const
{
f = 0.2126f*r+0.7125f*g+0.0722f*b; // Rec.709-style luma. NOTE(review): the standard's green weight is 0.7152; 0.7125 here makes the weights sum to 0.9973 — looks like a typo, confirm upstream.
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Color4( ZeroTy ) : m128(_mm_set1_ps(0.0f)) {}
__forceinline Color4( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
};
////////////////////////////////////////////////////////////////////////////////
/// SSE RGB Color Class
////////////////////////////////////////////////////////////////////////////////
/*! RGB color stored in one SSE register (lane 3 unused; zeroed by the
 *  3-component constructor) */
struct Color
{
union {
__m128 m128;
struct { float r,g,b; };
};
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Color () {}
__forceinline Color ( const __m128 a ) : m128(a) {}
__forceinline explicit Color (const float v) : m128(_mm_set1_ps(v)) {}
__forceinline Color (const float r, const float g, const float b) : m128(_mm_set_ps(0.0f,b,g,r)) {}
__forceinline Color ( const Color& other ) : m128(other.m128) {}
__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
// conversion from Color4 keeps the raw register (alpha lane carried along but unused)
__forceinline Color ( const Color4& other ) : m128(other.m128) {}
__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
__forceinline operator const __m128&() const { return m128; }
__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
// RGB color is opaque: alpha written as 1.0 / 255
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
__forceinline void set(Col3uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
}
__forceinline void set(Col4uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
d.a = 255;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Color( ZeroTy ) : m128(_mm_set1_ps(0.0f)) {}
__forceinline Color( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator +( const Color& a ) { return a; }
/* negation: flips the sign bit of every lane */
__forceinline const Color operator -( const Color& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return _mm_xor_ps(a.m128, mask);
}
/* absolute value: clears the sign bit of every lane */
__forceinline const Color abs ( const Color& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return _mm_and_ps(a.m128, mask);
}
/*! per-lane reciprocal: hardware estimate refined by Newton-Raphson iteration */
__forceinline const Color rcp ( const Color& a )
{
#if defined(__aarch64__)
// NEON: rcp estimate plus two vrecps refinement steps
__m128 reciprocal = _mm_rcp_ps(a.m128);
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
return (const Color)reciprocal;
#else
#if defined(__AVX512VL__)
const Color r = _mm_rcp14_ps(a.m128);
#else
const Color r = _mm_rcp_ps(a.m128);
#endif
return _mm_add_ps(r,_mm_mul_ps(r, _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(a, r)))); // computes r + r * (1 - a * r)
#endif //defined(__aarch64__)
}
/*! per-lane reciprocal square root: hardware estimate refined by Newton-Raphson iteration */
__forceinline const Color rsqrt( const Color& a )
{
#if defined(__aarch64__)
// NEON: rsqrt estimate plus two vrsqrts refinement steps
__m128 r = _mm_rsqrt_ps(a.m128);
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
return r;
#else
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ps(a.m128);
#else
__m128 r = _mm_rsqrt_ps(a.m128);
#endif
// one Newton-Raphson step: 1.5*r - 0.5*a*r^3
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
#endif //defined(__aarch64__)
}
__forceinline const Color sqrt ( const Color& a ) { return _mm_sqrt_ps(a.m128); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// Component-wise arithmetic on all four SSE lanes.
__forceinline const Color operator +( const Color& a, const Color& b ) { return _mm_add_ps(a.m128, b.m128); }
__forceinline const Color operator -( const Color& a, const Color& b ) { return _mm_sub_ps(a.m128, b.m128); }
__forceinline const Color operator *( const Color& a, const Color& b ) { return _mm_mul_ps(a.m128, b.m128); }
// Scalar forms broadcast the float into all channels.
__forceinline const Color operator *( const Color& a, const float b ) { return a * Color(b); }
__forceinline const Color operator *( const float a, const Color& b ) { return Color(a) * b; }
// Division is implemented via the approximate (Newton-refined) reciprocal, not _mm_div_ps.
__forceinline const Color operator /( const Color& a, const Color& b ) { return a * rcp(b); }
__forceinline const Color operator /( const Color& a, const float b ) { return a * rcp(b); }
// Component-wise minimum/maximum.
__forceinline const Color min( const Color& a, const Color& b ) { return _mm_min_ps(a.m128,b.m128); }
__forceinline const Color max( const Color& a, const Color& b ) { return _mm_max_ps(a.m128,b.m128); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
// Compound assignment operators, expressed in terms of the binary operators above.
// Each returns a copy of the updated value (by-value const Color), matching embree style.
__forceinline const Color operator+=(Color& a, const Color& b) { return a = a + b; }
__forceinline const Color operator-=(Color& a, const Color& b) { return a = a - b; }
__forceinline const Color operator*=(Color& a, const Color& b) { return a = a * b; }
__forceinline const Color operator/=(Color& a, const Color& b) { return a = a / b; }
__forceinline const Color operator*=(Color& a, const float b ) { return a = a * b; }
__forceinline const Color operator/=(Color& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
// Horizontal reductions over the three color channels (the 4th SSE lane is ignored).
__forceinline float reduce_add(const Color& v) { return v.r+v.g+v.b; }
__forceinline float reduce_mul(const Color& v) { return v.r*v.g*v.b; }
__forceinline float reduce_min(const Color& v) { return min(v.r,v.g,v.b); }
__forceinline float reduce_max(const Color& v) { return max(v.r,v.g,v.b); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
// Equality compares the r,g,b lanes only; the `& 7` mask discards the unused 4th lane.
__forceinline bool operator ==( const Color& a, const Color& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
__forceinline bool operator !=( const Color& a, const Color& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
// Lexicographic ordering (r, then g, then b), e.g. for use as a sorted-container key.
__forceinline bool operator < ( const Color& a, const Color& b ) {
if (a.r != b.r) return a.r < b.r;
if (a.g != b.g) return a.g < b.g;
if (a.b != b.b) return a.b < b.b;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
// Branch-free select: returns t when s is true, f otherwise.
__forceinline const Color select( bool s, const Color& t, const Color& f ) {
// All-ones mask when s is true (cmpeq of zero with zero), all-zeros otherwise.
__m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
return blendv_ps(f, t, mask);
}
////////////////////////////////////////////////////////////////////////////////
/// Special Operators
////////////////////////////////////////////////////////////////////////////////
/*! computes luminance of a color (Rec.709 weights 0.2126/0.7152/0.0722, stored at higher precision) */
__forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
/*! output operator, prints "(r, g, b)" */
__forceinline embree_ostream operator<<(embree_ostream cout, const Color& a) {
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
}
}
#endif

View file

@ -0,0 +1,219 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "constants.h"
#include "col3.h"
#include "col4.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE RGBA Color Class
////////////////////////////////////////////////////////////////////////////////
/*! 4-component RGBA color with plain scalar (non-SIMD) storage. */
struct Color4
{
struct { float r,g,b,a; };  // channel values, conventionally in [0,1]

////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////

__forceinline Color4 () {}
/*! broadcasts v into all four channels */
__forceinline explicit Color4 (const float v) : r(v), g(v), b(v), a(v) {}
__forceinline Color4 (const float r, const float g, const float b, const float a) : r(r), g(g), b(b), a(a) {}
/*! converts 8-bit RGB to float; alpha defaults to fully opaque */
__forceinline explicit Color4 ( const Col3uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(1.0f) {}
__forceinline explicit Color4 ( const Col3f& other ) : r(other.r), g(other.g), b(other.b), a(1.0f) {}
/*! converts 8-bit RGBA to float RGBA */
__forceinline explicit Color4 ( const Col4uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(other.a/255.0f) {}
__forceinline explicit Color4 ( const Col4f& other ) : r(other.r), g(other.g), b(other.b), a(other.a) {}

////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////

__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }  // alpha is dropped
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
/*! stores as 8-bit RGB; channels are clamped to [0,1] first */
__forceinline void set(Col3uc& d) const
{
d.r = (unsigned char)(clamp(r)*255.0f);
d.g = (unsigned char)(clamp(g)*255.0f);
d.b = (unsigned char)(clamp(b)*255.0f);
}
/*! stores as 8-bit RGBA; channels are clamped to [0,1] first */
__forceinline void set(Col4uc& d) const
{
d.r = (unsigned char)(clamp(r)*255.0f);
d.g = (unsigned char)(clamp(g)*255.0f);
d.b = (unsigned char)(clamp(b)*255.0f);
d.a = (unsigned char)(clamp(a)*255.0f);
}
/*! stores the sRGB/Rec.709 relative luminance of the color */
__forceinline void set(float &f) const
{
// Fixed green weight: was 0.7125f, a typo — Rec.709 luminance weights
// (0.2126, 0.7152, 0.0722) must sum to 1.
f = 0.2126f*r+0.7152f*g+0.0722f*b; // sRGB luminance.
}

////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////

__forceinline Color4( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f), a(0.0f) {}
__forceinline Color4( OneTy ) : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
};
////////////////////////////////////////////////////////////////////////////////
/// SSE RGB Color Class
////////////////////////////////////////////////////////////////////////////////
// 3-component RGB color with plain scalar (non-SIMD) storage.
struct Color
{
struct { float r,g,b; };  // channel values, conventionally in [0,1]
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Color () {}
//__forceinline Color ( const __m128 a ) : m128(a) {}
// Broadcasts v into all three channels.
__forceinline explicit Color  (const float v) : r(v), g(v), b(v) {}
__forceinline          Color  (const float r, const float g, const float b) : r(r), g(g), b(b) {}
//__forceinline Color           ( const Color& other ) : m128(other.m128) {}
//__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
//__forceinline Color           ( const Color4& other ) : m128(other.m128) {}
//__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator       __m128&()       { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
// Converting to RGBA sets alpha to fully opaque.
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
#if 0
__forceinline void set(Col3uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
}
__forceinline void set(Col4uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
d.a = 255;
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Color( ZeroTy )   : r(0.0f), g(0.0f), b(0.0f) {}
__forceinline Color( OneTy )    : r(1.0f), g(1.0f), b(1.0f) {}
//__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
//__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
// Unary plus: identity.
__forceinline const Color operator +( const Color& a ) { return a; }
// Unary minus: negates each channel.
__forceinline const Color operator -( const Color& a ) {
return Color(-a.r, -a.g, -a.b);
}
// Per-channel absolute value.
__forceinline const Color abs ( const Color& a ) {
return Color(abs(a.r), abs(a.g), abs(a.b));
}
// Per-channel reciprocal 1/x (exact scalar division, unlike the SSE variant).
__forceinline const Color rcp ( const Color& a ) {
return Color(1.0f/a.r, 1.0f/a.g, 1.0f/a.b);
}
// Per-channel reciprocal square root.
__forceinline const Color rsqrt( const Color& a ) {
return Color(1.0f/sqrt(a.r), 1.0f/sqrt(a.g), 1.0f/sqrt(a.b));
}
// Per-channel square root.
__forceinline const Color sqrt ( const Color& a ) {
return Color(sqrt(a.r), sqrt(a.g), sqrt(a.b));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// Component-wise addition.
__forceinline const Color operator +( const Color& a, const Color& b ) {
return Color(a.r+b.r, a.g+b.g, a.b+b.b);
}
// Component-wise subtraction.
__forceinline const Color operator -( const Color& a, const Color& b ) {
return Color(a.r-b.r, a.g-b.g, a.b-b.b);
}
// Component-wise multiplication.
__forceinline const Color operator *( const Color& a, const Color& b ) {
return Color(a.r*b.r, a.g*b.g, a.b*b.b);
}
// Scalar forms broadcast the float into all channels and reuse the vector form.
__forceinline const Color operator *( const Color& a, const float b ) { return a * Color(b); }
__forceinline const Color operator *( const float a, const Color& b ) { return Color(a) * b; }
// Division is expressed as multiplication by the reciprocal.
__forceinline const Color operator /( const Color& a, const Color& b ) { return a * rcp(b); }
__forceinline const Color operator /( const Color& a, const float  b ) { return a * rcp(b); }
// Component-wise minimum/maximum.
__forceinline const Color min( const Color& a, const Color& b ) {
return Color(min(a.r,b.r), min(a.g,b.g), min(a.b,b.b));
}
__forceinline const Color max( const Color& a, const Color& b ) {
return Color(max(a.r,b.r), max(a.g,b.g), max(a.b,b.b));
}
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
// Compound assignment operators; each updates a in place and returns a copy
// of the new value, matching the SSE variant's signatures.
__forceinline const Color operator+=(Color& a, const Color& b) { a = a + b; return a; }
__forceinline const Color operator-=(Color& a, const Color& b) { a = a - b; return a; }
__forceinline const Color operator*=(Color& a, const Color& b) { a = a * b; return a; }
__forceinline const Color operator/=(Color& a, const Color& b) { a = a / b; return a; }
__forceinline const Color operator*=(Color& a, const float b ) { a = a * b; return a; }
__forceinline const Color operator/=(Color& a, const float b ) { a = a / b; return a; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
// Horizontal reductions over the three channels.
// Grouping is written explicitly left-to-right, identical to the default
// associativity of the original expressions.
__forceinline float reduce_add(const Color& v) { return (v.r + v.g) + v.b; }
__forceinline float reduce_mul(const Color& v) { return (v.r * v.g) * v.b; }
__forceinline float reduce_min(const Color& v) { return min(min(v.r, v.g), v.b); }
__forceinline float reduce_max(const Color& v) { return max(max(v.r, v.g), v.b); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
// Exact component-wise equality (no epsilon; NaN channels compare unequal).
__forceinline bool operator ==( const Color& a, const Color& b ) { return a.r == b.r && a.g == b.g && a.b == b.b; }
__forceinline bool operator !=( const Color& a, const Color& b ) { return a.r != b.r || a.g != b.g || a.b != b.b; }
// Lexicographic ordering (r, then g, then b), e.g. for use as a sorted-container key.
__forceinline bool operator < ( const Color& a, const Color& b ) {
if (a.r != b.r) return a.r < b.r;
if (a.g != b.g) return a.g < b.g;
if (a.b != b.b) return a.b < b.b;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
// Returns t when s is true, f otherwise (scalar counterpart of the SSE blend).
__forceinline const Color select( bool s, const Color& t, const Color& f ) {
if (s) return t;
return f;
}
////////////////////////////////////////////////////////////////////////////////
/// Special Operators
////////////////////////////////////////////////////////////////////////////////
/*! computes luminance of a color (Rec.709 weights 0.2126/0.7152/0.0722, stored at higher precision) */
__forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
/*! output operator, prints "(r, g, b)" */
inline std::ostream& operator<<(std::ostream& cout, const Color& a) {
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
}
}

View file

@ -0,0 +1,8 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "constants.h"
namespace embree
{
}

View file

@ -0,0 +1,209 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include <limits>
#define _USE_MATH_DEFINES
#include <math.h> // using cmath causes issues under Windows
#include <cfloat>
#include <climits>
namespace embree
{
// Scale factor for converting 8-bit color channels into [0,1].
static MAYBE_UNUSED const float one_over_255 = 1.0f/255.0f;
static MAYBE_UNUSED const float min_rcp_input = 1E-18f;  // for abs(x) >= min_rcp_input the newton raphson rcp calculation does not fail
/* we consider floating point numbers in that range as valid input numbers */
// NOTE(review): non-const mutable global in upstream; left as-is.
static MAYBE_UNUSED float FLT_LARGE = 1.844E18f;
// Tag type that converts to boolean true; used for overload/constant dispatch.
struct TrueTy {
__forceinline operator bool( ) const { return true; }
};
const constexpr TrueTy True = TrueTy();
// Tag type that converts to boolean false.
struct FalseTy {
__forceinline operator bool( ) const { return false; }
};
const constexpr FalseTy False = FalseTy();
// Tag type convertible to 0 in every arithmetic type; lets `zero` initialize any numeric value.
struct ZeroTy
{
__forceinline operator double   ( ) const { return 0; }
__forceinline operator float    ( ) const { return 0; }
__forceinline operator long long( ) const { return 0; }
__forceinline operator unsigned long long( ) const { return 0; }
__forceinline operator long     ( ) const { return 0; }
__forceinline operator unsigned long ( ) const { return 0; }
__forceinline operator int      ( ) const { return 0; }
__forceinline operator unsigned int ( ) const { return 0; }
__forceinline operator short    ( ) const { return 0; }
__forceinline operator unsigned short ( ) const { return 0; }
__forceinline operator char     ( ) const { return 0; }
__forceinline operator unsigned char ( ) const { return 0; }
};
const constexpr ZeroTy zero = ZeroTy();
// Tag type convertible to 1 in every arithmetic type.
struct OneTy
{
__forceinline operator double   ( ) const { return 1; }
__forceinline operator float    ( ) const { return 1; }
__forceinline operator long long( ) const { return 1; }
__forceinline operator unsigned long long( ) const { return 1; }
__forceinline operator long     ( ) const { return 1; }
__forceinline operator unsigned long ( ) const { return 1; }
__forceinline operator int      ( ) const { return 1; }
__forceinline operator unsigned int ( ) const { return 1; }
__forceinline operator short    ( ) const { return 1; }
__forceinline operator unsigned short ( ) const { return 1; }
__forceinline operator char     ( ) const { return 1; }
__forceinline operator unsigned char ( ) const { return 1; }
};
const constexpr OneTy one = OneTy();
// Tag type for the most negative representable value: -infinity for floating
// point, numeric_limits::min() for integral types (which is 0 for unsigned).
struct NegInfTy
{
__forceinline operator double   ( ) const { return -std::numeric_limits<double>::infinity(); }
__forceinline operator float    ( ) const { return -std::numeric_limits<float>::infinity(); }
__forceinline operator long long( ) const { return std::numeric_limits<long long>::min(); }
__forceinline operator unsigned long long( ) const { return std::numeric_limits<unsigned long long>::min(); }
__forceinline operator long     ( ) const { return std::numeric_limits<long>::min(); }
__forceinline operator unsigned long ( ) const { return std::numeric_limits<unsigned long>::min(); }
__forceinline operator int      ( ) const { return std::numeric_limits<int>::min(); }
__forceinline operator unsigned int ( ) const { return std::numeric_limits<unsigned int>::min(); }
__forceinline operator short    ( ) const { return std::numeric_limits<short>::min(); }
__forceinline operator unsigned short ( ) const { return std::numeric_limits<unsigned short>::min(); }
__forceinline operator char     ( ) const { return std::numeric_limits<char>::min(); }
__forceinline operator unsigned char ( ) const { return std::numeric_limits<unsigned char>::min(); }
};
const constexpr NegInfTy neg_inf = NegInfTy();
// Tag type for the most positive representable value: +infinity for floating
// point, numeric_limits::max() for integral types.
struct PosInfTy
{
__forceinline operator double   ( ) const { return std::numeric_limits<double>::infinity(); }
__forceinline operator float    ( ) const { return std::numeric_limits<float>::infinity(); }
__forceinline operator long long( ) const { return std::numeric_limits<long long>::max(); }
__forceinline operator unsigned long long( ) const { return std::numeric_limits<unsigned long long>::max(); }
__forceinline operator long     ( ) const { return std::numeric_limits<long>::max(); }
__forceinline operator unsigned long ( ) const { return std::numeric_limits<unsigned long>::max(); }
__forceinline operator int      ( ) const { return std::numeric_limits<int>::max(); }
__forceinline operator unsigned int ( ) const { return std::numeric_limits<unsigned int>::max(); }
__forceinline operator short    ( ) const { return std::numeric_limits<short>::max(); }
__forceinline operator unsigned short ( ) const { return std::numeric_limits<unsigned short>::max(); }
__forceinline operator char     ( ) const { return std::numeric_limits<char>::max(); }
__forceinline operator unsigned char ( ) const { return std::numeric_limits<unsigned char>::max(); }
};
// Two aliases for the same positive-infinity tag.
const constexpr PosInfTy inf = PosInfTy();
const constexpr PosInfTy pos_inf = PosInfTy();
// Tag type for a quiet NaN.
struct NaNTy
{
__forceinline operator double( ) const { return std::numeric_limits<double>::quiet_NaN(); }
__forceinline operator float ( ) const { return std::numeric_limits<float>::quiet_NaN(); }
};
const constexpr NaNTy nan = NaNTy();
// Tag type for the machine epsilon of the target floating-point type.
struct UlpTy
{
__forceinline operator double( ) const { return std::numeric_limits<double>::epsilon(); }
__forceinline operator float ( ) const { return std::numeric_limits<float>::epsilon(); }
};
const constexpr UlpTy ulp = UlpTy();
// Mathematical constants as convertible tag types; each yields pi (or the
// indicated multiple/reciprocal) at the precision of the requested type.
struct PiTy
{
__forceinline operator double( ) const { return double(M_PI); }
__forceinline operator float ( ) const { return float(M_PI); }
};
const constexpr PiTy pi = PiTy();
struct OneOverPiTy
{
__forceinline operator double( ) const { return double(M_1_PI); }
__forceinline operator float ( ) const { return float(M_1_PI); }
};
const constexpr OneOverPiTy one_over_pi = OneOverPiTy();
struct TwoPiTy
{
__forceinline operator double( ) const { return double(2.0*M_PI); }
__forceinline operator float ( ) const { return float(2.0*M_PI); }
};
const constexpr TwoPiTy two_pi = TwoPiTy();
struct OneOverTwoPiTy
{
__forceinline operator double( ) const { return double(0.5*M_1_PI); }
__forceinline operator float ( ) const { return float(0.5*M_1_PI); }
};
const constexpr OneOverTwoPiTy one_over_two_pi = OneOverTwoPiTy();
struct FourPiTy
{
__forceinline operator double( ) const { return double(4.0*M_PI); }
__forceinline operator float ( ) const { return float(4.0*M_PI); }
};
const constexpr FourPiTy four_pi = FourPiTy();
struct OneOverFourPiTy
{
__forceinline operator double( ) const { return double(0.25*M_1_PI); }
__forceinline operator float ( ) const { return float(0.25*M_1_PI); }
};
const constexpr OneOverFourPiTy one_over_four_pi = OneOverFourPiTy();
// Tag type convertible to 0 in every arithmetic type; used to start stepped
// sequences (e.g. vint(step) producing 0,1,2,...).
struct StepTy {
__forceinline operator double   ( ) const { return 0; }
__forceinline operator float    ( ) const { return 0; }
__forceinline operator long long( ) const { return 0; }
__forceinline operator unsigned long long( ) const { return 0; }
__forceinline operator long     ( ) const { return 0; }
__forceinline operator unsigned long ( ) const { return 0; }
__forceinline operator int      ( ) const { return 0; }
__forceinline operator unsigned int ( ) const { return 0; }
__forceinline operator short    ( ) const { return 0; }
__forceinline operator unsigned short ( ) const { return 0; }
__forceinline operator char     ( ) const { return 0; }
__forceinline operator unsigned char ( ) const { return 0; }
};
const constexpr StepTy step = StepTy();
// Marker tag types with no conversions; used purely for constructor/overload
// selection (e.g. constructing an empty bounding box from `empty`).
struct ReverseStepTy {
};
const constexpr ReverseStepTy reverse_step = ReverseStepTy();
struct EmptyTy {
};
const constexpr EmptyTy empty = EmptyTy();
struct FullTy {
};
const constexpr FullTy full = FullTy();
struct UndefinedTy {
};
const constexpr UndefinedTy undefined = UndefinedTy();
}

View file

@ -0,0 +1,468 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include "../sys/intrinsics.h"
#include "constants.h"
#include <cmath>
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "math_sycl.h"
#else
#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#else
#include <emmintrin.h>
#include <xmmintrin.h>
#include <immintrin.h>
#endif
#if defined(__WIN32__)
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
// Shims for MSVC <= 2012, which lacked std::isinf/isnan/isfinite; implemented
// via the CRT _finite/_isnan. NOTE(review): this isinf also reports true for
// NaN (_finite is 0 for NaN) — presumably acceptable for the old-MSVC path.
namespace std
{
__forceinline bool isinf ( const float x ) { return _finite(x) == 0; }
__forceinline bool isnan ( const float x ) { return _isnan(x) != 0; }
__forceinline bool isfinite (const float x) { return _finite(x) != 0; }
}
#endif
#endif
namespace embree
{
// True when v lies strictly inside (-FLT_LARGE, +FLT_LARGE); rejects
// +-inf, NaN (both comparisons are false for NaN) and overly large magnitudes.
// Uses bitwise & on the two bools to stay branch-free.
__forceinline bool isvalid ( const float& v ) {
return (v > -FLT_LARGE) & (v < +FLT_LARGE);
}
/*! reinterprets the bits of a float as an int (type-pun via union) */
__forceinline int cast_f2i(float f) {
union { float f; int i; } v; v.f = f; return v.i;
}
/*! reinterprets the bits of an int as a float */
__forceinline float cast_i2f(int i) {
union { float f; int i; } v; v.i = i; return v.f;
}
/*! value conversions (toInt truncates toward zero) */
__forceinline int   toInt  (const float& a) { return int(a); }
__forceinline float toFloat(const int&   a) { return float(a); }
/*! bit-pattern reinterpretation; routed through the union-based casts above
    instead of dereferencing a pointer cast (which violates strict aliasing) */
__forceinline int   asInt  (const float& a) { return cast_f2i(a); }
__forceinline float asFloat(const int&   a) { return cast_i2f(a); }
#if defined(__WIN32__)
// Windows CRT spelling of isfinite.
__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
#endif
// Sign of x as +-1.0f; note sign(0.0f) == +1.0f and sign(NaN) == +1.0f.
__forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }
// Square of x.
__forceinline float sqr  ( const float x ) { return x*x; }
// Approximate scalar reciprocal 1/x refined with Newton-Raphson; valid for
// abs(x) >= min_rcp_input (see constants.h).
__forceinline float rcp  ( const float x )
{
#if defined(__aarch64__)
// Move scalar to vector register and do rcp.
__m128 a;
a[0] = x;
float32x4_t reciprocal = vrecpeq_f32(a);
reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
return reciprocal[0];
#else
const __m128 a = _mm_set_ss(x);
#if defined(__AVX512VL__)
// AVX-512 estimate has 2^-14 relative error, better than SSE rcp.
const __m128 r = _mm_rcp14_ss(_mm_set_ss(0.0f),a);
#else
const __m128 r = _mm_rcp_ss(a);
#endif
// One Newton-Raphson step: r' = r * (2 - a*r); fused on AVX2.
#if defined(__AVX2__)
return _mm_cvtss_f32(_mm_mul_ss(r,_mm_fnmadd_ss(r, a, _mm_set_ss(2.0f))));
#else
return _mm_cvtss_f32(_mm_mul_ss(r,_mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a))));
#endif
#endif  //defined(__aarch64__)
}
// Extracts the sign bit of x as a float bit pattern (x & 0x80000000).
__forceinline float signmsk ( const float x ) {
#if defined(__aarch64__)
// FP and Neon shares same vector register in arm64
__m128 a;
__m128i b;
a[0] = x;
b[0] = 0x80000000;
a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
return a[0];
#else
return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
#endif
}
// Bitwise XOR of the two float bit patterns (e.g. to transfer a sign).
__forceinline float xorf( const float x, const float y ) {
#if defined(__aarch64__)
// FP and Neon shares same vector register in arm64
__m128 a;
__m128 b;
a[0] = x;
b[0] = y;
a = _mm_xor_ps(a, b);
return a[0];
#else
return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
#endif
}
// Bitwise AND of a float bit pattern with an integer mask.
__forceinline float andf( const float x, const unsigned y ) {
#if defined(__aarch64__)
// FP and Neon shares same vector register in arm64
__m128 a;
__m128i b;
a[0] = x;
b[0] = y;
a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
return a[0];
#else
return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
#endif
}
// Approximate scalar reciprocal square root refined with Newton-Raphson.
__forceinline float rsqrt( const float x )
{
#if defined(__aarch64__)
// FP and Neon shares same vector register in arm64
__m128 a;
a[0] = x;
__m128 value = _mm_rsqrt_ps(a);
value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
return value[0];
#else
const __m128 a = _mm_set_ss(x);
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ss(_mm_set_ss(0.0f),a);
#else
__m128 r = _mm_rsqrt_ss(a);
#endif
// One Newton-Raphson step: r' = 1.5*r - 0.5*x*r^3.
const __m128 c = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r),
_mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r)));
return _mm_cvtss_f32(c);
#endif
}
#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
// MSVC <= 2012 fallbacks. NOTE(review): this float nextafter is only a rough
// multiplicative approximation, and roundf truncates f+0.5 — wrong for
// negative inputs (e.g. -1.5 -> -1, not -2); kept as-is for the legacy path.
__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
__forceinline int roundf(float f) { return (int)(f + 0.5f); }
#else
// Standard C library nextafter.
__forceinline float nextafter(float x, float y) { return ::nextafterf(x, y); }
__forceinline double nextafter(double x, double y) { return ::nextafter(x, y); }
#endif
// Thin inline wrappers over the C math library, float overloads first.
// They exist so generic (templated) embree code can call the same names for
// float, double and the SIMD vector types.
__forceinline float abs  ( const float x ) { return ::fabsf(x); }
__forceinline float acos ( const float x ) { return ::acosf (x); }
__forceinline float asin ( const float x ) { return ::asinf (x); }
__forceinline float atan ( const float x ) { return ::atanf (x); }
__forceinline float atan2( const float y, const float x ) { return ::atan2f(y, x); }
__forceinline float cos  ( const float x ) { return ::cosf  (x); }
__forceinline float cosh ( const float x ) { return ::coshf (x); }
__forceinline float exp  ( const float x ) { return ::expf  (x); }
__forceinline float fmod ( const float x, const float y ) { return ::fmodf (x, y); }
__forceinline float log  ( const float x ) { return ::logf  (x); }
__forceinline float log10( const float x ) { return ::log10f(x); }
__forceinline float pow  ( const float x, const float y ) { return ::powf  (x, y); }
__forceinline float sin  ( const float x ) { return ::sinf  (x); }
__forceinline float sinh ( const float x ) { return ::sinhf (x); }
__forceinline float sqrt ( const float x ) { return ::sqrtf (x); }
__forceinline float tan  ( const float x ) { return ::tanf  (x); }
__forceinline float tanh ( const float x ) { return ::tanhf (x); }
__forceinline float floor( const float x ) { return ::floorf (x); }
__forceinline float ceil ( const float x ) { return ::ceilf (x); }
// Fractional part; always in [0,1) since floor rounds toward -infinity.
__forceinline float frac ( const float x ) { return x-floor(x); }
// Double overloads.
__forceinline double abs  ( const double x ) { return ::fabs(x); }
__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
__forceinline double acos ( const double x ) { return ::acos (x); }
__forceinline double asin ( const double x ) { return ::asin (x); }
__forceinline double atan ( const double x ) { return ::atan (x); }
__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
__forceinline double cos  ( const double x ) { return ::cos  (x); }
__forceinline double cosh ( const double x ) { return ::cosh (x); }
__forceinline double exp  ( const double x ) { return ::exp  (x); }
__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
__forceinline double log  ( const double x ) { return ::log  (x); }
__forceinline double log10( const double x ) { return ::log10(x); }
__forceinline double pow  ( const double x, const double y ) { return ::pow  (x, y); }
// Double rcp/rsqrt are exact divisions, unlike the approximate float versions.
__forceinline double rcp  ( const double x ) { return 1.0/x; }
__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
__forceinline double sin  ( const double x ) { return ::sin  (x); }
__forceinline double sinh ( const double x ) { return ::sinh (x); }
__forceinline double sqr  ( const double x ) { return x*x; }
__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
__forceinline double tan  ( const double x ) { return ::tan  (x); }
__forceinline double tanh ( const double x ) { return ::tanh (x); }
__forceinline double floor( const double x ) { return ::floor (x); }
__forceinline double ceil ( const double x ) { return ::ceil (x); }
#if defined(__aarch64__)
// Scalar float min through the NEON register file.
__forceinline float mini(float a, float b) {
// FP and Neon shares same vector register in arm64
__m128 x;
__m128 y;
x[0] = a;
y[0] = b;
x = _mm_min_ps(x, y);
return x[0];
}
#elif defined(__SSE4_1__)
// Scalar float min via an integer comparison of the IEEE bit patterns.
// NOTE(review): integer ordering matches float ordering only for non-negative
// values — presumably callers guarantee that; confirm before reusing.
__forceinline float mini(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_min_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
#if defined(__aarch64__)
// Scalar float max through the NEON register file.
__forceinline float maxi(float a, float b) {
// FP and Neon shares same vector register in arm64
__m128 x;
__m128 y;
x[0] = a;
y[0] = b;
x = _mm_max_ps(x, y);
return x[0];
}
#elif defined(__SSE4_1__)
// Scalar float max via an integer comparison of the IEEE bit patterns (see mini).
__forceinline float maxi(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_max_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
// Doubles a value using addition only (works for any type with operator+).
template<typename T>
__forceinline T twice(const T& a) { return a+a; }
// Two-argument min overloads; when arguments compare equal the first is returned.
__forceinline      int min(int      a, int      b) { return a<b ? a:b; }
__forceinline unsigned min(unsigned a, unsigned b) { return a<b ? a:b; }
__forceinline  int64_t min(int64_t  a, int64_t  b) { return a<b ? a:b; }
__forceinline    float min(float    a, float    b) { return a<b ? a:b; }
__forceinline   double min(double   a, double   b) { return a<b ? a:b; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline   size_t min(size_t   a, size_t   b) { return a<b ? a:b; }
#endif
#if defined(__EMSCRIPTEN__)
__forceinline     long min(long     a, long     b) { return a<b ? a:b; }
#endif
// Variadic (3-5 argument) forms built from the binary overloads.
template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
// Two-argument max overloads; when arguments compare equal the second is returned.
__forceinline      int max(int      a, int      b) { return a<b ? b:a; }
__forceinline unsigned max(unsigned a, unsigned b) { return a<b ? b:a; }
__forceinline  int64_t max(int64_t  a, int64_t  b) { return a<b ? b:a; }
__forceinline    float max(float    a, float    b) { return a<b ? b:a; }
__forceinline   double max(double   a, double   b) { return a<b ? b:a; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline   size_t max(size_t   a, size_t   b) { return a<b ? b:a; }
#endif
#if defined(__EMSCRIPTEN__)
__forceinline     long max(long     a, long     b) { return a<b ? b:a; }
#endif
template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
#if defined(__MACOSX__)
__forceinline ssize_t min(ssize_t a, ssize_t b) { return a<b ? a:b; }
__forceinline ssize_t max(ssize_t a, ssize_t b) { return a<b ? b:a; }
#endif
#if defined(__MACOSX__) && !defined(__INTEL_COMPILER)
__forceinline void sincosf(float x, float *sin, float *cos) {
__sincosf(x,sin,cos);
}
#endif
#if defined(__WIN32__) || defined(__FreeBSD__)
__forceinline void sincosf(float x, float *s, float *c) {
*s = sinf(x); *c = cosf(x);
}
#endif
/*! clamps x into [lower,upper]; defaults clamp into [0,1] (zero/one are the
    embree constant tags). max(min(...)) order means NaN inputs resolve to lower. */
template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
/*! clamps x into [0,upper] */
template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
/*! degree/radian conversion; the constants are pi/180 and 180/pi */
template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
/*! computes cos from sin (and vice versa) via sqrt(1-x^2); the max with
    zero guards against a NaN from tiny negative values caused by rounding */
template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
/*! scalar fused multiply-add helpers:
      madd  =  a*b + c
      msub  =  a*b - c
      nmadd = -a*b + c
      nmsub = -a*b - c
    On AVX2 they map to single-element FMA intrinsics (single rounding);
    on aarch64/clang fp contraction is forced on so the compiler fuses them;
    otherwise they fall back to separately rounded multiply+add. */
#if defined(__AVX2__)
__forceinline float madd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
__forceinline float msub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
__forceinline float nmadd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
__forceinline float nmsub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
#elif defined (__aarch64__) && defined(__clang__)
#pragma clang fp contract(fast)
__forceinline float madd ( const float a, const float b, const float c) { return a*b + c; }
__forceinline float msub ( const float a, const float b, const float c) { return a*b - c; }
__forceinline float nmadd ( const float a, const float b, const float c) { return c - a*b; }
__forceinline float nmsub ( const float a, const float b, const float c) { return -(c + a*b); }
#pragma clang fp contract(on)
#else
__forceinline float madd ( const float a, const float b, const float c) { return a*b+c; }
__forceinline float msub ( const float a, const float b, const float c) { return a*b-c; }
__forceinline float nmadd ( const float a, const float b, const float c) { return -a*b+c;}
__forceinline float nmsub ( const float a, const float b, const float c) { return -a*b-c; }
#endif
/*! random functions (non-deterministic across platforms; based on rand()) */
template<typename T> T random() { return T(0); }
/* on Windows RAND_MAX is only 0x7FFF (15 bits), so several rand() calls
   are xor-shifted together to fill wider integer types */
#if defined(_WIN32)
template<> __forceinline int random() { return int(rand()) ^ (int(rand()) << 8) ^ (int(rand()) << 16); }
template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 8) ^ (uint32_t(rand()) << 16); }
#else
template<> __forceinline int random() { return int(rand()); }
template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
#endif
/*! uniform floats in [0,1] with RAND_MAX granularity */
template<> __forceinline float random() { return rand()/float(RAND_MAX); }
template<> __forceinline double random() { return rand()/double(RAND_MAX); }
/*! drand48()/srand48() do not exist in the Windows CRT; emulate them with
    rand()/srand(). Note rand() only provides RAND_MAX (15 bits of)
    resolution, so this drand48() is much coarser than the POSIX one.
    The guard uses defined(_WIN32) for consistency with the random<>
    specializations above. */
#if defined(_WIN32)
__forceinline double drand48() {
return double(rand())/double(RAND_MAX);
}
__forceinline void srand48(long seed) {
/* srand() returns void, so do not forward its "result"; make the
   long -> unsigned int narrowing explicit */
srand((unsigned int)seed);
}
#endif
/*! scalar fallbacks of the SIMD mask API (select/none/all/any/movemask),
    so generic code can treat a single bool like a one-lane mask */
__forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }
__forceinline int select(bool s, int t, int f) { return s ? t : f; }
__forceinline float select(bool s, float t, float f) { return s ? t : f; }
__forceinline bool none(bool s) { return !s; }
__forceinline bool all (bool s) { return s; }
__forceinline bool any (bool s) { return s; }
/*! one-lane movemask: bit 0 carries the bool */
__forceinline unsigned movemask (bool s) { return (unsigned)s; }
/*! linear interpolation (1-t)*v0 + t*v1, computed through madd so it can
    use a fused multiply-add where available */
__forceinline float lerp(const float v0, const float v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
/*! bilinear interpolation of the four corner values x0..x3 at parameters
    (u,v); x0/x1 are the v=0 edge and x2/x3 the v=1 edge */
template<typename T>
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
}
/*! exchanges the contents of two values through a temporary copy */
template<typename T> __forceinline void xchg ( T& a, T& b )
{
const T old_a = a;
a = b;
b = old_a;
}
/* masked load/store abstraction; only the float specialization is defined here.
   When the mask is false, loads yield 0.0f and stores are skipped. */
template<typename Ty> struct mem;
template<> struct mem<float> {
/* aligned and unaligned variants are identical for a single float */
static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
};
/*! reverses the bit order of a 32-bit value using the classic butterfly
    scheme: exchange neighboring 1-bit, 2-bit, 4-bit, 8-bit and finally
    16-bit groups (masks assume a 32-bit integer type) */
template<class T>
__forceinline T bitReverse(const T& vin)
{
T r = vin;
r = ((r & 0x55555555) << 1) | ((r >> 1) & 0x55555555); // single bits
r = ((r & 0x33333333) << 2) | ((r >> 2) & 0x33333333); // bit pairs
r = ((r & 0x0F0F0F0F) << 4) | ((r >> 4) & 0x0F0F0F0F); // nibbles
r = ((r & 0x00FF00FF) << 8) | ((r >> 8) & 0x00FF00FF); // bytes
r = ( r << 16 ) | ( r >> 16 );                         // half words
return r;
}
/*! interleaves the low 10 bits of three values into a 30-bit Morton code:
    bit i of x/y/z ends up in bit 3*i / 3*i+1 / 3*i+2 of the result */
template<class T>
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
{
/* spreads the low 10 bits of v apart so two zero bits separate consecutive bits */
auto spread = [](T v) {
v = (v | (v << 16)) & 0x030000FF;
v = (v | (v << 8)) & 0x0300F00F;
v = (v | (v << 4)) & 0x030C30C3;
v = (v | (v << 2)) & 0x09249249;
return v;
};
return spread(xin) | (spread(yin) << 1) | (spread(zin) << 2);
}
/* on x86 with BMI2 (implied by AVX2), a single pdep scatter per component
   replaces the shift/mask cascade of the generic version */
#if defined(__AVX2__) && !defined(__aarch64__)
template<>
__forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)
{
const unsigned int xx = pdep(xi,0x49249249 /* 0b01001001001001001001001001001001 */ );
const unsigned int yy = pdep(yi,0x92492492 /* 0b10010010010010010010010010010010 */);
const unsigned int zz = pdep(zi,0x24924924 /* 0b00100100100100100100100100100100 */);
return xx | yy | zz;
}
#endif
/*! interleaves the low 21 bits of three values into a 63-bit Morton code
    (64-bit variant of bitInterleave above) */
template<class T>
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
/* masks v to 21 bits, then spreads them so two zero bits separate
   consecutive source bits */
auto spread = [](T v) {
v &= 0x1fffff;
v = (v | v << 32) & 0x1f00000000ffff;
v = (v | v << 16) & 0x1f0000ff0000ff;
v = (v | v << 8) & 0x100f00f00f00f00f;
v = (v | v << 4) & 0x10c30c30c30c30c3;
v = (v | v << 2) & 0x1249249249249249;
return v;
};
return spread(xin) | (spread(yin) << 1) | (spread(zin) << 2);
}
}
#endif

View file

@ -0,0 +1,161 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec2.h"
#include "vec3.h"
#include "bbox.h"
namespace embree
{
/*! closed interval [lower,upper] over an arithmetic type V, with
    conservative interval-arithmetic operators */
template<typename V>
struct Interval
{
V lower, upper;
__forceinline Interval() {}
__forceinline Interval ( const Interval& other ) { lower = other.lower; upper = other.upper; }
__forceinline Interval& operator=( const Interval& other ) { lower = other.lower; upper = other.upper; return *this; }
/*! degenerate interval containing the single point a */
__forceinline Interval(const V& a) : lower(a), upper(a) {}
__forceinline Interval(const V& lower, const V& upper) : lower(lower), upper(upper) {}
__forceinline Interval(const BBox<V>& a) : lower(a.lower), upper(a.upper) {}
/*! tests if box is empty */
//__forceinline bool empty() const { return lower > upper; }
/*! computes the size of the interval */
__forceinline V size() const { return upper - lower; }
__forceinline V center() const { return 0.5f*(lower+upper); }
/*! grows the interval to enclose another interval / a point */
__forceinline const Interval& extend(const Interval& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; }
__forceinline const Interval& extend(const V & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; }
__forceinline friend Interval operator +( const Interval& a, const Interval& b ) {
return Interval(a.lower+b.lower,a.upper+b.upper);
}
/*! interval subtraction: lower bound uses b.upper, upper bound uses b.lower */
__forceinline friend Interval operator -( const Interval& a, const Interval& b ) {
return Interval(a.lower-b.upper,a.upper-b.lower);
}
__forceinline friend Interval operator -( const Interval& a, const V& b ) {
return Interval(a.lower-b,a.upper-b);
}
/*! interval product: the result bounds all four corner products since
    either factor may be negative */
__forceinline friend Interval operator *( const Interval& a, const Interval& b )
{
const V ll = a.lower*b.lower;
const V lu = a.lower*b.upper;
const V ul = a.upper*b.lower;
const V uu = a.upper*b.upper;
return Interval(min(ll,lu,ul,uu),max(ll,lu,ul,uu));
}
/*! union of intervals */
__forceinline friend Interval merge( const Interval& a, const Interval& b) {
return Interval(min(a.lower,b.lower),max(a.upper,b.upper));
}
__forceinline friend Interval merge( const Interval& a, const Interval& b, const Interval& c) {
return merge(merge(a,b),c);
}
__forceinline friend Interval merge( const Interval& a, const Interval& b, const Interval& c, const Interval& d) {
return merge(merge(a,b),merge(c,d));
}
/*! intersect bounding boxes (may produce an inverted, i.e. empty, interval) */
__forceinline friend const Interval intersect( const Interval& a, const Interval& b ) { return Interval(max(a.lower, b.lower), min(a.upper, b.upper)); }
__forceinline friend const Interval intersect( const Interval& a, const Interval& b, const Interval& c ) { return intersect(a,intersect(b,c)); }
__forceinline friend const Interval intersect( const Interval& a, const Interval& b, const Interval& c, const Interval& d ) { return intersect(intersect(a,b),intersect(c,d)); }
friend embree_ostream operator<<(embree_ostream cout, const Interval& a) {
return cout << "[" << a.lower << ", " << a.upper << "]";
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
/*! empty interval is inverted (+inf,-inf) so any extend() fixes it up */
__forceinline Interval( EmptyTy ) : lower(pos_inf), upper(neg_inf) {}
__forceinline Interval( FullTy ) : lower(neg_inf), upper(pos_inf) {}
};
/*! an interval is empty when its bounds are inverted */
__forceinline bool isEmpty(const Interval<float>& v) {
return v.lower > v.upper;
}
/*! per-lane emptiness test for SIMD intervals */
__forceinline vboolx isEmpty(const Interval<vfloatx>& v) {
return v.lower > v.upper;
}
/*! subset relation; note the strict comparisons: a must lie strictly
    inside b (shared endpoints do not count as subset) */
template<typename T> __forceinline bool subset( const Interval<T>& a, const Interval<T>& b ) {
return (a.lower > b.lower) && (a.upper < b.upper);
}
/*! componentwise subset test for 2D interval vectors */
template<typename T> __forceinline bool subset( const Vec2<Interval<T>>& a, const Vec2<Interval<T>>& b ) {
return subset(a.x,b.x) && subset(a.y,b.y);
}
/*! componentwise intersection of 2D interval vectors */
template<typename T> __forceinline const Vec2<Interval<T>> intersect( const Vec2<Interval<T>>& a, const Vec2<Interval<T>>& b ) {
return Vec2<Interval<T>>(intersect(a.x,b.x),intersect(a.y,b.y));
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
/*! boundwise select: picks lower and upper independently from t or f */
template<typename T> __forceinline Interval<T> select ( bool s, const Interval<T>& t, const Interval<T>& f ) {
return Interval<T>(select(s,t.lower,f.lower),select(s,t.upper,f.upper));
}
/*! per-lane variant using the SIMD mask type of T */
template<typename T> __forceinline Interval<T> select ( const typename T::Bool& s, const Interval<T>& t, const Interval<T>& f ) {
return Interval<T>(select(s,t.lower,f.lower),select(s,t.upper,f.upper));
}
/*! despite the name this returns 0 or 1: 1 when, within an eps tolerance,
    the value intervals at the two endpoints admit a sign change (so a root
    may exist between them), 0 otherwise */
__forceinline int numRoots(const Interval<float>& p0, const Interval<float>& p1)
{
float eps = 1E-4f;
/* negX/posX: interval pX may contain a non-positive / non-negative value */
bool neg0 = p0.lower < eps; bool pos0 = p0.upper > -eps;
bool neg1 = p1.lower < eps; bool pos1 = p1.upper > -eps;
return (neg0 && pos1) || (pos0 && neg1) || (neg0 && pos0) || (neg1 && pos1);
}
/*! common interval instantiations */
typedef Interval<float> Interval1f;
typedef Vec2<Interval<float>> Interval2f;
typedef Vec3<Interval<float>> Interval3f;
/*! local float swap used by the interval sin/cos below */
inline void swap(float& a, float& b) { float tmp = a; a = b; b = tmp; }
/*! translates an interval by a constant offset */
inline Interval1f shift(const Interval1f& v, float shift) { return Interval1f(v.lower + shift, v.upper + shift); }
#define TWO_PI (2.0*M_PI)
/*! conservative bound of sin over an interval: reduce the interval into
    [0,2pi), take sin at both endpoints, then widen to +/-1 where an
    extremum (pi/2 or 3pi/2) lies inside the interval */
inline Interval1f sin(Interval1f interval)
{
/* intervals at least pi wide always cover a full extremum pair */
if (interval.upper-interval.lower >= M_PI) { return Interval1f(-1.0, 1.0); }
if (interval.upper > TWO_PI) { interval = shift(interval, -TWO_PI*floor(interval.upper/TWO_PI)); }
if (interval.lower < 0) { interval = shift(interval, -TWO_PI*floor(interval.lower/TWO_PI)); }
float sinLower = sin(interval.lower);
float sinUpper = sin(interval.upper);
if (sinLower > sinUpper) swap(sinLower, sinUpper);
if (interval.lower < M_PI / 2.0 && interval.upper > M_PI / 2.0) sinUpper = 1.0;
if (interval.lower < 3.0 * M_PI / 2.0 && interval.upper > 3.0 * M_PI / 2.0) sinLower = -1.0;
return Interval1f(sinLower, sinUpper);
}
/*! conservative bound of cos over an interval; same scheme as sin() but
    the only interior extremum to widen for is the minimum at pi */
inline Interval1f cos(Interval1f interval)
{
if (interval.upper-interval.lower >= M_PI) { return Interval1f(-1.0, 1.0); }
if (interval.upper > TWO_PI) { interval = shift(interval, -TWO_PI*floor(interval.upper/TWO_PI)); }
if (interval.lower < 0) { interval = shift(interval, -TWO_PI*floor(interval.lower/TWO_PI)); }
float cosLower = cos(interval.lower);
float cosUpper = cos(interval.upper);
if (cosLower > cosUpper) swap(cosLower, cosUpper);
if (interval.lower < M_PI && interval.upper > M_PI) cosLower = -1.0;
return Interval1f(cosLower, cosUpper);
}
#undef TWO_PI
}

View file

@ -0,0 +1,331 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bbox.h"
#include "range.h"
namespace embree
{
/*! re-expresses a pair of values given at the ends of time range dt as the
    equivalent pair at global times 0 and 1 (linear extrapolation) */
template<typename T>
__forceinline std::pair<T,T> globalLinear(const std::pair<T,T>& v, const BBox1f& dt)
{
const float rcp_dt_size = float(1.0f)/dt.size();
const T g0 = lerp(v.first,v.second,-dt.lower*rcp_dt_size);
const T g1 = lerp(v.first,v.second,(1.0f-dt.lower)*rcp_dt_size);
return std::make_pair(g0,g1);
}
/*! linear bounding box for motion blur: bounds0 holds the bounds at the
    start of the time range and bounds1 at the end; bounds at intermediate
    times are obtained by linear interpolation. Several constructors fit
    (bounds0,bounds1) so the interpolated box conservatively encloses a set
    of per-time-step bounds. */
template<typename T>
struct LBBox
{
public:
__forceinline LBBox () {}
template<typename T1>
__forceinline LBBox ( const LBBox<T1>& other )
: bounds0(other.bounds0), bounds1(other.bounds1) {}
__forceinline LBBox& operator= ( const LBBox& other ) {
bounds0 = other.bounds0; bounds1 = other.bounds1; return *this;
}
__forceinline LBBox (EmptyTy)
: bounds0(EmptyTy()), bounds1(EmptyTy()) {}
/*! static (non-moving) bounds */
__forceinline explicit LBBox ( const BBox<T>& bounds)
: bounds0(bounds), bounds1(bounds) { }
__forceinline LBBox ( const BBox<T>& bounds0, const BBox<T>& bounds1)
: bounds0(bounds0), bounds1(bounds1) { }
/*! fits linear bounds to an array of per-time-step bounds: start from the
    first/last boxes and push both out in lockstep wherever an interior
    time step would poke outside the interpolated box */
LBBox ( const avector<BBox<T>>& bounds )
{
assert(bounds.size());
BBox<T> b0 = bounds.front();
BBox<T> b1 = bounds.back();
for (size_t i=1; i<bounds.size()-1; i++) {
const float f = float(i)/float(bounds.size()-1);
const BBox<T> bt = lerp(b0,b1,f);
/* negative lower / positive upper violations of the interpolated box */
const T dlower = min(bounds[i].lower-bt.lower,T(zero));
const T dupper = max(bounds[i].upper-bt.upper,T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
bounds0 = b0;
bounds1 = b1;
}
/*! calculates the linear bounds of a primitive for the specified time range */
template<typename BoundsFunc>
__forceinline LBBox(const BoundsFunc& bounds, const BBox1f& time_range, float numTimeSegments)
{
/* map the normalized time range onto time-segment indices */
const float lower = time_range.lower*numTimeSegments;
const float upper = time_range.upper*numTimeSegments;
const float ilowerf = floor(lower);
const float iupperf = ceil(upper);
const int ilower = (int)ilowerf;
const int iupper = (int)iupperf;
const BBox<T> blower0 = bounds(ilower);
const BBox<T> bupper1 = bounds(iupper);
/* single segment: just clip the segment's linear bounds to the range */
if (iupper-ilower == 1) {
bounds0 = lerp(blower0, bupper1, lower-ilowerf);
bounds1 = lerp(bupper1, blower0, iupperf-upper);
return;
}
const BBox<T> blower1 = bounds(ilower+1);
const BBox<T> bupper0 = bounds(iupper-1);
BBox<T> b0 = lerp(blower0, blower1, lower-ilowerf);
BBox<T> b1 = lerp(bupper1, bupper0, iupperf-upper);
/* push both end boxes out wherever an interior time step pokes outside */
for (int i = ilower+1; i < iupper; i++)
{
const float f = (float(i)/numTimeSegments - time_range.lower) / time_range.size();
const BBox<T> bt = lerp(b0, b1, f);
const BBox<T> bi = bounds(i);
const T dlower = min(bi.lower-bt.lower, T(zero));
const T dupper = max(bi.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
bounds0 = b0;
bounds1 = b1;
}
/*! calculates the linear bounds of a primitive for the specified time range */
template<typename BoundsFunc>
__forceinline LBBox(const BoundsFunc& bounds, const BBox1f& time_range_in, const BBox1f& geom_time_range, float geom_time_segments)
{
/* normalize global time_range_in to local geom_time_range */
const BBox1f time_range((time_range_in.lower-geom_time_range.lower)/geom_time_range.size(),
(time_range_in.upper-geom_time_range.lower)/geom_time_range.size());
const float lower = time_range.lower*geom_time_segments;
const float upper = time_range.upper*geom_time_segments;
const float ilowerf = floor(lower);
const float iupperf = ceil(upper);
/* clamp segment indices to the geometry's valid range */
const float ilowerfc = max(0.0f,ilowerf);
const float iupperfc = min(iupperf,geom_time_segments);
const int ilowerc = (int)ilowerfc;
const int iupperc = (int)iupperfc;
assert(iupperc-ilowerc > 0);
/* this larger iteration range guarantees that we process borders of geom_time_range is (partially) inside time_range_in */
const int ilower_iter = max(-1,(int)ilowerf);
const int iupper_iter = min((int)iupperf,(int)geom_time_segments+1);
const BBox<T> blower0 = bounds(ilowerc);
const BBox<T> bupper1 = bounds(iupperc);
if (iupper_iter-ilower_iter == 1) {
bounds0 = lerp(blower0, bupper1, max(0.0f,lower-ilowerfc));
bounds1 = lerp(bupper1, blower0, max(0.0f,iupperfc-upper));
return;
}
const BBox<T> blower1 = bounds(ilowerc+1);
const BBox<T> bupper0 = bounds(iupperc-1);
BBox<T> b0 = lerp(blower0, blower1, max(0.0f,lower-ilowerfc));
BBox<T> b1 = lerp(bupper1, bupper0, max(0.0f,iupperfc-upper));
/* same fitting loop as above, over the (possibly extended) index range */
for (int i = ilower_iter+1; i < iupper_iter; i++)
{
const float f = (float(i)/geom_time_segments - time_range.lower) / time_range.size();
const BBox<T> bt = lerp(b0, b1, f);
const BBox<T> bi = bounds(i);
const T dlower = min(bi.lower-bt.lower, T(zero));
const T dupper = max(bi.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
bounds0 = b0;
bounds1 = b1;
}
/*! calculates the linear bounds of a primitive for the specified time range */
template<typename BoundsFunc>
__forceinline LBBox(const BoundsFunc& bounds, const range<int>& time_range, int numTimeSegments)
{
const int ilower = time_range.begin();
const int iupper = time_range.end();
BBox<T> b0 = bounds(ilower);
BBox<T> b1 = bounds(iupper);
if (iupper-ilower == 1)
{
bounds0 = b0;
bounds1 = b1;
return;
}
/* push both end boxes out wherever an interior time step pokes outside */
for (int i = ilower+1; i<iupper; i++)
{
const float f = float(i - time_range.begin()) / float(time_range.size());
const BBox<T> bt = lerp(b0, b1, f);
const BBox<T> bi = bounds(i);
const T dlower = min(bi.lower-bt.lower, T(zero));
const T dupper = max(bi.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
bounds0 = b0;
bounds1 = b1;
}
/*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
__forceinline LBBox(const BBox1f& time_range_in, const LBBox<T> lbounds, const BBox1f& target_time_range)
{
const BBox3f bounds0 = lbounds.bounds0;
const BBox3f bounds1 = lbounds.bounds1;
/* normalize global target_time_range to local time_range_in */
const BBox1f time_range((target_time_range.lower-time_range_in.lower)/time_range_in.size(),
(target_time_range.upper-time_range_in.lower)/time_range_in.size());
const BBox1f clipped_time_range(max(0.0f,time_range.lower), min(1.0f,time_range.upper));
/* compute bounds at begin and end of clipped time range */
BBox<T> b0 = lerp(bounds0,bounds1,clipped_time_range.lower);
BBox<T> b1 = lerp(bounds0,bounds1,clipped_time_range.upper);
/* make sure that b0 is properly bounded at time_range_in.lower */
{
const BBox<T> bt = lerp(b0, b1, (0.0f - time_range.lower) / time_range.size());
const T dlower = min(bounds0.lower-bt.lower, T(zero));
const T dupper = max(bounds0.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
/* make sure that b1 is properly bounded at time_range_in.upper */
{
const BBox<T> bt = lerp(b0, b1, (1.0f - time_range.lower) / time_range.size());
const T dlower = min(bounds1.lower-bt.lower, T(zero));
const T dupper = max(bounds1.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
this->bounds0 = b0;
this->bounds1 = b1;
}
/*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
__forceinline LBBox(const BBox1f& time_range_in, const BBox<T>& bounds0, const BBox<T>& bounds1, const BBox1f& target_time_range)
: LBBox(time_range_in,LBBox(bounds0,bounds1),target_time_range) {}
public:
__forceinline bool empty() const {
return bounds().empty();
}
/*! union of the bounds over the whole time range */
__forceinline BBox<T> bounds () const {
return merge(bounds0,bounds1);
}
/*! bounds at normalized time t */
__forceinline BBox<T> interpolate( const float t ) const {
return lerp(bounds0,bounds1,t);
}
/*! linear bounds restricted to the sub-time-range dt */
__forceinline LBBox<T> interpolate( const BBox1f& dt ) const {
return LBBox<T>(interpolate(dt.lower),interpolate(dt.upper));
}
__forceinline void extend( const LBBox& other ) {
bounds0.extend(other.bounds0);
bounds1.extend(other.bounds1);
}
/*! defined below (specialized for Vec3fa) */
__forceinline float expectedHalfArea() const;
__forceinline float expectedHalfArea(const BBox1f& dt) const {
return interpolate(dt).expectedHalfArea();
}
/*! cheap approximation: average of the half areas at both endpoints */
__forceinline float expectedApproxHalfArea() const {
return 0.5f*(halfArea(bounds0) + halfArea(bounds1));
}
/* calculates bounds for [0,1] time range from bounds in dt time range */
__forceinline LBBox global(const BBox1f& dt) const
{
const float rcp_dt_size = 1.0f/dt.size();
const BBox<T> b0 = interpolate(-dt.lower*rcp_dt_size);
const BBox<T> b1 = interpolate((1.0f-dt.lower)*rcp_dt_size);
return LBBox(b0,b1);
}
/*! Comparison Operators */
//template<typename TT> friend __forceinline bool operator==( const LBBox<TT>& a, const LBBox<TT>& b ) { return a.bounds0 == b.bounds0 && a.bounds1 == b.bounds1; }
//template<typename TT> friend __forceinline bool operator!=( const LBBox<TT>& a, const LBBox<TT>& b ) { return a.bounds0 != b.bounds0 || a.bounds1 != b.bounds1; }
friend __forceinline bool operator==( const LBBox& a, const LBBox& b ) { return a.bounds0 == b.bounds0 && a.bounds1 == b.bounds1; }
friend __forceinline bool operator!=( const LBBox& a, const LBBox& b ) { return a.bounds0 != b.bounds0 || a.bounds1 != b.bounds1; }
/*! output operator */
friend __forceinline embree_ostream operator<<(embree_ostream cout, const LBBox& box) {
return cout << "LBBox { " << box.bounds0 << "; " << box.bounds1 << " }";
}
public:
/*! bounds at the start (bounds0) and end (bounds1) of the time range */
BBox<T> bounds0, bounds1;
};
/*! tests if box is finite */
template<typename T>
__forceinline bool isvalid( const LBBox<T>& v ) {
return isvalid(v.bounds0) && isvalid(v.bounds1);
}
/*! tests if box is finite and not inverted */
template<typename T>
__forceinline bool isvalid_non_empty( const LBBox<T>& v ) {
return isvalid_non_empty(v.bounds0) && isvalid_non_empty(v.bounds1);
}
/*! time-averaged product of two linearly varying extents a(t)=a0+t*da and
    b(t)=b0+t*db, integrated over t in [0,1] */
template<typename T>
__forceinline T expectedArea(const T& a0, const T& a1, const T& b0, const T& b1)
{
const T da = a1-a0;
const T db = b1-b0;
return a0*b0+(a0*db+da*b0)*T(0.5f) + da*db*T(1.0f/3.0f);
}
/*! time-averaged half surface area of the linearly interpolated box:
    sums expectedArea over the three face-extent pairs (x*y, y*z, z*x) */
template<> __forceinline float LBBox<Vec3fa>::expectedHalfArea() const
{
const Vec3fa d0 = bounds0.size();
const Vec3fa d1 = bounds1.size();
return reduce_add(expectedArea(Vec3fa(d0.x,d0.y,d0.z),
Vec3fa(d1.x,d1.y,d1.z),
Vec3fa(d0.y,d0.z,d0.x),
Vec3fa(d1.y,d1.z,d1.x)));
}
/*! free-function forwarder of the member above */
template<typename T>
__forceinline float expectedApproxHalfArea(const LBBox<T>& box) {
return box.expectedApproxHalfArea();
}
/*! union of two linear bounding boxes, taken per endpoint */
template<typename T>
__forceinline LBBox<T> merge(const LBBox<T>& a, const LBBox<T>& b) {
return LBBox<T>(merge(a.bounds0, b.bounds0), merge(a.bounds1, b.bounds1));
}
/*! subset relation */
template<typename T> __inline bool subset( const LBBox<T>& a, const LBBox<T>& b ) {
return subset(a.bounds0,b.bounds0) && subset(a.bounds1,b.bounds1);
}
/*! default template instantiations */
typedef LBBox<float> LBBox1f;
typedef LBBox<Vec2f> LBBox2f;
typedef LBBox<Vec3f> LBBox3f;
typedef LBBox<Vec3fa> LBBox3fa;
typedef LBBox<Vec3fx> LBBox3fx;
}

View file

@ -0,0 +1,149 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec2.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// 2D Linear Transform (2x2 Matrix)
////////////////////////////////////////////////////////////////////////////////
/*! 2x2 matrix stored as two column vectors vx, vy */
template<typename T> struct LinearSpace2
{
typedef T Vector;
typedef typename T::Scalar Scalar;
/*! default matrix constructor */
__forceinline LinearSpace2 ( ) {}
__forceinline LinearSpace2 ( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; }
__forceinline LinearSpace2& operator=( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; return *this; }
template<typename L1> __forceinline LinearSpace2( const LinearSpace2<L1>& s ) : vx(s.vx), vy(s.vy) {}
/*! matrix construction from column vectors */
__forceinline LinearSpace2(const Vector& vx, const Vector& vy)
: vx(vx), vy(vy) {}
/*! matrix construction from row major data */
__forceinline LinearSpace2(const Scalar& m00, const Scalar& m01,
const Scalar& m10, const Scalar& m11)
: vx(m00,m10), vy(m01,m11) {}
/*! compute the determinant of the matrix */
__forceinline const Scalar det() const { return vx.x*vy.y - vx.y*vy.x; }
/*! compute adjoint matrix */
__forceinline const LinearSpace2 adjoint() const { return LinearSpace2(vy.y,-vy.x,-vx.y,vx.x); }
/*! compute inverse matrix (adjoint divided by determinant; no singularity check) */
__forceinline const LinearSpace2 inverse() const { return adjoint()/det(); }
/*! compute transposed matrix */
__forceinline const LinearSpace2 transposed() const { return LinearSpace2(vx.x,vx.y,vy.x,vy.y); }
/*! returns first row of matrix */
__forceinline Vector row0() const { return Vector(vx.x,vy.x); }
/*! returns second row of matrix */
__forceinline Vector row1() const { return Vector(vx.y,vy.y); }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline LinearSpace2( ZeroTy ) : vx(zero), vy(zero) {}
__forceinline LinearSpace2( OneTy ) : vx(one, zero), vy(zero, one) {}
/*! return matrix for scaling */
static __forceinline LinearSpace2 scale(const Vector& s) {
return LinearSpace2(s.x, 0,
0 , s.y);
}
/*! return matrix for rotation */
static __forceinline LinearSpace2 rotate(const Scalar& r) {
Scalar s = sin(r), c = cos(r);
return LinearSpace2(c, -s,
s, c);
}
/*! return closest orthogonal matrix (i.e. a general rotation including reflection) */
LinearSpace2 orthogonal() const
{
LinearSpace2 m = *this;
// mirrored?
Scalar mirror(one);
if (m.det() < Scalar(zero)) {
m.vx = -m.vx;
mirror = -mirror;
}
// rotation: iterate m <- (m + (m^T)^-1)/2 until converged (max 99 steps)
for (int i = 0; i < 99; i++) {
const LinearSpace2 m_next = 0.5 * (m + m.transposed().inverse());
const LinearSpace2 d = m_next - m;
m = m_next;
// norm^2 of difference small enough?
if (max(dot(d.vx, d.vx), dot(d.vy, d.vy)) < 1e-8)
break;
}
// rotation * mirror_x
return LinearSpace2(mirror*m.vx, m.vy);
}
public:
/*! the column vectors of the matrix */
Vector vx,vy;
};
////////////////////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline LinearSpace2<T> operator -( const LinearSpace2<T>& a ) { return LinearSpace2<T>(-a.vx,-a.vy); }
template<typename T> __forceinline LinearSpace2<T> operator +( const LinearSpace2<T>& a ) { return LinearSpace2<T>(+a.vx,+a.vy); }
/*! reciprocal of a matrix is its inverse */
template<typename T> __forceinline LinearSpace2<T> rcp ( const LinearSpace2<T>& a ) { return a.inverse(); }
////////////////////////////////////////////////////////////////////////////////
// Binary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline LinearSpace2<T> operator +( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return LinearSpace2<T>(a.vx+b.vx,a.vy+b.vy); }
template<typename T> __forceinline LinearSpace2<T> operator -( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return LinearSpace2<T>(a.vx-b.vx,a.vy-b.vy); }
template<typename T> __forceinline LinearSpace2<T> operator*(const typename T::Scalar & a, const LinearSpace2<T>& b) { return LinearSpace2<T>(a*b.vx, a*b.vy); }
/*! matrix * vector: linear combination of the columns */
template<typename T> __forceinline T operator*(const LinearSpace2<T>& a, const T & b) { return b.x*a.vx + b.y*a.vy; }
/*! matrix * matrix: transform b's columns by a */
template<typename T> __forceinline LinearSpace2<T> operator*(const LinearSpace2<T>& a, const LinearSpace2<T>& b) { return LinearSpace2<T>(a*b.vx, a*b.vy); }
template<typename T> __forceinline LinearSpace2<T> operator/(const LinearSpace2<T>& a, const typename T::Scalar & b) { return LinearSpace2<T>(a.vx/b, a.vy/b); }
/*! matrix / matrix: multiply by the inverse of b */
template<typename T> __forceinline LinearSpace2<T> operator/(const LinearSpace2<T>& a, const LinearSpace2<T>& b) { return a * rcp(b); }
template<typename T> __forceinline LinearSpace2<T>& operator *=( LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a = a * b; }
template<typename T> __forceinline LinearSpace2<T>& operator /=( LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a.vx == b.vx && a.vy == b.vy; }
template<typename T> __forceinline bool operator !=( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a.vx != b.vx || a.vy != b.vy; }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> static embree_ostream operator<<(embree_ostream cout, const LinearSpace2<T>& m) {
return cout << "{ vx = " << m.vx << ", vy = " << m.vy << "}";
}
/*! Shortcuts for common linear spaces. */
typedef LinearSpace2<Vec2f> LinearSpace2f;
typedef LinearSpace2<Vec2fa> LinearSpace2fa;
}

View file

@ -0,0 +1,217 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec3.h"
#include "quaternion.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// 3D Linear Transform (3x3 Matrix)
////////////////////////////////////////////////////////////////////////////////
/*! 3x3 matrix stored as three column vectors vx, vy, vz */
template<typename T> struct LinearSpace3
{
typedef T Vector;
typedef typename T::Scalar Scalar;
/*! default matrix constructor */
__forceinline LinearSpace3 ( ) {}
__forceinline LinearSpace3 ( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; }
__forceinline LinearSpace3& operator=( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; return *this; }
template<typename L1> __forceinline LinearSpace3( const LinearSpace3<L1>& s ) : vx(s.vx), vy(s.vy), vz(s.vz) {}
/*! matrix construction from column vectors */
__forceinline LinearSpace3(const Vector& vx, const Vector& vy, const Vector& vz)
: vx(vx), vy(vy), vz(vz) {}
/*! construction from quaternion (standard quaternion-to-rotation-matrix expansion) */
__forceinline LinearSpace3( const QuaternionT<Scalar>& q )
: vx((q.r*q.r + q.i*q.i - q.j*q.j - q.k*q.k), 2.0f*(q.i*q.j + q.r*q.k), 2.0f*(q.i*q.k - q.r*q.j))
, vy(2.0f*(q.i*q.j - q.r*q.k), (q.r*q.r - q.i*q.i + q.j*q.j - q.k*q.k), 2.0f*(q.j*q.k + q.r*q.i))
, vz(2.0f*(q.i*q.k + q.r*q.j), 2.0f*(q.j*q.k - q.r*q.i), (q.r*q.r - q.i*q.i - q.j*q.j + q.k*q.k)) {}
/*! matrix construction from row major data */
__forceinline LinearSpace3(const Scalar& m00, const Scalar& m01, const Scalar& m02,
const Scalar& m10, const Scalar& m11, const Scalar& m12,
const Scalar& m20, const Scalar& m21, const Scalar& m22)
: vx(m00,m10,m20), vy(m01,m11,m21), vz(m02,m12,m22) {}
/*! compute the determinant of the matrix (scalar triple product) */
__forceinline const Scalar det() const { return dot(vx,cross(vy,vz)); }
/*! compute adjoint matrix */
__forceinline const LinearSpace3 adjoint() const { return LinearSpace3(cross(vy,vz),cross(vz,vx),cross(vx,vy)).transposed(); }
/*! compute inverse matrix (adjoint divided by determinant; no singularity check) */
__forceinline const LinearSpace3 inverse() const { return adjoint()/det(); }
/*! compute transposed matrix */
__forceinline const LinearSpace3 transposed() const { return LinearSpace3(vx.x,vx.y,vx.z,vy.x,vy.y,vy.z,vz.x,vz.y,vz.z); }
/*! returns first row of matrix */
__forceinline Vector row0() const { return Vector(vx.x,vy.x,vz.x); }
/*! returns second row of matrix */
__forceinline Vector row1() const { return Vector(vx.y,vy.y,vz.y); }
/*! returns third row of matrix */
__forceinline Vector row2() const { return Vector(vx.z,vy.z,vz.z); }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline LinearSpace3( ZeroTy ) : vx(zero), vy(zero), vz(zero) {}
__forceinline LinearSpace3( OneTy ) : vx(one, zero, zero), vy(zero, one, zero), vz(zero, zero, one) {}
/*! return matrix for scaling */
static __forceinline LinearSpace3 scale(const Vector& s) {
return LinearSpace3(s.x, 0, 0,
0 , s.y, 0,
0 , 0, s.z);
}
/*! return matrix for rotation around arbitrary axis (Rodrigues' formula;
    the axis is normalized first) */
static __forceinline LinearSpace3 rotate(const Vector& _u, const Scalar& r) {
Vector u = normalize(_u);
Scalar s = sin(r), c = cos(r);
return LinearSpace3(u.x*u.x+(1-u.x*u.x)*c, u.x*u.y*(1-c)-u.z*s, u.x*u.z*(1-c)+u.y*s,
u.x*u.y*(1-c)+u.z*s, u.y*u.y+(1-u.y*u.y)*c, u.y*u.z*(1-c)-u.x*s,
u.x*u.z*(1-c)-u.y*s, u.y*u.z*(1-c)+u.x*s, u.z*u.z+(1-u.z*u.z)*c);
}
public:
/*! the column vectors of the matrix */
Vector vx,vy,vz;
};
#if !defined(__SYCL_DEVICE_ONLY__)
/*! compute transposed matrix — SIMD specialization for Vec3fa: reinterprets the
 *  three columns as 4-wide float vectors and transposes them in registers
 *  (host-only; the generic member function is used on SYCL devices) */
template<> __forceinline const LinearSpace3<Vec3fa> LinearSpace3<Vec3fa>::transposed() const {
vfloat4 rx,ry,rz; transpose((vfloat4&)vx,(vfloat4&)vy,(vfloat4&)vz,vfloat4(zero),rx,ry,rz);
return LinearSpace3<Vec3fa>(Vec3fa(rx),Vec3fa(ry),Vec3fa(rz));
}
#endif
/*! free-function form of transpose, forwards to the member function */
template<typename T>
__forceinline const LinearSpace3<T> transposed(const LinearSpace3<T>& xfm) {
return xfm.transposed();
}
////////////////////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline LinearSpace3<T> operator -( const LinearSpace3<T>& a ) { return LinearSpace3<T>(-a.vx,-a.vy,-a.vz); }
template<typename T> __forceinline LinearSpace3<T> operator +( const LinearSpace3<T>& a ) { return LinearSpace3<T>(+a.vx,+a.vy,+a.vz); }
/*! reciprocal of a linear space is its inverse */
template<typename T> __forceinline LinearSpace3<T> rcp ( const LinearSpace3<T>& a ) { return a.inverse(); }
/* constructs a coordinate frame from a normalized normal */
template<typename T> __forceinline LinearSpace3<T> frame(const T& N)
{
// two candidate tangents; pick the longer one to avoid a degenerate
// cross product when N is (nearly) aligned with a coordinate axis
const T dx0(0,N.z,-N.y);
const T dx1(-N.z,0,N.x);
const T dx = normalize(select(dot(dx0,dx0) > dot(dx1,dx1),dx0,dx1));
const T dy = normalize(cross(N,dx));
return LinearSpace3<T>(dx,dy,N);
}
/* constructs a coordinate frame from a normal and approximate x-direction */
template<typename T> __forceinline LinearSpace3<T> frame(const T& N, const T& dxi)
{
if (abs(dot(dxi,N)) > 0.99f) return frame(N); // fallback in case N and dxi are very parallel
const T dx = normalize(cross(dxi,N));
const T dy = normalize(cross(N,dx));
return LinearSpace3<T>(dx,dy,N);
}
/* clamps linear space to range -1 to +1 (component-wise on each column) */
template<typename T> __forceinline LinearSpace3<T> clamp(const LinearSpace3<T>& space) {
return LinearSpace3<T>(clamp(space.vx,T(-1.0f),T(1.0f)),
clamp(space.vy,T(-1.0f),T(1.0f)),
clamp(space.vz,T(-1.0f),T(1.0f)));
}
////////////////////////////////////////////////////////////////////////////////
// Binary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline LinearSpace3<T> operator +( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return LinearSpace3<T>(a.vx+b.vx,a.vy+b.vy,a.vz+b.vz); }
template<typename T> __forceinline LinearSpace3<T> operator -( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return LinearSpace3<T>(a.vx-b.vx,a.vy-b.vy,a.vz-b.vz); }
template<typename T> __forceinline LinearSpace3<T> operator*(const typename T::Scalar & a, const LinearSpace3<T>& b) { return LinearSpace3<T>(a*b.vx, a*b.vy, a*b.vz); }
/*! matrix * vector: linear combination of the columns, fused via madd */
template<typename T> __forceinline T operator*(const LinearSpace3<T>& a, const T & b) { return madd(T(b.x),a.vx,madd(T(b.y),a.vy,T(b.z)*a.vz)); }
/*! matrix * matrix: transforms each column of b by a */
template<typename T> __forceinline LinearSpace3<T> operator*(const LinearSpace3<T>& a, const LinearSpace3<T>& b) { return LinearSpace3<T>(a*b.vx, a*b.vy, a*b.vz); }
template<typename T> __forceinline LinearSpace3<T> operator/(const LinearSpace3<T>& a, const typename T::Scalar & b) { return LinearSpace3<T>(a.vx/b, a.vy/b, a.vz/b); }
/*! matrix / matrix: multiply by the inverse of b */
template<typename T> __forceinline LinearSpace3<T> operator/(const LinearSpace3<T>& a, const LinearSpace3<T>& b) { return a * rcp(b); }
template<typename T> __forceinline LinearSpace3<T>& operator *=( LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a = a * b; }
template<typename T> __forceinline LinearSpace3<T>& operator /=( LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a = a / b; }
/*! points and vectors are transformed by the matrix directly */
template<typename T> __forceinline T xfmPoint (const LinearSpace3<T>& s, const T & a) { return madd(T(a.x),s.vx,madd(T(a.y),s.vy,T(a.z)*s.vz)); }
template<typename T> __forceinline T xfmVector(const LinearSpace3<T>& s, const T & a) { return madd(T(a.x),s.vx,madd(T(a.y),s.vy,T(a.z)*s.vz)); }
/*! normals are transformed by the inverse transpose to stay perpendicular */
template<typename T> __forceinline T xfmNormal(const LinearSpace3<T>& s, const T & a) { return xfmVector(s.inverse().transposed(),a); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a.vx == b.vx && a.vy == b.vy && a.vz == b.vz; }
template<typename T> __forceinline bool operator !=( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a.vx != b.vx || a.vy != b.vy || a.vz != b.vz; }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
/*! lane-wise select between two linear spaces, applied column by column */
template<typename T> __forceinline LinearSpace3<T> select ( const typename T::Scalar::Bool& s, const LinearSpace3<T>& t, const LinearSpace3<T>& f ) {
return LinearSpace3<T>(select(s,t.vx,f.vx),select(s,t.vy,f.vy),select(s,t.vz,f.vz));
}
/*! blending: component-wise linear interpolation of the columns */
template<typename T>
__forceinline LinearSpace3<T> lerp(const LinearSpace3<T>& l0, const LinearSpace3<T>& l1, const float t)
{
return LinearSpace3<T>(lerp(l0.vx,l1.vx,t),
lerp(l0.vy,l1.vy,t),
lerp(l0.vz,l1.vz,t));
}
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
/*! prints the three column vectors of the linear space */
template<typename T> static embree_ostream operator<<(embree_ostream cout, const LinearSpace3<T>& m) {
return cout << "{ vx = " << m.vx << ", vy = " << m.vy << ", vz = " << m.vz << "}";
}
/*! Shortcuts for common linear spaces. */
typedef LinearSpace3<Vec3f> LinearSpace3f;
typedef LinearSpace3<Vec3fa> LinearSpace3fa;
typedef LinearSpace3<Vec3fx> LinearSpace3fx;
typedef LinearSpace3<Vec3ff> LinearSpace3ff;
// SIMD variants: one linear space per vfloat lane
template<int N> using LinearSpace3vf = LinearSpace3<Vec3<vfloat<N>>>;
typedef LinearSpace3<Vec3<vfloat<4>>> LinearSpace3vf4;
typedef LinearSpace3<Vec3<vfloat<8>>> LinearSpace3vf8;
typedef LinearSpace3<Vec3<vfloat<16>>> LinearSpace3vf16;
/*! Blending with a generic interpolation-factor type S (e.g. a SIMD scalar):
 *  each column of the result is the lerp of the corresponding columns. */
template<typename T, typename S>
__forceinline LinearSpace3<T> lerp(const LinearSpace3<T>& l0, const LinearSpace3<T>& l1, const S& t)
{
const T cx = lerp(l0.vx, l1.vx, t);
const T cy = lerp(l0.vy, l1.vy, t);
const T cz = lerp(l0.vz, l1.vz, t);
return LinearSpace3<T>(cx, cy, cz);
}
}

View file

@ -0,0 +1,279 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include "../sys/intrinsics.h"
#include "constants.h"
#include <cmath>
namespace embree
{
// Scalar float helpers for SYCL device code: thin wrappers mapping the math
// vocabulary used throughout embree onto sycl:: built-ins and bit casts.
/*! true if v lies strictly inside (-FLT_LARGE, +FLT_LARGE); uses bitwise '&'
    (branchless, both comparisons always evaluated) */
__forceinline bool isvalid ( const float& v ) {
return (v > -FLT_LARGE) & (v < +FLT_LARGE);
}
/*! reinterpret the bits of a float as an int (no numeric conversion) */
__forceinline int cast_f2i(float f) {
return __builtin_bit_cast(int,f);
}
/*! reinterpret the bits of an int as a float (no numeric conversion) */
__forceinline float cast_i2f(int i) {
return __builtin_bit_cast(float,i);
}
/*! numeric conversions (truncation / exact widening) */
__forceinline int toInt (const float& a) { return int(a); }
__forceinline float toFloat(const int& a) { return float(a); }
/*! bit reinterpretation, same as cast_i2f/cast_f2i */
__forceinline float asFloat(const int a) { return __builtin_bit_cast(float,a); }
__forceinline int asInt (const float a) { return __builtin_bit_cast(int,a); }
//__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
/*! sign of x as +-1.0f; note sign(0.0f) == +1.0f */
__forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }
__forceinline float sqr ( const float x ) { return x*x; }
/*! fast reciprocal via the SYCL native instruction
    (NOTE(review): native precision is implementation-defined — see SYCL spec) */
__forceinline float rcp ( const float x ) {
return sycl::native::recip(x);
}
/*! isolates the sign bit of a (result is -0.0f or +0.0f) */
__forceinline float signmsk(const float a) { return asFloat(asInt(a) & 0x80000000); }
//__forceinline float signmsk ( const float x ) {
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
//}
//__forceinline float xorf( const float x, const float y ) {
// return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
//}
//__forceinline float andf( const float x, const unsigned y ) {
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
//}
/*! reciprocal square root via SYCL built-in */
__forceinline float rsqrt( const float x ) {
return sycl::rsqrt(x);
}
//__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
//__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
//__forceinline int roundf(float f) { return (int)(f + 0.5f); }
/*! transcendental and rounding wrappers, all forwarding to sycl:: built-ins */
__forceinline float abs ( const float x ) { return sycl::fabs(x); }
__forceinline float acos ( const float x ) { return sycl::acos(x); }
__forceinline float asin ( const float x ) { return sycl::asin(x); }
__forceinline float atan ( const float x ) { return sycl::atan(x); }
__forceinline float atan2( const float y, const float x ) { return sycl::atan2(y, x); }
__forceinline float cos ( const float x ) { return sycl::cos(x); }
__forceinline float cosh ( const float x ) { return sycl::cosh(x); }
__forceinline float exp ( const float x ) { return sycl::exp(x); }
__forceinline float fmod ( const float x, const float y ) { return sycl::fmod(x, y); }
__forceinline float log ( const float x ) { return sycl::log(x); }
__forceinline float log10( const float x ) { return sycl::log10(x); }
__forceinline float pow ( const float x, const float y ) { return sycl::pow(x, y); }
__forceinline float sin ( const float x ) { return sycl::sin(x); }
__forceinline float sinh ( const float x ) { return sycl::sinh(x); }
__forceinline float sqrt ( const float x ) { return sycl::sqrt(x); }
__forceinline float tan ( const float x ) { return sycl::tan(x); }
__forceinline float tanh ( const float x ) { return sycl::tanh(x); }
__forceinline float floor( const float x ) { return sycl::floor(x); }
__forceinline float ceil ( const float x ) { return sycl::ceil(x); }
/*! fractional part: x - floor(x), always in [0,1) for finite x */
__forceinline float frac ( const float x ) { return x-floor(x); }
//__forceinline double abs ( const double x ) { return ::fabs(x); }
//__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
//__forceinline double acos ( const double x ) { return ::acos (x); }
//__forceinline double asin ( const double x ) { return ::asin (x); }
//__forceinline double atan ( const double x ) { return ::atan (x); }
//__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
//__forceinline double cos ( const double x ) { return ::cos (x); }
//__forceinline double cosh ( const double x ) { return ::cosh (x); }
//__forceinline double exp ( const double x ) { return ::exp (x); }
//__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
//__forceinline double log ( const double x ) { return ::log (x); }
//__forceinline double log10( const double x ) { return ::log10(x); }
//__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
//__forceinline double rcp ( const double x ) { return 1.0/x; }
//__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
//__forceinline double sin ( const double x ) { return ::sin (x); }
//__forceinline double sinh ( const double x ) { return ::sinh (x); }
//__forceinline double sqr ( const double x ) { return x*x; }
//__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
//__forceinline double tan ( const double x ) { return ::tan (x); }
//__forceinline double tanh ( const double x ) { return ::tanh (x); }
//__forceinline double floor( const double x ) { return ::floor (x); }
//__forceinline double ceil ( const double x ) { return ::ceil (x); }
/*
#if defined(__SSE4_1__)
__forceinline float mini(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_min_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
#if defined(__SSE4_1__)
__forceinline float maxi(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_max_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
*/
/*! doubles a value (a+a) */
template<typename T>
__forceinline T twice(const T& a) { return a+a; }
/*! minimum overloads forwarding to sycl::min / sycl::fmin */
__forceinline int min(int a, int b) { return sycl::min(a,b); }
__forceinline unsigned min(unsigned a, unsigned b) { return sycl::min(a,b); }
__forceinline int64_t min(int64_t a, int64_t b) { return sycl::min(a,b); }
__forceinline float min(float a, float b) { return sycl::fmin(a,b); }
__forceinline double min(double a, double b) { return sycl::fmin(a,b); }
#if defined(__X86_64__)
// on 32-bit targets size_t coincides with unsigned and would be a duplicate overload
__forceinline size_t min(size_t a, size_t b) { return sycl::min(a,b); }
#endif
/*! variadic-style minimum of 3..5 values, built from pairwise min */
template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
/*! maximum overloads forwarding to sycl::max / sycl::fmax */
__forceinline int max(int a, int b) { return sycl::max(a,b); }
__forceinline unsigned max(unsigned a, unsigned b) { return sycl::max(a,b); }
__forceinline int64_t max(int64_t a, int64_t b) { return sycl::max(a,b); }
__forceinline float max(float a, float b) { return sycl::fmax(a,b); }
__forceinline double max(double a, double b) { return sycl::fmax(a,b); }
#if defined(__X86_64__)
__forceinline size_t max(size_t a, size_t b) { return sycl::max(a,b); }
#endif
template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
/*! clamps x to [lower, upper]; defaults clamp to [0,1] */
template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
/*! clamps x to [0, upper] */
template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
/*! degree/radian conversion; constants are pi/180 and 180/pi */
template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
/*! converts sin to cos (and vice versa) via sqrt(1 - x^2), clamped at zero */
template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
/*! fused multiply variants built on sycl::fma:
    madd = a*b+c, msub = a*b-c, nmadd = -a*b+c, nmsub = -a*b-c */
__forceinline float madd ( const float a, const float b, const float c) { return +sycl::fma(+a,b,+c); }
__forceinline float msub ( const float a, const float b, const float c) { return +sycl::fma(+a,b,-c); }
__forceinline float nmadd ( const float a, const float b, const float c) { return +sycl::fma(-a,b,+c); }
__forceinline float nmsub ( const float a, const float b, const float c) { return -sycl::fma(+a,b,+c); }
/*! random functions */
/*
template<typename T> T random() { return T(0); }
template<> __forceinline int random() { return int(rand()); }
template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
template<> __forceinline float random() { return rand()/float(RAND_MAX); }
template<> __forceinline double random() { return rand()/double(RAND_MAX); }
*/
/*! selects: scalar counterparts of the SIMD mask operations, so generic code
    works with plain bool masks as well */
__forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }
__forceinline int select(bool s, int t, int f) { return s ? t : f; }
__forceinline float select(bool s, float t, float f) { return s ? t : f; }
__forceinline bool none(bool s) { return !s; }
__forceinline bool all (bool s) { return s; }
__forceinline bool any (bool s) { return s; }
/*! scalar movemask: 1 if the "mask" is set, 0 otherwise */
__forceinline unsigned movemask (bool s) { return (unsigned)s; }
/*! linear interpolation (1-t)*v0 + t*v1, fused via madd */
__forceinline float lerp(const float v0, const float v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
/*! bilinear interpolation of the four corner values x0..x3 at parameters (u,v) */
template<typename T>
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
}
/*! exchange */
template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }
/* load/store: masked memory access; when the mask is false, loads return 0.0f
   and stores are skipped */
template<typename Ty> struct mem;
template<> struct mem<float> {
static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
};
/*! Reverses the order of the 32 bits of \a vin (bit 0 becomes bit 31, etc.)
 *  by swapping successively larger groups: single bits, pairs, nibbles,
 *  bytes and finally the two half-words. */
template<class T>
__forceinline T bitReverse(const T& vin)
{
T bits = vin;
bits = ((bits >> 1) & 0x55555555) | ((bits & 0x55555555) << 1);  // swap adjacent bits
bits = ((bits >> 2) & 0x33333333) | ((bits & 0x33333333) << 2);  // swap bit pairs
bits = ((bits >> 4) & 0x0F0F0F0F) | ((bits & 0x0F0F0F0F) << 4);  // swap nibbles
bits = ((bits >> 8) & 0x00FF00FF) | ((bits & 0x00FF00FF) << 8);  // swap bytes
bits = (bits >> 16) | (bits << 16);                              // swap half-words
return bits;
}
/*! bit interleave operation: spreads the low 10 bits of x, y and z so that the
    result has x in bits 0,3,6,..., y in bits 1,4,7,... and z in bits 2,5,8,...
    (3D Morton-code style interleave) */
template<class T>
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
{
T x = xin, y = yin, z = zin;
// each block spreads one coordinate's bits apart by inserting two zero bits
// between consecutive source bits (standard mask-and-shift expansion)
x = (x | (x << 16)) & 0x030000FF;
x = (x | (x << 8)) & 0x0300F00F;
x = (x | (x << 4)) & 0x030C30C3;
x = (x | (x << 2)) & 0x09249249;
y = (y | (y << 16)) & 0x030000FF;
y = (y | (y << 8)) & 0x0300F00F;
y = (y | (y << 4)) & 0x030C30C3;
y = (y | (y << 2)) & 0x09249249;
z = (z | (z << 16)) & 0x030000FF;
z = (z | (z << 8)) & 0x0300F00F;
z = (z | (z << 4)) & 0x030C30C3;
z = (z | (z << 2)) & 0x09249249;
return x | (y << 1) | (z << 2);
}
/*! bit interleave operation for 64bit data types: same interleave but taking
    the low 21 bits of each input (3*21 = 63 result bits) */
template<class T>
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
T x = xin & 0x1fffff;
T y = yin & 0x1fffff;
T z = zin & 0x1fffff;
x = (x | x << 32) & 0x1f00000000ffff;
x = (x | x << 16) & 0x1f0000ff0000ff;
x = (x | x << 8) & 0x100f00f00f00f00f;
x = (x | x << 4) & 0x10c30c30c30c30c3;
x = (x | x << 2) & 0x1249249249249249;
y = (y | y << 32) & 0x1f00000000ffff;
y = (y | y << 16) & 0x1f0000ff0000ff;
y = (y | y << 8) & 0x100f00f00f00f00f;
y = (y | y << 4) & 0x10c30c30c30c30c3;
y = (y | y << 2) & 0x1249249249249249;
z = (z | z << 32) & 0x1f00000000ffff;
z = (z | z << 16) & 0x1f0000ff0000ff;
z = (z | z << 8) & 0x100f00f00f00f00f;
z = (z | z << 4) & 0x10c30c30c30c30c3;
z = (z | z << 2) & 0x1249249249249249;
return x | (y << 1) | (z << 2);
}
}

View file

@ -0,0 +1,39 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bbox.h"
#include "linearspace3.h"
namespace embree
{
/*! Oriented bounding box: an axis-aligned box (bounds) defined inside a
    rotated coordinate system (space). A point is inside the OBB if its image
    under 'space' lies inside 'bounds'. */
template<typename T>
struct OBBox
{
public:
__forceinline OBBox () {}
/*! empty box with identity orientation */
__forceinline OBBox (EmptyTy)
: space(one), bounds(empty) {}
/*! axis-aligned box (identity orientation) */
__forceinline OBBox (const BBox<T>& bounds)
: space(one), bounds(bounds) {}
/*! box with explicit orientation */
__forceinline OBBox (const LinearSpace3<T>& space, const BBox<T>& bounds)
: space(space), bounds(bounds) {}
friend embree_ostream operator<<(embree_ostream cout, const OBBox& p) {
return cout << "{ space = " << p.space << ", bounds = " << p.bounds << "}";
}
public:
LinearSpace3<T> space; //!< orthonormal transformation
BBox<T> bounds; //!< bounds in transformed space
};
typedef OBBox<Vec3f> OBBox3f;
typedef OBBox<Vec3fa> OBBox3fa;
}

View file

@ -0,0 +1,258 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec3.h"
#include "vec4.h"
#include "transcendental.h"
namespace embree
{
////////////////////////////////////////////////////////////////
// Quaternion Struct
////////////////////////////////////////////////////////////////
/*! Quaternion with real part r and imaginary parts (i, j, k); T is the scalar type. */
template<typename T>
struct QuaternionT
{
typedef Vec3<T> Vector;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline QuaternionT () { }
__forceinline QuaternionT ( const QuaternionT& other ) { r = other.r; i = other.i; j = other.j; k = other.k; }
__forceinline QuaternionT& operator=( const QuaternionT& other ) { r = other.r; i = other.i; j = other.j; k = other.k; return *this; }
/*! purely real quaternion */
__forceinline QuaternionT( const T& r ) : r(r), i(zero), j(zero), k(zero) {}
/*! purely imaginary quaternion from a vector */
__forceinline explicit QuaternionT( const Vec3<T>& v ) : r(zero), i(v.x), j(v.y), k(v.z) {}
/*! from a 4-vector laid out as (r, i, j, k) */
__forceinline explicit QuaternionT( const Vec4<T>& v ) : r(v.x), i(v.y), j(v.z), k(v.w) {}
__forceinline QuaternionT( const T& r, const T& i, const T& j, const T& k ) : r(r), i(i), j(j), k(k) {}
__forceinline QuaternionT( const T& r, const Vec3<T>& v ) : r(r), i(v.x), j(v.y), k(v.z) {}
/*! from a rotation matrix given as three column vectors (defined out of line below) */
__inline QuaternionT( const Vec3<T>& vx, const Vec3<T>& vy, const Vec3<T>& vz );
/*! from Euler angles (defined out of line below) */
__inline QuaternionT( const T& yaw, const T& pitch, const T& roll );
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline QuaternionT( ZeroTy ) : r(zero), i(zero), j(zero), k(zero) {}
__forceinline QuaternionT( OneTy ) : r( one), i(zero), j(zero), k(zero) {}
/*! return quaternion for rotation around arbitrary axis u by angle r (radians);
    u is normalized here */
static __forceinline QuaternionT rotate(const Vec3<T>& u, const T& r) {
return QuaternionT<T>(cos(T(0.5)*r),sin(T(0.5)*r)*normalize(u));
}
/*! returns the imaginary (vector) part (i, j, k) of the quaternion */
__forceinline Vec3<T> v( ) const { return Vec3<T>(i, j, k); }
public:
T r, i, j, k;
};
/*! scalar * quaternion and quaternion * scalar (component-wise) */
template<typename T> __forceinline QuaternionT<T> operator *( const T & a, const QuaternionT<T>& b ) { return QuaternionT<T>(a * b.r, a * b.i, a * b.j, a * b.k); }
template<typename T> __forceinline QuaternionT<T> operator *( const QuaternionT<T>& a, const T & b ) { return QuaternionT<T>(a.r * b, a.i * b, a.j * b, a.k * b); }
////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////
template<typename T> __forceinline QuaternionT<T> operator +( const QuaternionT<T>& a ) { return QuaternionT<T>(+a.r, +a.i, +a.j, +a.k); }
template<typename T> __forceinline QuaternionT<T> operator -( const QuaternionT<T>& a ) { return QuaternionT<T>(-a.r, -a.i, -a.j, -a.k); }
/*! conjugate: negates the imaginary part */
template<typename T> __forceinline QuaternionT<T> conj ( const QuaternionT<T>& a ) { return QuaternionT<T>(a.r, -a.i, -a.j, -a.k); }
/*! magnitude (Euclidean norm of the four components) */
template<typename T> __forceinline T abs ( const QuaternionT<T>& a ) { return sqrt(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k); }
/*! multiplicative inverse: conjugate divided by squared norm */
template<typename T> __forceinline QuaternionT<T> rcp ( const QuaternionT<T>& a ) { return conj(a)*rcp(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k); }
template<typename T> __forceinline QuaternionT<T> normalize ( const QuaternionT<T>& a ) { return a*rsqrt(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k); }
// evaluates a*q-p (component-wise fused multiply-subtract)
template<typename T> __forceinline QuaternionT<T>
msub(const T& a, const QuaternionT<T>& q, const QuaternionT<T>& p)
{
return QuaternionT<T>(msub(a, q.r, p.r),
msub(a, q.i, p.i),
msub(a, q.j, p.j),
msub(a, q.k, p.k));
}
// evaluates a*q+p (component-wise fused multiply-add)
template<typename T> __forceinline QuaternionT<T>
madd (const T& a, const QuaternionT<T>& q, const QuaternionT<T>& p)
{
return QuaternionT<T>(madd(a, q.r, p.r),
madd(a, q.i, p.i),
madd(a, q.j, p.j),
madd(a, q.k, p.k));
}
////////////////////////////////////////////////////////////////
// Binary Operators
////////////////////////////////////////////////////////////////
/*! addition/subtraction with scalars acts on the real part only */
template<typename T> __forceinline QuaternionT<T> operator +( const T & a, const QuaternionT<T>& b ) { return QuaternionT<T>(a + b.r, b.i, b.j, b.k); }
template<typename T> __forceinline QuaternionT<T> operator +( const QuaternionT<T>& a, const T & b ) { return QuaternionT<T>(a.r + b, a.i, a.j, a.k); }
template<typename T> __forceinline QuaternionT<T> operator +( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return QuaternionT<T>(a.r + b.r, a.i + b.i, a.j + b.j, a.k + b.k); }
template<typename T> __forceinline QuaternionT<T> operator -( const T & a, const QuaternionT<T>& b ) { return QuaternionT<T>(a - b.r, -b.i, -b.j, -b.k); }
template<typename T> __forceinline QuaternionT<T> operator -( const QuaternionT<T>& a, const T & b ) { return QuaternionT<T>(a.r - b, a.i, a.j, a.k); }
template<typename T> __forceinline QuaternionT<T> operator -( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return QuaternionT<T>(a.r - b.r, a.i - b.i, a.j - b.j, a.k - b.k); }
/*! quaternion * vector: rotates b by conjugation a * b * conj(a) */
template<typename T> __forceinline Vec3<T> operator *( const QuaternionT<T>& a, const Vec3<T> & b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
/*! Hamilton product of two quaternions */
template<typename T> __forceinline QuaternionT<T> operator *( const QuaternionT<T>& a, const QuaternionT<T>& b ) {
return QuaternionT<T>(a.r*b.r - a.i*b.i - a.j*b.j - a.k*b.k,
a.r*b.i + a.i*b.r + a.j*b.k - a.k*b.j,
a.r*b.j - a.i*b.k + a.j*b.r + a.k*b.i,
a.r*b.k + a.i*b.j - a.j*b.i + a.k*b.r);
}
/*! division implemented as multiplication by the inverse */
template<typename T> __forceinline QuaternionT<T> operator /( const T & a, const QuaternionT<T>& b ) { return a*rcp(b); }
template<typename T> __forceinline QuaternionT<T> operator /( const QuaternionT<T>& a, const T & b ) { return a*rcp(b); }
template<typename T> __forceinline QuaternionT<T> operator /( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return a*rcp(b); }
template<typename T> __forceinline QuaternionT<T>& operator +=( QuaternionT<T>& a, const T & b ) { return a = a+b; }
template<typename T> __forceinline QuaternionT<T>& operator +=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a+b; }
template<typename T> __forceinline QuaternionT<T>& operator -=( QuaternionT<T>& a, const T & b ) { return a = a-b; }
template<typename T> __forceinline QuaternionT<T>& operator -=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a-b; }
template<typename T> __forceinline QuaternionT<T>& operator *=( QuaternionT<T>& a, const T & b ) { return a = a*b; }
template<typename T> __forceinline QuaternionT<T>& operator *=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a*b; }
template<typename T> __forceinline QuaternionT<T>& operator /=( QuaternionT<T>& a, const T & b ) { return a = a*rcp(b); }
template<typename T> __forceinline QuaternionT<T>& operator /=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a*rcp(b); }
/*! lane-wise select between two quaternions, component by component */
template<typename T, typename M> __forceinline QuaternionT<T>
select(const M& m, const QuaternionT<T>& q, const QuaternionT<T>& p)
{
return QuaternionT<T>(select(m, q.r, p.r),
select(m, q.i, p.i),
select(m, q.j, p.j),
select(m, q.k, p.k));
}
/*! points, vectors and normals are all rotated by conjugation (identical for a rotation) */
template<typename T> __forceinline Vec3<T> xfmPoint ( const QuaternionT<T>& a, const Vec3<T>& b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
template<typename T> __forceinline Vec3<T> xfmVector( const QuaternionT<T>& a, const Vec3<T>& b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
template<typename T> __forceinline Vec3<T> xfmNormal( const QuaternionT<T>& a, const Vec3<T>& b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
/*! 4D dot product of the component vectors */
template<typename T> __forceinline T dot(const QuaternionT<T>& a, const QuaternionT<T>& b) { return a.r*b.r + a.i*b.i + a.j*b.j + a.k*b.k; }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return a.r == b.r && a.i == b.i && a.j == b.j && a.k == b.k; }
template<typename T> __forceinline bool operator !=( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return a.r != b.r || a.i != b.i || a.j != b.j || a.k != b.k; }
////////////////////////////////////////////////////////////////////////////////
/// Orientation Functions
////////////////////////////////////////////////////////////////////////////////
/*! constructs a quaternion from a rotation matrix given by its three column
    vectors; branches on the trace / largest diagonal element so that the
    rsqrt argument t stays large for numerical stability */
template<typename T> QuaternionT<T>::QuaternionT( const Vec3<T>& vx, const Vec3<T>& vy, const Vec3<T>& vz )
{
if ( vx.x + vy.y + vz.z >= T(zero) )
{
const T t = T(one) + (vx.x + vy.y + vz.z);
const T s = rsqrt(t)*T(0.5f);
r = t*s;
i = (vy.z - vz.y)*s;
j = (vz.x - vx.z)*s;
k = (vx.y - vy.x)*s;
}
else if ( vx.x >= max(vy.y, vz.z) )
{
const T t = (T(one) + vx.x) - (vy.y + vz.z);
const T s = rsqrt(t)*T(0.5f);
r = (vy.z - vz.y)*s;
i = t*s;
j = (vx.y + vy.x)*s;
k = (vz.x + vx.z)*s;
}
else if ( vy.y >= vz.z ) // if ( vy.y >= max(vz.z, vx.x) )
{
const T t = (T(one) + vy.y) - (vz.z + vx.x);
const T s = rsqrt(t)*T(0.5f);
r = (vz.x - vx.z)*s;
i = (vx.y + vy.x)*s;
j = t*s;
k = (vy.z + vz.y)*s;
}
else //if ( vz.z >= max(vy.y, vx.x) )
{
const T t = (T(one) + vz.z) - (vx.x + vy.y);
const T s = rsqrt(t)*T(0.5f);
r = (vx.y - vy.x)*s;
i = (vz.x + vx.z)*s;
j = (vy.z + vz.y)*s;
k = t*s;
}
}
/*! constructs a quaternion from Euler angles (radians) by combining the
    half-angle sines/cosines of yaw, pitch and roll */
template<typename T> QuaternionT<T>::QuaternionT( const T& yaw, const T& pitch, const T& roll )
{
const T cya = cos(yaw *T(0.5f));
const T cpi = cos(pitch*T(0.5f));
const T cro = cos(roll *T(0.5f));
const T sya = sin(yaw *T(0.5f));
const T spi = sin(pitch*T(0.5f));
const T sro = sin(roll *T(0.5f));
r = cro*cya*cpi + sro*sya*spi;
i = cro*cya*spi + sro*sya*cpi;
j = cro*sya*cpi - sro*cya*spi;
k = sro*cya*cpi - cro*sya*spi;
}
//////////////////////////////////////////////////////////////////////////////
/// Output Operators
//////////////////////////////////////////////////////////////////////////////
/*! prints the four components of the quaternion */
template<typename T> static embree_ostream operator<<(embree_ostream cout, const QuaternionT<T>& q) {
return cout << "{ r = " << q.r << ", i = " << q.i << ", j = " << q.j << ", k = " << q.k << " }";
}
/*! default template instantiations */
typedef QuaternionT<float> Quaternion3f;
typedef QuaternionT<double> Quaternion3d;
// SIMD variants: one quaternion per vfloat lane
template<int N> using Quaternion3vf = QuaternionT<vfloat<N>>;
typedef QuaternionT<vfloat<4>> Quaternion3vf4;
typedef QuaternionT<vfloat<8>> Quaternion3vf8;
typedef QuaternionT<vfloat<16>> Quaternion3vf16;
//////////////////////////////////////////////////////////////////////////////
/// Interpolation
//////////////////////////////////////////////////////////////////////////////
// Componentwise linear interpolation between two quaternions.
// The result is NOT renormalized; see slerp() for spherical interpolation.
template<typename T>
__forceinline QuaternionT<T> lerp(const QuaternionT<T>& q0,
                                  const QuaternionT<T>& q1,
                                  const T& factor)
{
  QuaternionT<T> result;
  result.r = lerp(q0.r, q1.r, factor);
  result.i = lerp(q0.i, q1.i, factor);
  result.j = lerp(q0.j, q1.j, factor);
  result.k = lerp(q0.k, q1.k, factor);
  return result;
}
// Spherical linear interpolation between unit quaternions q0 and q1_ at
// parameter t. q1_ is negated when dot(q0, q1_) < 0 so interpolation takes
// the shorter arc; when the inputs are nearly parallel (cosTheta > 0.9995)
// a normalized lerp is used instead, avoiding the unstable normalize of a
// near-zero orthogonal component.
template<typename T>
__forceinline QuaternionT<T> slerp(const QuaternionT<T>& q0,
const QuaternionT<T>& q1_,
const T& t)
{
T cosTheta = dot(q0, q1_);
QuaternionT<T> q1 = select(cosTheta < 0.f, -q1_, q1_);
cosTheta = select(cosTheta < 0.f, -cosTheta, cosTheta);
// spherical linear interpolation
const T phi = t * fastapprox::acos(cosTheta);
T sinPhi, cosPhi;
fastapprox::sincos(phi, sinPhi, cosPhi);
// msub(cosTheta,q0,q1) = cosTheta*q0 - q1 is the negated orthogonal
// component of q1; the sign cancels against the msub below
QuaternionT<T> qperp = sinPhi * normalize(msub(cosTheta, q0, q1));
QuaternionT<T> qslerp = msub(cosPhi, q0, qperp);
// regular linear interpolation as fallback
QuaternionT<T> qlerp = normalize(lerp(q0, q1, t));
return select(cosTheta > 0.9995f, qlerp, qslerp);
}
}

View file

@ -0,0 +1,114 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec.h"
namespace embree {
// Small pseudo random number generator; `s` holds the full generator state.
struct RandomSampler
{
unsigned int s; // current 32-bit state, advanced by LCG_next
};
// One MurmurHash3 mixing round: scramble the key k, then fold it into hash.
__forceinline unsigned int MurmurHash3_mix(unsigned int hash, unsigned int k)
{
  const unsigned int c1 = 0xcc9e2d51;
  const unsigned int c2 = 0x1b873593;
  k *= c1;
  k = (k << 15) | (k >> 17);          // rotl(k, 15)
  k *= c2;
  hash ^= k;
  hash = (hash << 13) | (hash >> 19); // rotl(hash, 13)
  return hash * 5 + 0xe6546b64;
}
// MurmurHash3 avalanche finalizer: xor-shift / multiply rounds on the state.
__forceinline unsigned int MurmurHash3_finalize(unsigned int hash)
{
  unsigned int h = hash;
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  return h ^ (h >> 16);
}
// One step of a 32-bit linear congruential generator (wraps mod 2^32).
__forceinline unsigned int LCG_next(unsigned int value)
{
  return 1664525u * value + 1013904223u;
}
// Seed the sampler state by hashing the id.
__forceinline void RandomSampler_init(RandomSampler& self, int id)
{
  self.s = MurmurHash3_finalize(MurmurHash3_mix(0, id));
}
// Seed from (pixel, sample): fold both ids into one hash, then finalize.
__forceinline void RandomSampler_init(RandomSampler& self, int pixelId, int sampleId)
{
  unsigned int h = MurmurHash3_mix(0, pixelId);
  h = MurmurHash3_mix(h, sampleId);
  self.s = MurmurHash3_finalize(h);
}
// Seed from a 2D pixel coordinate: pack (x, y) into one id with y in the
// high 16 bits, then delegate to the (pixelId, sampleId) overload.
__forceinline void RandomSampler_init(RandomSampler& self, int x, int y, int sampleId)
{
  const int pixelId = x | (y << 16);
  RandomSampler_init(self, pixelId, sampleId);
}
// Advance the generator and return a non-negative int (state shifted right
// by one bit so the sign bit is always clear).
__forceinline int RandomSampler_getInt(RandomSampler& self) {
  self.s = LCG_next(self.s);
  return self.s >> 1;
}
// Advance the generator and return the full 32-bit state.
__forceinline unsigned int RandomSampler_getUInt(RandomSampler& self) {
  self.s = LCG_next(self.s);
  return self.s;
}
// Uniform float in [0,1): scale the 31-bit integer sample by 2^-31.
__forceinline float RandomSampler_getFloat(RandomSampler& self) {
  const float scale = 4.656612873077392578125e-10f; // 2^-31
  return (float)RandomSampler_getInt(self) * scale;
}
// One-dimensional sample; alias for RandomSampler_getFloat.
__forceinline float RandomSampler_get1D(RandomSampler& self) {
  return RandomSampler_getFloat(self);
}
// Two-dimensional sample: two consecutive 1D draws (u first, then v).
__forceinline Vec2f RandomSampler_get2D(RandomSampler& self)
{
  const float u = RandomSampler_get1D(self);
  const float v = RandomSampler_get1D(self);
  return Vec2f(u, v);
}
// Three-dimensional sample: draw three raw 32-bit values and convert all
// three lanes to floats in [0,1) at once — shift each lane right by one
// (giving 31-bit non-negative ints, matching RandomSampler_getInt) and
// scale by 2^-31.
// (Removed the long-dead commented-out scalar float path.)
__forceinline Vec3fa RandomSampler_get3D(RandomSampler& self)
{
  const int u = RandomSampler_getUInt(self);
  const int v = RandomSampler_getUInt(self);
  const int w = RandomSampler_getUInt(self);
  return Vec3fa(srl(Vec3ia(u,v,w), 1)) * 4.656612873077392578125e-10f;
}
} // namespace embree

View file

@ -0,0 +1,104 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../math/vec.isph"
// Small pseudo random number generator; `s` holds the full generator state.
struct RandomSampler
{
unsigned int s; // current 32-bit state, advanced by LCG_next
};
// One MurmurHash3 mixing round: scramble the key k, then fold it into hash.
inline unsigned int MurmurHash3_mix(unsigned int hash, unsigned int k)
{
  const unsigned int c1 = 0xcc9e2d51;
  const unsigned int c2 = 0x1b873593;
  k *= c1;
  k = (k << 15) | (k >> 17);          // rotl(k, 15)
  k *= c2;
  hash ^= k;
  hash = (hash << 13) | (hash >> 19); // rotl(hash, 13)
  return hash * 5 + 0xe6546b64;
}
// MurmurHash3 avalanche finalizer: xor-shift / multiply rounds on the state.
inline unsigned int MurmurHash3_finalize(unsigned int hash)
{
  unsigned int h = hash;
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  return h ^ (h >> 16);
}
// One step of a 32-bit linear congruential generator (wraps mod 2^32).
inline unsigned int LCG_next(unsigned int value)
{
  return 1664525u * value + 1013904223u;
}
// Seed the sampler state by hashing the id.
inline void RandomSampler_init(RandomSampler& self, int id)
{
  self.s = MurmurHash3_finalize(MurmurHash3_mix(0, id));
}
// Seed from (pixel, sample): fold both ids into one hash, then finalize.
inline void RandomSampler_init(RandomSampler& self, int pixelId, int sampleId)
{
  unsigned int h = MurmurHash3_mix(0, pixelId);
  h = MurmurHash3_mix(h, sampleId);
  self.s = MurmurHash3_finalize(h);
}
// Seed from a 2D pixel coordinate: pack (x, y) into one id with y in the
// high 16 bits, then delegate to the (pixelId, sampleId) overload.
inline void RandomSampler_init(RandomSampler& self, int x, int y, int sampleId)
{
  const int pixelId = x | (y << 16);
  RandomSampler_init(self, pixelId, sampleId);
}
// Advance the generator and return a non-negative int (state shifted right
// by one bit so the sign bit is always clear).
inline int RandomSampler_getInt(RandomSampler& self) {
  self.s = LCG_next(self.s);
  return self.s >> 1;
}
// Advance the generator and return the full 32-bit state.
inline unsigned int RandomSampler_getUInt(RandomSampler& self) {
  self.s = LCG_next(self.s);
  return self.s;
}
// Uniform float in [0,1): scale the 31-bit integer sample by 2^-31.
inline float RandomSampler_getFloat(RandomSampler& self) {
  const float scale = 4.656612873077392578125e-10f; // 2^-31
  return (float)RandomSampler_getInt(self) * scale;
}
// One-dimensional sample; alias for RandomSampler_getFloat.
inline float RandomSampler_get1D(RandomSampler& self) {
  return RandomSampler_getFloat(self);
}
// Two-dimensional sample: two consecutive 1D draws (u first, then v).
inline Vec2f RandomSampler_get2D(RandomSampler& self)
{
  const float u = RandomSampler_get1D(self);
  const float v = RandomSampler_get1D(self);
  return make_Vec2f(u, v);
}
// Three-dimensional sample: three consecutive 1D draws (x, then y, then z).
inline Vec3fa RandomSampler_get3D(RandomSampler& self)
{
  const float sx = RandomSampler_get1D(self);
  const float sy = RandomSampler_get1D(self);
  const float sz = RandomSampler_get1D(self);
  return make_Vec3fa(sx, sy, sz);
}

View file

@ -0,0 +1,137 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include "../math/emath.h"
namespace embree
{
/*! Half-open interval [_begin, _end) over an arbitrary ordinal type Ty. */
template<typename Ty>
struct range
{
__forceinline range() {}
// single-element range [begin, begin+1)
__forceinline range(const Ty& begin)
: _begin(begin), _end(begin+1) {}
__forceinline range(const Ty& begin, const Ty& end)
: _begin(begin), _end(end) {}
__forceinline range(const range& other)
: _begin(other._begin), _end(other._end) {}
// converting copy from a range over a different ordinal type
template<typename T1>
__forceinline range(const range<T1>& other)
: _begin(Ty(other._begin)), _end(Ty(other._end)) {}
template<typename T1>
__forceinline range& operator =(const range<T1>& other) {
_begin = other._begin;
_end = other._end;
return *this;
}
__forceinline Ty begin() const {
return _begin;
}
__forceinline Ty end() const {
return _end;
}
// intersection of two ranges; may yield an empty range (end <= begin)
__forceinline range intersect(const range& r) const {
return range (max(_begin,r._begin),min(_end,r._end));
}
__forceinline Ty size() const {
return _end - _begin;
}
__forceinline bool empty() const {
return _end <= _begin;
}
// midpoint; note (_begin + _end) may overflow for very large Ty values
__forceinline Ty center() const {
return (_begin + _end)/2;
}
// split at the midpoint into (left, right) halves
__forceinline std::pair<range,range> split() const
{
const Ty _center = center();
return std::make_pair(range(_begin,_center),range(_center,_end));
}
__forceinline void split(range& left_o, range& right_o) const
{
const Ty _center = center();
left_o = range(_begin,_center);
right_o = range(_center,_end);
}
// ranges are ordered by size, not by position
__forceinline friend bool operator< (const range& r0, const range& r1) {
return r0.size() < r1.size();
}
friend embree_ostream operator<<(embree_ostream cout, const range& r) {
return cout << "range [" << r.begin() << ", " << r.end() << "]";
}
Ty _begin, _end;
};
// Convenience factory that deduces Ty from its arguments.
template<typename Ty>
range<Ty> make_range(const Ty& begin, const Ty& end)
{
  return range<Ty>(begin, end);
}
/*! A range [_begin, _end) with extra reserved capacity up to _ext_end,
 *  i.e. [_end, _ext_end) is allocated-but-unused space. */
template<typename Ty>
struct extended_range : public range<Ty>
{
__forceinline extended_range () {}
// single element, no extra capacity
__forceinline extended_range (const Ty& begin)
: range<Ty>(begin), _ext_end(begin+1) {}
// no extra capacity: ext_end coincides with end
__forceinline extended_range (const Ty& begin, const Ty& end)
: range<Ty>(begin,end), _ext_end(end) {}
__forceinline extended_range (const Ty& begin, const Ty& end, const Ty& ext_end)
: range<Ty>(begin,end), _ext_end(ext_end) {}
__forceinline Ty ext_end() const {
return _ext_end;
}
// total size including the reserved tail
__forceinline Ty ext_size() const {
return _ext_end - range<Ty>::_begin;
}
// size of only the reserved tail [_end, _ext_end)
__forceinline Ty ext_range_size() const {
return _ext_end - range<Ty>::_end;
}
__forceinline bool has_ext_range() const {
assert(_ext_end >= range<Ty>::_end);
return (_ext_end - range<Ty>::_end) > 0;
}
__forceinline void set_ext_range(const size_t ext_end){
assert(ext_end >= range<Ty>::_end);
_ext_end = ext_end;
}
// shift the whole range (including the reserved tail) right by `plus`
__forceinline void move_right(const size_t plus){
range<Ty>::_begin += plus;
range<Ty>::_end += plus;
_ext_end += plus;
}
friend embree_ostream operator<<(embree_ostream cout, const extended_range& r) {
return cout << "extended_range [" << r.begin() << ", " << r.end() << " (" << r.ext_end() << ")]";
}
Ty _ext_end;
};
}

View file

@ -0,0 +1,525 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
// Transcendental functions from "ispc": https://github.com/ispc/ispc/
// Most of the transcendental implementations in ispc code come from
// Solomon Boulos's "syrah": https://github.com/boulos/syrah/
#include "../simd/simd.h"
namespace embree
{
namespace fastapprox
{
// Vectorizable sin() approximation (from ispc/syrah): reduce the argument
// with k = floor(v * 2/pi) to x in [0, pi/2), then evaluate either the sine
// or cosine minimax polynomial depending on the quadrant k mod 4.
template <typename T>
__forceinline T sin(const T &v)
{
static const float piOverTwoVec = 1.57079637050628662109375;
static const float twoOverPiVec = 0.636619746685028076171875;
auto scaled = v * twoOverPiVec;
auto kReal = floor(scaled);
auto k = toInt(kReal);
// Reduced range version of x
auto x = v - kReal * piOverTwoVec;
auto kMod4 = k & 3;
// quadrants 1 and 3 use the cosine polynomial; quadrants 2 and 3 flip sign
auto sinUseCos = (kMod4 == 1) | (kMod4 == 3);
auto flipSign = (kMod4 > 1);
// These coefficients are from sollya with fpminimax(sin(x)/x, [|0, 2,
// 4, 6, 8, 10|], [|single...|], [0;Pi/2]);
static const float sinC2 = -0.16666667163372039794921875;
static const float sinC4 = +8.333347737789154052734375e-3;
static const float sinC6 = -1.9842604524455964565277099609375e-4;
static const float sinC8 = +2.760012648650445044040679931640625e-6;
static const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
static const float cosC2 = -0.5;
static const float cosC4 = +4.166664183139801025390625e-2;
static const float cosC6 = -1.388833043165504932403564453125e-3;
static const float cosC8 = +2.47562347794882953166961669921875e-5;
static const float cosC10 = -2.59630184018533327616751194000244140625e-7;
// the sine polynomial approximates sin(x)/x, hence the extra factor x
auto outside = select(sinUseCos, 1., x);
auto c2 = select(sinUseCos, T(cosC2), T(sinC2));
auto c4 = select(sinUseCos, T(cosC4), T(sinC4));
auto c6 = select(sinUseCos, T(cosC6), T(sinC6));
auto c8 = select(sinUseCos, T(cosC8), T(sinC8));
auto c10 = select(sinUseCos, T(cosC10), T(sinC10));
auto x2 = x * x;
// Horner evaluation of the selected even polynomial
auto formula = x2 * c10 + c8;
formula = x2 * formula + c6;
formula = x2 * formula + c4;
formula = x2 * formula + c2;
formula = x2 * formula + 1.;
formula *= outside;
formula = select(flipSign, -formula, formula);
return formula;
}
// Vectorizable cos() approximation (from ispc/syrah): same quadrant
// reduction as sin() above, with the polynomial choice and sign flip
// shifted by one quadrant (cos uses the cosine polynomial in quadrants
// 0 and 2 and negates in quadrants 1 and 2).
template <typename T>
__forceinline T cos(const T &v)
{
static const float piOverTwoVec = 1.57079637050628662109375;
static const float twoOverPiVec = 0.636619746685028076171875;
auto scaled = v * twoOverPiVec;
auto kReal = floor(scaled);
auto k = toInt(kReal);
// Reduced range version of x
auto x = v - kReal * piOverTwoVec;
auto kMod4 = k & 3;
auto cosUseCos = (kMod4 == 0) | (kMod4 == 2);
auto flipSign = (kMod4 == 1) | (kMod4 == 2);
const float sinC2 = -0.16666667163372039794921875;
const float sinC4 = +8.333347737789154052734375e-3;
const float sinC6 = -1.9842604524455964565277099609375e-4;
const float sinC8 = +2.760012648650445044040679931640625e-6;
const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
const float cosC2 = -0.5;
const float cosC4 = +4.166664183139801025390625e-2;
const float cosC6 = -1.388833043165504932403564453125e-3;
const float cosC8 = +2.47562347794882953166961669921875e-5;
const float cosC10 = -2.59630184018533327616751194000244140625e-7;
// the sine polynomial approximates sin(x)/x, hence the extra factor x
auto outside = select(cosUseCos, 1., x);
auto c2 = select(cosUseCos, T(cosC2), T(sinC2));
auto c4 = select(cosUseCos, T(cosC4), T(sinC4));
auto c6 = select(cosUseCos, T(cosC6), T(sinC6));
auto c8 = select(cosUseCos, T(cosC8), T(sinC8));
auto c10 = select(cosUseCos, T(cosC10), T(sinC10));
auto x2 = x * x;
// Horner evaluation of the selected even polynomial
auto formula = x2 * c10 + c8;
formula = x2 * formula + c6;
formula = x2 * formula + c4;
formula = x2 * formula + c2;
formula = x2 * formula + 1.;
formula *= outside;
formula = select(flipSign, -formula, formula);
return formula;
}
// Computes sin and cos of v simultaneously, sharing one range reduction.
// Both polynomials are evaluated in lockstep, then routed/negated per the
// quadrant k mod 4 (cheaper than calling sin() and cos() separately).
template <typename T>
__forceinline void sincos(const T &v, T &sinResult, T &cosResult)
{
const float piOverTwoVec = 1.57079637050628662109375;
const float twoOverPiVec = 0.636619746685028076171875;
auto scaled = v * twoOverPiVec;
auto kReal = floor(scaled);
auto k = toInt(kReal);
// Reduced range version of x
auto x = v - kReal * piOverTwoVec;
auto kMod4 = k & 3;
// routing/negation masks per quadrant
auto cosUseCos = ((kMod4 == 0) | (kMod4 == 2));
auto sinUseCos = ((kMod4 == 1) | (kMod4 == 3));
auto sinFlipSign = (kMod4 > 1);
auto cosFlipSign = ((kMod4 == 1) | (kMod4 == 2));
const float oneVec = +1.;
const float sinC2 = -0.16666667163372039794921875;
const float sinC4 = +8.333347737789154052734375e-3;
const float sinC6 = -1.9842604524455964565277099609375e-4;
const float sinC8 = +2.760012648650445044040679931640625e-6;
const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
const float cosC2 = -0.5;
const float cosC4 = +4.166664183139801025390625e-2;
const float cosC6 = -1.388833043165504932403564453125e-3;
const float cosC8 = +2.47562347794882953166961669921875e-5;
const float cosC10 = -2.59630184018533327616751194000244140625e-7;
auto x2 = x * x;
// interleaved Horner evaluation of both even polynomials
auto sinFormula = x2 * sinC10 + sinC8;
auto cosFormula = x2 * cosC10 + cosC8;
sinFormula = x2 * sinFormula + sinC6;
cosFormula = x2 * cosFormula + cosC6;
sinFormula = x2 * sinFormula + sinC4;
cosFormula = x2 * cosFormula + cosC4;
sinFormula = x2 * sinFormula + sinC2;
cosFormula = x2 * cosFormula + cosC2;
sinFormula = x2 * sinFormula + oneVec;
cosFormula = x2 * cosFormula + oneVec;
// the sine polynomial approximates sin(x)/x, hence the extra factor x
sinFormula *= x;
sinResult = select(sinUseCos, cosFormula, sinFormula);
cosResult = select(cosUseCos, cosFormula, sinFormula);
sinResult = select(sinFlipSign, -sinResult, sinResult);
cosResult = select(cosFlipSign, -cosResult, cosResult);
}
// Vectorizable tan() approximation: exploit tan's odd symmetry, reduce to
// octants of width pi/4 with k = floor(|v| * 4/pi), and evaluate either a
// tangent or a (negated) cotangent polynomial depending on k mod 4.
template <typename T>
__forceinline T tan(const T &v)
{
const float piOverFourVec = 0.785398185253143310546875;
const float fourOverPiVec = 1.27323949337005615234375;
// tan(-x) = -tan(x): work on |v| and restore the sign at the end
auto xLt0 = v < 0.;
auto y = select(xLt0, -v, v);
auto scaled = y * fourOverPiVec;
auto kReal = floor(scaled);
auto k = toInt(kReal);
auto x = y - kReal * piOverFourVec;
// If k & 1, x -= Pi/4
auto needOffset = (k & 1) != 0;
x = select(needOffset, x - piOverFourVec, x);
// If k & 3 == (0 or 3) let z = tan_In...(y) otherwise z = -cot_In0To...
auto kMod4 = k & 3;
auto useCotan = (kMod4 == 1) | (kMod4 == 2);
const float oneVec = 1.0;
const float tanC2 = +0.33333075046539306640625;
const float tanC4 = +0.13339905440807342529296875;
const float tanC6 = +5.3348250687122344970703125e-2;
const float tanC8 = +2.46033705770969390869140625e-2;
const float tanC10 = +2.892402000725269317626953125e-3;
const float tanC12 = +9.500005282461643218994140625e-3;
const float cotC2 = -0.3333333432674407958984375;
const float cotC4 = -2.222204394638538360595703125e-2;
const float cotC6 = -2.11752182804048061370849609375e-3;
const float cotC8 = -2.0846328698098659515380859375e-4;
const float cotC10 = -2.548247357481159269809722900390625e-5;
const float cotC12 = -3.5257363606433500535786151885986328125e-7;
auto x2 = x * x;
T z;
// cotangent branch; guarded so all-tan batches skip the work entirely
if (any(useCotan))
{
auto cotVal = x2 * cotC12 + cotC10;
cotVal = x2 * cotVal + cotC8;
cotVal = x2 * cotVal + cotC6;
cotVal = x2 * cotVal + cotC4;
cotVal = x2 * cotVal + cotC2;
cotVal = x2 * cotVal + oneVec;
// The equation is for x * cot(x) but we need -x * cot(x) for the tan part.
cotVal /= -x;
z = cotVal;
}
auto useTan = !useCotan;
if (any(useTan))
{
auto tanVal = x2 * tanC12 + tanC10;
tanVal = x2 * tanVal + tanC8;
tanVal = x2 * tanVal + tanC6;
tanVal = x2 * tanVal + tanC4;
tanVal = x2 * tanVal + tanC2;
tanVal = x2 * tanVal + oneVec;
// Equation was for tan(x)/x
tanVal *= x;
z = select(useTan, tanVal, z);
}
// restore odd symmetry
return select(xLt0, -z, z);
}
// Vectorizable asin() approximation. Works on |x0| and restores the sign;
// inputs with |x| > 1 yield quiet NaN (bit pattern 0x7fc00000).
template <typename T>
__forceinline T asin(const T &x0)
{
auto isneg = (x0 < 0.f);
auto x = abs(x0);
auto isnan = (x > 1.f);
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
// [1e-20;.9999999999999999]);
// avg error: 1.1105439e-06, max error 1.3187528e-06
auto v = 1.57079517841339111328125f +
x * (-0.21450997889041900634765625f +
x * (8.78556668758392333984375e-2f +
x * (-4.489909112453460693359375e-2f +
x * (1.928029954433441162109375e-2f +
x * (-4.3095736764371395111083984375e-3f)))));
// undo the -sqrt(1-x) factoring and shift back by pi/2
v *= -sqrt(1.f - x);
v = v + 1.57079637050628662109375f;
// clamp tiny negative results of the approximation to 0
v = select(v < 0.f, T(0.f), v);
v = select(isneg, -v, v);
v = select(isnan, T(cast_i2f(0x7fc00000)), v);
return v;
}
// acos(v) = pi/2 - asin(v); inherits asin's approximation error and its
// NaN behavior for |v| > 1.
template <typename T>
__forceinline T acos(const T &v)
{
return 1.57079637050628662109375f - asin(v);
}
// Vectorizable atan() approximation: fold the input into [0, 1] using the
// identities atan(-x) = -atan(x) and atan(x) = pi/2 - atan(1/x) for x > 1,
// then evaluate a minimax polynomial for atan(x)/x.
template <typename T>
__forceinline T atan(const T &v)
{
const float piOverTwoVec = 1.57079637050628662109375;
// atan(-x) = -atan(x) (so flip from negative to positive first)
// If x > 1 -> atan(x) = Pi/2 - atan(1/x)
auto xNeg = v < 0.f;
auto xFlipped = select(xNeg, -v, v);
auto xGt1 = xFlipped > 1.;
auto x = select(xGt1, rcpSafe(xFlipped), xFlipped);
// These coefficients approximate atan(x)/x
const float atanC0 = +0.99999988079071044921875;
const float atanC2 = -0.3333191573619842529296875;
const float atanC4 = +0.199689209461212158203125;
const float atanC6 = -0.14015688002109527587890625;
const float atanC8 = +9.905083477497100830078125e-2;
const float atanC10 = -5.93664981424808502197265625e-2;
const float atanC12 = +2.417283318936824798583984375e-2;
const float atanC14 = -4.6721356920897960662841796875e-3;
auto x2 = x * x;
// Horner evaluation of the even polynomial, then multiply by x
auto result = x2 * atanC14 + atanC12;
result = x2 * result + atanC10;
result = x2 * result + atanC8;
result = x2 * result + atanC6;
result = x2 * result + atanC4;
result = x2 * result + atanC2;
result = x2 * result + atanC0;
result *= x;
// undo the reciprocal and sign folds
result = select(xGt1, piOverTwoVec - result, result);
result = select(xNeg, -result, result);
return result;
}
// Vectorizable atan2(y, x): atan(y/x) plus a +-pi offset for the left
// half-plane (x < 0). Division uses rcpSafe, so x == 0 does not trap;
// infinity handling is approximate (see the table below).
template <typename T>
__forceinline T atan2(const T &y, const T &x)
{
const float piVec = 3.1415926536;
// atan2(y, x) =
//
// atan2(y > 0, x = +-0) -> Pi/2
// atan2(y < 0, x = +-0) -> -Pi/2
// atan2(y = +-0, x < +0) -> +-Pi
// atan2(y = +-0, x >= +0) -> +-0
//
// atan2(y >= 0, x < 0) -> Pi + atan(y/x)
// atan2(y < 0, x < 0) -> -Pi + atan(y/x)
// atan2(y, x > 0) -> atan(y/x)
//
// and then a bunch of code for dealing with infinities.
auto yOverX = y * rcpSafe(x);
auto atanArg = atan(yOverX);
auto xLt0 = x < 0.f;
auto yLt0 = y < 0.f;
auto offset = select(xLt0,
select(yLt0, T(-piVec), T(piVec)), 0.f);
return offset + atanArg;
}
// Vectorizable exp() approximation: write v = k*ln(2) + x with
// k = floor(v/ln(2)), evaluate a polynomial for e^x on [0, ln(2)], and
// scale by 2^k built directly in the float exponent bits. ln(2) is split
// into two parts so the reduction stays accurate for large k.
template <typename T>
__forceinline T exp(const T &v)
{
const float ln2Part1 = 0.6931457519;
const float ln2Part2 = 1.4286067653e-6;
const float oneOverLn2 = 1.44269502162933349609375;
auto scaled = v * oneOverLn2;
auto kReal = floor(scaled);
auto k = toInt(kReal);
// Reduced range version of x (two-step subtraction for extra precision)
auto x = v - kReal * ln2Part1;
x -= kReal * ln2Part2;
// These coefficients are for e^x in [0, ln(2)]
const float one = 1.;
const float c2 = 0.4999999105930328369140625;
const float c3 = 0.166668415069580078125;
const float c4 = 4.16539050638675689697265625e-2;
const float c5 = 8.378830738365650177001953125e-3;
const float c6 = 1.304379315115511417388916015625e-3;
const float c7 = 2.7555381529964506626129150390625e-4;
auto result = x * c7 + c6;
result = x * result + c5;
result = x * result + c4;
result = x * result + c3;
result = x * result + c2;
result = x * result + one;
result = x * result + one;
// Compute 2^k (should differ for float and double, but I'll avoid
// it for now and just do floats)
const int fpbias = 127;
auto biasedN = k + fpbias;
auto overflow = kReal > fpbias;
// Minimum exponent is -126, so if k is <= -127 (k + 127 <= 0)
// we've got underflow. -127 * ln(2) -> -88.02. So the most
// negative float input that doesn't result in zero is like -88.
auto underflow = kReal <= -fpbias;
const int infBits = 0x7f800000;
// place the biased exponent into the float exponent field
biasedN <<= 23;
// Reinterpret this thing as float
auto twoToTheN = asFloat(biasedN);
// Handle both doubles and floats (hopefully eliding the copy for float)
auto elemtype2n = twoToTheN;
result *= elemtype2n;
// saturate to +inf on overflow, flush to 0 on underflow
result = select(overflow, cast_i2f(infBits), result);
result = select(underflow, 0., result);
return result;
}
// Range reduction for logarithms takes log(x) -> log(2^n * y) -> n
// * log(2) + log(y) where y is the reduced range (usually in [1/2, 1)).
// `input` must be positive and finite (callers patch out zero/negative
// inputs beforehand); `reduced` receives y and `exponent` receives n as
// the matching integer type.
template <typename T, typename R>
__forceinline void __rangeReduceLog(const T &input,
T &reduced,
R &exponent)
{
auto intVersion = asInt(input);
// single precision = SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM
// exponent mask = 0111 1111 1000 0000 0000 0000 0000 0000
// 0x7 0xF 0x8 0x0 0x0 0x0 0x0 0x0
// non-exponent = 1000 0000 0111 1111 1111 1111 1111 1111
// = 0x8 0x0 0x7 0xF 0xF 0xF 0xF 0xF
//const int exponentMask(0x7F800000)
static const int nonexponentMask = 0x807FFFFF;
// We want the reduced version to have an exponent of -1 which is
// -1 + 127 after biasing or 126
static const int exponentNeg1 = (126l << 23);
// NOTE(boulos): We don't need to mask anything out since we know
// the sign bit has to be 0. If it's 1, we need to return infinity/nan
// anyway (log(x), x = +-0 -> infinity, x < 0 -> NaN).
auto biasedExponent = intVersion >> 23; // This number is [0, 255] but it means [-127, 128]
auto offsetExponent = biasedExponent + 1; // Treat the number as if it were 2^{e+1} * (1.m)/2
exponent = offsetExponent - 127; // get the real value
// Blend the offset_exponent with the original input (do this in
// int for now, until I decide if float can have & and &not)
auto blended = (intVersion & nonexponentMask) | (exponentNeg1);
reduced = asFloat(blended);
}
// Maps a float type to the integer type that holds the exponent produced
// by __rangeReduceLog (vfloat lanes get a matching vint; scalar float gets int).
template <typename T> struct ExponentType { };
template <int N> struct ExponentType<vfloat_impl<N>> { typedef vint<N> Ty; };
template <> struct ExponentType<float> { typedef int Ty; };
// Vectorizable natural log approximation: range-reduce to y in [1/2, 1)
// and exponent n (log(v) = n*ln(2) + log(y)), then evaluate a polynomial
// in x1 = 1 - y. Special cases: v < 0 -> quiet NaN, v == 0 -> -inf.
template <typename T>
__forceinline T log(const T &v)
{
T reduced;
typename ExponentType<T>::Ty exponent;
const int nanBits = 0x7fc00000;
const int negInfBits = 0xFF800000;
const float nan = cast_i2f(nanBits);
const float negInf = cast_i2f(negInfBits);
auto useNan = v < 0.;
auto useInf = v == 0.;
auto exceptional = useNan | useInf;
const float one = 1.0;
// replace exceptional lanes with 1 so the reduction below is well defined
auto patched = select(exceptional, one, v);
__rangeReduceLog(patched, reduced, exponent);
const float ln2 = 0.693147182464599609375;
auto x1 = one - reduced;
const float c1 = +0.50000095367431640625;
const float c2 = +0.33326041698455810546875;
const float c3 = +0.2519190013408660888671875;
const float c4 = +0.17541764676570892333984375;
const float c5 = +0.3424419462680816650390625;
const float c6 = -0.599632322788238525390625;
const float c7 = +1.98442304134368896484375;
const float c8 = -2.4899270534515380859375;
const float c9 = +1.7491014003753662109375;
// Horner evaluation in x1
auto result = x1 * c9 + c8;
result = x1 * result + c7;
result = x1 * result + c6;
result = x1 * result + c5;
result = x1 * result + c4;
result = x1 * result + c3;
result = x1 * result + c2;
result = x1 * result + c1;
result = x1 * result + one;
// Equation was for -(ln(red)/(1-red))
result *= -x1;
result += toFloat(exponent) * ln2;
return select(exceptional,
select(useNan, T(nan), T(negInf)),
result);
}
// Vectorizable pow(x, y) via exp(y * log(|x|)), patched afterwards for the
// IEEE special cases: x == 0, negative x (valid only for integer y, with
// the sign taken from y's parity), and infinite x or y.
template <typename T>
__forceinline T pow(const T &x, const T &y)
{
auto x1 = abs(x);
auto z = exp(y * log(x1));
// Handle special cases
const float twoOver23 = 8388608.0f;
auto yInt = y == round(y);
// adding 2^23 forces the parity of an integral |y| into the lowest
// mantissa bit; shifting it to bit 31 yields the result's sign bit
auto yOddInt = select(yInt, asInt(abs(y) + twoOver23) << 31, 0); // set sign bit
// x == 0
z = select(x == 0.0f,
select(y < 0.0f, T(inf) | signmsk(x),
select(y == 0.0f, T(1.0f), asFloat(yOddInt) & x)), z);
// x < 0: only integer exponents are valid; others produce quiet NaN
auto xNegative = x < 0.0f;
if (any(xNegative))
{
auto z1 = z | asFloat(yOddInt);
z1 = select(yInt, z1, std::numeric_limits<float>::quiet_NaN());
z = select(xNegative, z1, z);
}
auto xFinite = isfinite(x);
auto yFinite = isfinite(y);
if (all(xFinite & yFinite))
return z;
// x finite and y infinite
z = select(andn(xFinite, yFinite),
select(x1 == 1.0f, 1.0f,
select((x1 > 1.0f) ^ (y < 0.0f), inf, T(0.0f))), z);
// x infinite
z = select(xFinite, z,
select(y == 0.0f, 1.0f,
select(y < 0.0f, T(0.0f), inf) | (asFloat(yOddInt) & x)));
return z;
}
// Convenience overload: broadcast the scalar exponent, then defer to pow(T, T).
template <typename T>
__forceinline T pow(const T &x, float y)
{
  const T yv(y);
  return pow(x, yv);
}
} // namespace fastapprox
} // namespace embree

View file

@ -0,0 +1,87 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec2.h"
#include "vec3.h"
#include "vec4.h"
namespace embree {
// Named forms of the corresponding vector operators.
__forceinline Vec3f neg(const Vec3f& a ) { return -a; }
__forceinline Vec3fa neg(const Vec3fa& a) { return -a; }
__forceinline bool eq (const Vec3fa& a, const Vec3fa& b) { return a == b; }
__forceinline bool ne (const Vec3fa& a, const Vec3fa& b) { return a != b; }
// FIXME: change order of lerp arguments, then remove this function
// Linear interpolation with the parameter first: (1-t)*v0 + t*v1.
template<typename V>
__forceinline V lerpr(float t, const V& v0, const V& v1) {
  const float w0 = 1.0f - t;
  return w0*v0 + t*v1;
}
// -------------------------------------------------------
// sRGB conversion functions
// -------------------------------------------------------
#define APPROXIMATE_SRGB
// Convert one linear-light channel value to sRGB-encoded space. With
// APPROXIMATE_SRGB defined (the default in this file) a plain 1/2.2 gamma
// is used; otherwise the exact piecewise sRGB transfer curve.
inline float linear_to_srgb(const float f)
{
// negative inputs clamp to 0
const float c = max(f, 0.f);
#ifdef APPROXIMATE_SRGB
return pow(c, 1.f/2.2f);
#else
return c <= 0.0031308f ? 12.92f*c : pow(c, 1.f/2.4f)*1.055f - 0.055f;
#endif
}
// Gamma-encode the RGB channels; alpha is only clamped to >= 0
// (alpha is never gamma-corrected).
inline Vec4f linear_to_srgba(const Vec4f c)
{
  const float r = linear_to_srgb(c.x);
  const float g = linear_to_srgb(c.y);
  const float b = linear_to_srgb(c.z);
  return Vec4f(r, g, b, max(c.w, 0.f));
}
// Pack a linear RGBA color into 8-bit-per-channel sRGB, byte order
// 0xAABBGGRR (red in the lowest byte).
inline uint32_t linear_to_srgba8(const Vec4f c)
{
#if 1
Vec4f l = 255.f * min(linear_to_srgba(c), Vec4f(1.f));
return
((uint32_t)l.x << 0) |
((uint32_t)l.y << 8) |
((uint32_t)l.z << 16) |
((uint32_t)l.w << 24);
#else
// TODO use ISPC's float_to_srgb8 once it is fixed (issue #1198)
// NOTE(review): this disabled path casts clamp(c.w, 0, 1) directly to
// uint32_t — yielding only 0 or 1 — instead of scaling by 255 first;
// fix before enabling.
return
(float_to_srgb8(c.x) << 0) |
(float_to_srgb8(c.y) << 8) |
(float_to_srgb8(c.z) << 16) |
((uint32_t)clamp(c.w, 0.f, 1.f) << 24); // alpha is never gamma-corrected
#endif
}
// Convert one sRGB-encoded channel value back to linear light; inverse of
// linear_to_srgb (2.2 gamma with APPROXIMATE_SRGB, exact curve otherwise).
inline float srgb_to_linear(const float f)
{
// negative inputs clamp to 0
const float c = max(f, 0.f);
#ifdef APPROXIMATE_SRGB
return pow(c, 2.2f);
#else
return c <= 0.04045f ? c/12.92f : pow((c + 0.055f)/1.055f, 2.4f);
#endif
}
// Decode the RGB channels to linear light; alpha is only clamped to >= 0
// (alpha is never gamma-corrected).
inline Vec4f srgba_to_linear(const Vec4f c)
{
  const float r = srgb_to_linear(c.x);
  const float g = srgb_to_linear(c.y);
  const float b = srgb_to_linear(c.z);
  return Vec4f(r, g, b, max(c.w, 0.f));
}
// TODO implement srgba8_to_linear with a 256 entry LUT
#undef APPROXIMATE_SRGB
} // namespace embree

View file

@ -0,0 +1,236 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
namespace embree
{
struct Vec2fa;
////////////////////////////////////////////////////////////////////////////////
/// Generic 2D vector Class
////////////////////////////////////////////////////////////////////////////////
/*! Generic 2D vector with componentwise access either by name (x, y) or
 *  by index through the overlapping `components` array. */
template<typename T> struct Vec2
{
enum { N = 2 };
union {
struct { T x, y; };
#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
T components[N];
#endif
};
typedef T Scalar;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2( ) {}
// broadcast a single scalar into both components
__forceinline explicit Vec2( const T& a ) : x(a), y(a) {}
__forceinline Vec2( const T& x, const T& y ) : x(x), y(y) {}
__forceinline Vec2( const Vec2& other ) { x = other.x; y = other.y; }
// defined out-of-line where Vec2fa is complete
Vec2( const Vec2fa& other );
// converting copy/assign from a Vec2 over a different scalar type
template<typename T1> __forceinline Vec2( const Vec2<T1>& a ) : x(T(a.x)), y(T(a.y)) {}
template<typename T1> __forceinline Vec2& operator =( const Vec2<T1>& other ) { x = other.x; y = other.y; return *this; }
__forceinline Vec2& operator =( const Vec2& other ) { x = other.x; y = other.y; return *this; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2( ZeroTy ) : x(zero), y(zero) {}
__forceinline Vec2( OneTy ) : x(one), y(one) {}
__forceinline Vec2( PosInfTy ) : x(pos_inf), y(pos_inf) {}
__forceinline Vec2( NegInfTy ) : x(neg_inf), y(neg_inf) {}
// indexed access; VS 2013 lacks the anonymous-union member, so it
// indexes off &x instead of the components array
#if defined(__WIN32__) && _MSC_VER == 1800 // workaround for older VS 2013 compiler
__forceinline const T& operator [](const size_t axis) const { assert(axis < 2); return (&x)[axis]; }
__forceinline T& operator [](const size_t axis) { assert(axis < 2); return (&x)[axis]; }
#else
__forceinline const T& operator [](const size_t axis) const { assert(axis < 2); return components[axis]; }
__forceinline T& operator [](const size_t axis ) { assert(axis < 2); return components[axis]; }
#endif
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
// Componentwise unary operators and elementwise math functions.
template<typename T> __forceinline Vec2<T> operator +( const Vec2<T>& a ) { return Vec2<T>(+a.x, +a.y); }
template<typename T> __forceinline Vec2<T> operator -( const Vec2<T>& a ) { return Vec2<T>(-a.x, -a.y); }
template<typename T> __forceinline Vec2<T> abs ( const Vec2<T>& a ) { return Vec2<T>(abs (a.x), abs (a.y)); }
template<typename T> __forceinline Vec2<T> rcp ( const Vec2<T>& a ) { return Vec2<T>(rcp (a.x), rcp (a.y)); }
template<typename T> __forceinline Vec2<T> rsqrt ( const Vec2<T>& a ) { return Vec2<T>(rsqrt(a.x), rsqrt(a.y)); }
template<typename T> __forceinline Vec2<T> sqrt ( const Vec2<T>& a ) { return Vec2<T>(sqrt (a.x), sqrt (a.y)); }
template<typename T> __forceinline Vec2<T> frac ( const Vec2<T>& a ) { return Vec2<T>(frac (a.x), frac (a.y)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// Componentwise binary operators; scalar overloads broadcast the scalar
// to both components.
template<typename T> __forceinline Vec2<T> operator +( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x + b.x, a.y + b.y); }
template<typename T> __forceinline Vec2<T> operator +( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x + b , a.y + b ); }
template<typename T> __forceinline Vec2<T> operator +( const T& a, const Vec2<T>& b ) { return Vec2<T>(a + b.x, a + b.y); }
template<typename T> __forceinline Vec2<T> operator -( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x - b.x, a.y - b.y); }
template<typename T> __forceinline Vec2<T> operator -( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x - b , a.y - b ); }
template<typename T> __forceinline Vec2<T> operator -( const T& a, const Vec2<T>& b ) { return Vec2<T>(a - b.x, a - b.y); }
template<typename T> __forceinline Vec2<T> operator *( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x * b.x, a.y * b.y); }
template<typename T> __forceinline Vec2<T> operator *( const T& a, const Vec2<T>& b ) { return Vec2<T>(a * b.x, a * b.y); }
template<typename T> __forceinline Vec2<T> operator *( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x * b , a.y * b ); }
template<typename T> __forceinline Vec2<T> operator /( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x / b.x, a.y / b.y); }
template<typename T> __forceinline Vec2<T> operator /( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x / b , a.y / b ); }
template<typename T> __forceinline Vec2<T> operator /( const T& a, const Vec2<T>& b ) { return Vec2<T>(a / b.x, a / b.y); }
template<typename T> __forceinline Vec2<T> min(const Vec2<T>& a, const Vec2<T>& b) { return Vec2<T>(min(a.x, b.x), min(a.y, b.y)); }
template<typename T> __forceinline Vec2<T> max(const Vec2<T>& a, const Vec2<T>& b) { return Vec2<T>(max(a.x, b.x), max(a.y, b.y)); }
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////

// Fused multiply-add style helpers, forwarded per component to the scalar
// madd/msub/nmadd/nmsub overloads so FMA contraction (and its rounding) is
// preserved exactly.
template<typename T> __forceinline Vec2<T> madd ( const Vec2<T>& p, const Vec2<T>& q, const Vec2<T>& r) {
  return Vec2<T>( madd(p.x,q.x,r.x),  madd(p.y,q.y,r.y));
}
template<typename T> __forceinline Vec2<T> msub ( const Vec2<T>& p, const Vec2<T>& q, const Vec2<T>& r) {
  return Vec2<T>( msub(p.x,q.x,r.x),  msub(p.y,q.y,r.y));
}
template<typename T> __forceinline Vec2<T> nmadd ( const Vec2<T>& p, const Vec2<T>& q, const Vec2<T>& r) {
  return Vec2<T>(nmadd(p.x,q.x,r.x), nmadd(p.y,q.y,r.y));
}
template<typename T> __forceinline Vec2<T> nmsub ( const Vec2<T>& p, const Vec2<T>& q, const Vec2<T>& r) {
  return Vec2<T>(nmsub(p.x,q.x,r.x), nmsub(p.y,q.y,r.y));
}
// Scalar-first variants broadcast the scalar across both components.
template<typename T> __forceinline Vec2<T> madd ( const T& s, const Vec2<T>& q, const Vec2<T>& r) {
  return Vec2<T>( madd(s,q.x,r.x),  madd(s,q.y,r.y));
}
template<typename T> __forceinline Vec2<T> msub ( const T& s, const Vec2<T>& q, const Vec2<T>& r) {
  return Vec2<T>( msub(s,q.x,r.x),  msub(s,q.y,r.y));
}
template<typename T> __forceinline Vec2<T> nmadd ( const T& s, const Vec2<T>& q, const Vec2<T>& r) {
  return Vec2<T>(nmadd(s,q.x,r.x), nmadd(s,q.y,r.y));
}
template<typename T> __forceinline Vec2<T> nmsub ( const T& s, const Vec2<T>& q, const Vec2<T>& r) {
  return Vec2<T>(nmsub(s,q.x,r.x), nmsub(s,q.y,r.y));
}
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////

// In-place component-wise update; returns the updated vector by reference.
template<typename T> __forceinline Vec2<T>& operator +=( Vec2<T>& lhs, const Vec2<T>& rhs ) {
  lhs.x += rhs.x;
  lhs.y += rhs.y;
  return lhs;
}
template<typename T> __forceinline Vec2<T>& operator -=( Vec2<T>& lhs, const Vec2<T>& rhs ) {
  lhs.x -= rhs.x;
  lhs.y -= rhs.y;
  return lhs;
}
// Scalar scale / inverse-scale of both components.
template<typename T> __forceinline Vec2<T>& operator *=( Vec2<T>& lhs, const T& s ) {
  lhs.x *= s;
  lhs.y *= s;
  return lhs;
}
template<typename T> __forceinline Vec2<T>& operator /=( Vec2<T>& lhs, const T& s ) {
  lhs.x /= s;
  lhs.y /= s;
  return lhs;
}
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operators
////////////////////////////////////////////////////////////////////////////////

// Horizontal reductions over the two components.
template<typename T> __forceinline T reduce_add( const Vec2<T>& v ) {
  return v.x + v.y;
}
template<typename T> __forceinline T reduce_mul( const Vec2<T>& v ) {
  return v.x * v.y;
}
template<typename T> __forceinline T reduce_min( const Vec2<T>& v ) {
  return min(v.x, v.y);
}
template<typename T> __forceinline T reduce_max( const Vec2<T>& v ) {
  return max(v.x, v.y);
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////

// Component-wise (in)equality reduced to a single bool.
template<typename T> __forceinline bool operator ==( const Vec2<T>& u, const Vec2<T>& v ) {
  return (u.x == v.x) && (u.y == v.y);
}
template<typename T> __forceinline bool operator !=( const Vec2<T>& u, const Vec2<T>& v ) {
  return (u.x != v.x) || (u.y != v.y);
}
// Lexicographic order: x decides first, then y; equal vectors compare false.
// The '!=' guards are kept so NaN components behave exactly as before.
template<typename T> __forceinline bool operator < ( const Vec2<T>& u, const Vec2<T>& v ) {
  if (u.x != v.x)
    return u.x < v.x;
  if (u.y != v.y)
    return u.y < v.y;
  return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Shift Operators
////////////////////////////////////////////////////////////////////////////////

// Shifts each component right by one bit (forwards to the scalar overload).
template<typename T> __forceinline Vec2<T> shift_right_1( const Vec2<T>& v ) {
  const T sx = shift_right_1(v.x);
  const T sy = shift_right_1(v.y);
  return Vec2<T>(sx, sy);
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////

// Dot product; the x-term is fused via madd, matching the original rounding.
template<typename T> __forceinline T dot ( const Vec2<T>& u, const Vec2<T>& v ) {
  return madd(u.x, v.x, u.y*v.y);
}
// 2D "cross": the vector rotated 90 degrees counter-clockwise.
template<typename T> __forceinline Vec2<T> cross ( const Vec2<T>& v ) {
  return Vec2<T>(-v.y, v.x);
}
template<typename T> __forceinline T length ( const Vec2<T>& v ) {
  return sqrt(dot(v,v));
}
// Normalization via the (approximate) reciprocal square root helper.
template<typename T> __forceinline Vec2<T> normalize( const Vec2<T>& v ) {
  return v * rsqrt(dot(v,v));
}
template<typename T> __forceinline T distance ( const Vec2<T>& u, const Vec2<T>& v ) {
  return length(u - v);
}
// Signed area of the parallelogram spanned by u and v (2x2 determinant).
template<typename T> __forceinline T det ( const Vec2<T>& u, const Vec2<T>& v ) {
  return u.x*v.y - u.y*v.x;
}
// Like normalize(), but returns the input unchanged when its squared length
// is exactly zero.
template<typename T> __forceinline Vec2<T> normalize_safe( const Vec2<T>& v ) {
  const T len2 = dot(v,v);
  return select(len2 == T( zero ), v, v*rsqrt(len2));
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////

// Uniform select: one condition picks whole components from t or f.
template<typename T> __forceinline Vec2<T> select ( bool cond, const Vec2<T>& t, const Vec2<T>& f ) {
  return Vec2<T>(select(cond, t.x, f.x),
                 select(cond, t.y, f.y));
}
// Per-component select driven by a Vec2<bool> mask.
template<typename T> __forceinline Vec2<T> select ( const Vec2<bool>& cond, const Vec2<T>& t, const Vec2<T>& f ) {
  return Vec2<T>(select(cond.x, t.x, f.x),
                 select(cond.y, t.y, f.y));
}
// Select driven by the SIMD mask type associated with T.
template<typename T> __forceinline Vec2<T> select ( const typename T::Bool& cond, const Vec2<T>& t, const Vec2<T>& f ) {
  return Vec2<T>(select(cond, t.x, f.x),
                 select(cond, t.y, f.y));
}
// Linear interpolation: returns v0 at t==0 and v1 at t==1, using the fused
// madd helper so rounding matches the rest of the library.
template<typename T>
__forceinline Vec2<T> lerp(const Vec2<T>& v0, const Vec2<T>& v1, const T& t) {
  const T w0 = T(1.0f) - t; // weight of v0
  return madd(Vec2<T>(w0), v0, t*v1);
}
// Index of the component with the largest magnitude: 0 for x, 1 for y.
// Ties (|x| == |y|) resolve to 1, as before.
template<typename T> __forceinline int maxDim ( const Vec2<T>& v )
{
  const Vec2<T> m = abs(v);
  return (m.x > m.y) ? 0 : 1;
}
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////

// Streams the vector as "(x, y)".
template<typename T> __forceinline embree_ostream operator<<(embree_ostream out, const Vec2<T>& v) {
  out << "(" << v.x << ", " << v.y << ")";
  return out;
}
////////////////////////////////////////////////////////////////////////////////
/// Default template instantiations
////////////////////////////////////////////////////////////////////////////////
// Common concrete instantiations of the generic 2D vector.
typedef Vec2<bool > Vec2b;  // per-component boolean mask
typedef Vec2<int > Vec2i;   // integer coordinates / indices
typedef Vec2<float> Vec2f;  // single-precision points and directions
}
#include "vec2fa.h"
#if defined(__SSE__) || defined(__ARM_NEON)
#include "../simd/sse.h"
#endif
#if defined(__AVX__)
#include "../simd/avx.h"
#endif
#if defined(__AVX512F__)
#include "../simd/avx512.h"
#endif
namespace embree
{
// Out-of-line definitions of the Vec2<T>(Vec2fa) conversion constructor:
// copy the two active lanes of the 16-byte Vec2fa into the generic vector.
// One specialization per SIMD scalar type, each compiled only when the
// matching ISA headers were included above.
template<> __forceinline Vec2<float>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#if defined(__SSE__) || defined(__ARM_NEON)
template<> __forceinline Vec2<vfloat4>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#endif
#if defined(__AVX__)
template<> __forceinline Vec2<vfloat8>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec2<vfloat16>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#endif
}

View file

@ -0,0 +1,325 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec2fa_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec2fa Type
////////////////////////////////////////////////////////////////////////////////
// 2D float vector stored in a full 16-byte SSE register. Only lanes 0 and 1
// (x, y) are meaningful; az and aw are padding lanes whose contents are
// unspecified unless a constructor explicitly fills them.
struct __aligned(16) Vec2fa
{
ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 2 }; // number of meaningful components
union {
__m128 m128;                  // raw SSE register
struct { float x,y,az,aw; };  // named lanes; az/aw are padding
};
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa( ) {}
__forceinline Vec2fa( const __m128 a ) : m128(a) {}
// Conversion from the generic Vec2<float>: copies x/y, leaves az/aw undefined.
__forceinline Vec2fa ( const Vec2<float>& other ) { x = other.x; y = other.y; }
__forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }
__forceinline Vec2fa ( const Vec2fa& other ) { m128 = other.m128; }
__forceinline Vec2fa& operator =( const Vec2fa& other ) { m128 = other.m128; return *this; }
// Broadcasts a into all four lanes.
__forceinline explicit Vec2fa( const float a ) : m128(_mm_set1_ps(a)) {}
// _mm_set_ps takes lanes high-to-low, so this produces (x, y, y, y): x and y
// are set and the padding lanes are replicated from y.
__forceinline Vec2fa( const float x, const float y) : m128(_mm_set_ps(y, y, y, x)) {}
// Converts four packed int32 lanes to float.
__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
__forceinline operator const __m128&() const { return m128; }
__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
// Both loads read a full 16-byte vector and zero the upper two lanes via the
// (0,0,-1,-1) mask. NOTE(review): they read 16 bytes from 'a', not just 8 —
// the source allocation must span 16 bytes; 'load' additionally requires
// 16-byte alignment.
static __forceinline Vec2fa load( const void* const a ) {
return Vec2fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1))));
}
static __forceinline Vec2fa loadu( const void* const a ) {
return Vec2fa(_mm_and_ps(_mm_loadu_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1))));
}
// NOTE(review): writes the full 16 bytes (x, y and both padding lanes); the
// destination must own 16 bytes even though only x/y are meaningful. Contrast
// with the SYCL variant, which stores exactly two floats.
static __forceinline void storeu ( void* ptr, const Vec2fa& v ) {
_mm_storeu_ps((float*)ptr,v);
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
// Tag-dispatched constants, broadcast to all four lanes.
__forceinline Vec2fa( ZeroTy ) : m128(_mm_setzero_ps()) {}
__forceinline Vec2fa( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Vec2fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Vec2fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
// Indexed access to x (0) and y (1) only; the padding lanes are not addressable.
__forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
// Negation: flip the IEEE-754 sign bit of every lane.
__forceinline Vec2fa operator -( const Vec2fa& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return _mm_xor_ps(a.m128, mask);
}
// Absolute value: clear the sign bit of every lane.
__forceinline Vec2fa abs ( const Vec2fa& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return _mm_and_ps(a.m128, mask);
}
// Per-lane sign: -1 where the lane compares < 0, +1 everywhere else
// (so 0 maps to +1).
__forceinline Vec2fa sign ( const Vec2fa& a ) {
return blendv_ps(Vec2fa(one), -Vec2fa(one), _mm_cmplt_ps (a,Vec2fa(zero)));
}
// Approximate reciprocal, refined with Newton-Raphson: two vrecps steps on
// AArch64, one FMA-based step on x86 (rcp14 seed when AVX-512VL is available).
__forceinline Vec2fa rcp ( const Vec2fa& a )
{
#if defined(__aarch64__)
__m128 reciprocal = _mm_rcp_ps(a.m128);
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
return (const Vec2fa)reciprocal;
#else
#if defined(__AVX512VL__)
const Vec2fa r = _mm_rcp14_ps(a.m128);
#else
const Vec2fa r = _mm_rcp_ps(a.m128);
#endif
#if defined(__AVX2__)
const Vec2fa h_n = _mm_fnmadd_ps(a, r, vfloat4(1.0)); // First, compute 1 - a * r (which will be very close to 0)
const Vec2fa res = _mm_fmadd_ps(r, h_n, r); // Then compute r + r * h_n
#else
const Vec2fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a, r)); // First, compute 1 - a * r (which will be very close to 0)
const Vec2fa res = _mm_add_ps(r,_mm_mul_ps(r, h_n)); // Then compute r + r * h_n
#endif
return res;
#endif //defined(__aarch64__)
}
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps(a.m128); }
// Component-wise square.
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return _mm_mul_ps(a,a); }
// Approximate reciprocal square root with refinement: two vrsqrts steps on
// AArch64, one Newton step r*(1.5 - 0.5*a*r*r) on x86.
__forceinline Vec2fa rsqrt( const Vec2fa& a )
{
#if defined(__aarch64__)
__m128 r = _mm_rsqrt_ps(a.m128);
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
return r;
#else
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ps(a.m128);
#else
__m128 r = _mm_rsqrt_ps(a.m128);
#endif
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
#endif
}
// Replaces lanes whose magnitude is below min_rcp_input with min_rcp_input,
// so a following rcp cannot overflow to infinity.
__forceinline Vec2fa zero_fix(const Vec2fa& a) {
return blendv_ps(a, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
}
// Reciprocal with tiny-input clamping (see zero_fix).
__forceinline Vec2fa rcp_safe(const Vec2fa& a) {
return rcp(zero_fix(a));
}
// Component-wise natural log / exp on the two active lanes via scalar libm.
__forceinline Vec2fa log ( const Vec2fa& a ) {
return Vec2fa(logf(a.x),logf(a.y));
}
__forceinline Vec2fa exp ( const Vec2fa& a ) {
return Vec2fa(expf(a.x),expf(a.y));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// Component-wise arithmetic on all four SSE lanes; the padding lanes receive
// whatever values fall out, which is fine because they are never observed.
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return _mm_add_ps(a.m128, b.m128); }
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
__forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
__forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return _mm_div_ps(a.m128,b.m128); }
__forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
__forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps(a.m128,b.m128); }
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps(a.m128,b.m128); }
// mini/maxi pick per-lane extrema by comparing the raw IEEE-754 bit patterns
// as signed 32-bit integers — this orders correctly only for non-negative
// finite floats, which is how the library uses them. The two previously
// duplicated, back-to-back '#if defined(__aarch64__) || defined(__SSE4_1__)'
// regions are merged into a single guard (no behavioral change).
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
// Component-wise power on the two active lanes via scalar libm.
__forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
return Vec2fa(powf(a.x,b),powf(a.y,b));
}
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
// Fused multiply-add helpers:
//   madd = a*b + c,   msub = a*b - c,
//   nmadd = -a*b + c, nmsub = -a*b - c.
// With AVX2 these map to single FMA instructions; otherwise they fall back to
// separate multiply/add (slightly different rounding, no fusion).
#if defined(__AVX2__)
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmadd_ps(a,b,c); }
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmsub_ps(a,b,c); }
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmadd_ps(a,b,c); }
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmsub_ps(a,b,c); }
#else
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b+c; }
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b-c; }
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b+c;}
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b-c; }
#endif
// Scalar-first variants broadcast the scalar across the lanes.
__forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); }
__forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); }
__forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); }
__forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
// Compound assignment, expressed via the corresponding binary operator above.
__forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b ) {
  a = a + b;
  return a;
}
__forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b ) {
  a = a - b;
  return a;
}
__forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b ) {
  a = a * b;
  return a;
}
__forceinline Vec2fa& operator *=( Vec2fa& a, const float b ) {
  a = a * b;
  return a;
}
__forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b ) {
  a = a / b;
  return a;
}
__forceinline Vec2fa& operator /=( Vec2fa& a, const float b ) {
  a = a / b;
  return a;
}
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
// Horizontal reductions over the two active lanes only; padding lanes are
// never touched.
__forceinline float reduce_add(const Vec2fa& v) {
  return v.x + v.y;
}
__forceinline float reduce_mul(const Vec2fa& v) {
  return v.x * v.y;
}
__forceinline float reduce_min(const Vec2fa& v) {
  return min(v.x, v.y);
}
__forceinline float reduce_max(const Vec2fa& v) {
  return max(v.x, v.y);
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
// Whole-vector (in)equality. _mm_movemask_ps packs one sign bit per lane;
// the '& 3' keeps only lanes 0 and 1 so the undefined padding lanes cannot
// influence the result.
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 3) == 3; }
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 3) != 0; }
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE4_1__)
// Dot product via DPPS. Immediate 0x3F: the high nibble (0x3) multiplies only
// lanes 0 and 1 (x and y), excluding the padding lanes; the low nibble (0xF)
// broadcasts the sum to all lanes, from which lane 0 is extracted.
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
return _mm_cvtss_f32(_mm_dp_ps(a,b,0x3F));
}
#else
// Fallback: component-wise product, then horizontal add of the two lanes.
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
return reduce_add(a*b);
}
#endif
// 90-degree counter-clockwise rotation (the 2D analogue of a cross product).
__forceinline Vec2fa cross ( const Vec2fa& a ) {
return Vec2fa(-a.y,a.x);
}
// Scalar length helpers; rcp/rsqrt here resolve to the scalar float overloads
// (presumably from the shared math header — not visible in this file).
__forceinline float sqr_length ( const Vec2fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec2fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec2fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec2fa& a ) { return sqrt(dot(a,a)); }
// Normalization via the approximate reciprocal square root.
__forceinline Vec2fa normalize( const Vec2fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec2fa& a, const Vec2fa& b ) { return length(a-b); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
// Branchless whole-vector select: builds an all-ones or all-zeros lane mask
// from the bool, then blends t over f.
__forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f ) {
__m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
return blendv_ps(f, t, mask);
}
// Linear interpolation: v0 at t==0, v1 at t==1 (FMA-fused where available).
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
// Index of the component with the largest magnitude (0 = x, 1 = y);
// ties resolve to 1.
__forceinline int maxDim ( const Vec2fa& a )
{
const Vec2fa b = abs(a);
if (b.x > b.y) return 0;
else return 1;
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
// Component-wise floor/ceil, per ISA; trunc is disabled in every branch.
#if defined(__aarch64__)
//__forceinline Vec2fa trunc(const Vec2fa& a) { return vrndq_f32(a); }
__forceinline Vec2fa floor(const Vec2fa& a) { return vrndmq_f32(a); }
__forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32(a); }
#elif defined (__SSE4_1__)
// NOTE(review): the disabled trunc below uses _MM_FROUND_TO_NEAREST_INT,
// which rounds to nearest rather than truncating — confirm intent before
// re-enabling.
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); }
__forceinline Vec2fa floor( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); }
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); }
#else
// NOTE(review): the disabled trunc below still references a.z — stale code
// from the 3-component variant this file was derived from.
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
__forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(floorf(a.x),floorf(a.y)); }
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(ceilf (a.x),ceilf (a.y)); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
// Streams as "(x, y)"; padding lanes are not emitted.
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a) {
return cout << "(" << a.x << ", " << a.y << ")";
}
typedef Vec2fa Vec2fa_t; // legacy alias
}
#endif

View file

@ -0,0 +1,270 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
struct Vec3fa;
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec2fa Type
////////////////////////////////////////////////////////////////////////////////
// SYCL-device variant of Vec2fa: plain scalar storage (two floats, no SSE
// register and no padding lanes). The commented-out members mirror the host
// SSE variant's interface.
struct __aligned(16) Vec2fa
{
//ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 2 }; // number of components
struct { float x,y; };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa( ) {}
//__forceinline Vec2fa( const __m128 a ) : m128(a) {}
// Defined out of line (requires the complete Vec3fa type).
explicit Vec2fa(const Vec3fa& a);
// Takes the first two lanes of a 4-wide SIMD float.
__forceinline explicit Vec2fa( const vfloat<4>& a ) {
x = a[0];
y = a[1];
}
__forceinline Vec2fa ( const Vec2<float>& other ) { x = other.x; y = other.y; }
__forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }
__forceinline Vec2fa ( const Vec2fa& other ) { x = other.x; y = other.y; }
__forceinline Vec2fa& operator =( const Vec2fa& other ) { x = other.x; y = other.y; return *this; }
// Broadcast constructor: both components set to a.
__forceinline explicit Vec2fa( const float a ) : x(a), y(a) {}
__forceinline Vec2fa( const float x, const float y) : x(x), y(y) {}
//__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
// Unlike the host SSE variant, these read and write exactly two floats.
static __forceinline Vec2fa load( const void* const a ) {
const float* ptr = (const float*)a;
return Vec2fa(ptr[0],ptr[1]);
}
static __forceinline Vec2fa loadu( const void* const a ) {
const float* ptr = (const float*)a;
return Vec2fa(ptr[0],ptr[1]);
}
static __forceinline void storeu ( void* a, const Vec2fa& v ) {
float* ptr = (float*)a;
ptr[0] = v.x; ptr[1] = v.y;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
// Tag-dispatched constants (zero, one, +inf, -inf).
__forceinline Vec2fa( ZeroTy ) : x(0.0f), y(0.0f) {}
__forceinline Vec2fa( OneTy ) : x(1.0f), y(1.0f) {}
__forceinline Vec2fa( PosInfTy ) : x(+INFINITY), y(+INFINITY) {}
__forceinline Vec2fa( NegInfTy ) : x(-INFINITY), y(-INFINITY) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
// Indexed access is disabled in this variant (kept for interface reference).
//__forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
//__forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
__forceinline Vec2fa operator -( const Vec2fa& a ) { return Vec2fa(-a.x,-a.y); }
__forceinline Vec2fa abs ( const Vec2fa& a ) { return Vec2fa(sycl::fabs(a.x),sycl::fabs(a.y)); }
__forceinline Vec2fa sign ( const Vec2fa& a ) { return Vec2fa(sycl::sign(a.x),sycl::sign(a.y)); }
// Reciprocal via the SYCL native reciprocal builtin — presumably the fast,
// reduced-precision device instruction; confirm precision requirements before
// relying on it (the plain sycl::recip form is kept commented above).
//__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::recip(a.x),sycl::recip(a.y)); }
__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y)); }
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return Vec2fa(sycl::sqrt(a.x),sycl::sqrt(a.y)); }
// Component-wise square.
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return Vec2fa(a.x*a.x,a.y*a.y); }
__forceinline Vec2fa rsqrt( const Vec2fa& a ) { return Vec2fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y)); }
// Clamps tiny-magnitude components up to min_rcp_input so a following rcp
// cannot overflow to infinity.
__forceinline Vec2fa zero_fix(const Vec2fa& a) {
const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
return Vec2fa(x,y);
}
// Reciprocal with tiny-input clamping (see zero_fix).
__forceinline Vec2fa rcp_safe(const Vec2fa& a) {
return rcp(zero_fix(a));
}
// Component-wise natural log / exp via the SYCL math library.
__forceinline Vec2fa log ( const Vec2fa& a ) {
return Vec2fa(sycl::log(a.x),sycl::log(a.y));
}
__forceinline Vec2fa exp ( const Vec2fa& a ) {
return Vec2fa(sycl::exp(a.x),sycl::exp(a.y));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// Plain component-wise arithmetic; no SIMD lanes in this variant.
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x+b.x, a.y+b.y); }
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x-b.x, a.y-b.y); }
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x*b.x, a.y*b.y); }
__forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
__forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x/b.x, a.y/b.y); }
__forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return Vec2fa(a.x/b, a.y/b); }
__forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return Vec2fa(a/b.x, a/b.y); }
// fmin/fmax-based min/max.
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) {
return Vec2fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y));
}
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) {
return Vec2fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y));
}
// NOTE(review): dead code below — mini/maxi/pow from the host SSE variant,
// disabled for the SYCL build; candidate for deletion.
/*
#if defined(__SSE4_1__)
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__)
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
__forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
return Vec2fa(powf(a.x,b),powf(a.y,b));
}
*/
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
// Fused multiply-add helpers, forwarded per component to the scalar
// madd/msub/nmadd/nmsub overloads:
//   madd = a*b + c,   msub = a*b - c,
//   nmadd = -a*b + c, nmsub = -a*b - c.
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y)); }
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y)); }
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y)); }
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y)); }
// Scalar-first variants broadcast the scalar across both components.
__forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); }
__forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); }
__forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); }
__forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
// Compound assignment, expressed via the corresponding binary operator above.
__forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b ) {
  a = a + b;
  return a;
}
__forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b ) {
  a = a - b;
  return a;
}
__forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b ) {
  a = a * b;
  return a;
}
__forceinline Vec2fa& operator *=( Vec2fa& a, const float b ) {
  a = a * b;
  return a;
}
__forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b ) {
  a = a / b;
  return a;
}
__forceinline Vec2fa& operator /=( Vec2fa& a, const float b ) {
  a = a / b;
  return a;
}
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
// Horizontal reductions; min/max use fmin/fmax.
__forceinline float reduce_add(const Vec2fa& v) { return v.x+v.y; }
__forceinline float reduce_mul(const Vec2fa& v) { return v.x*v.y; }
__forceinline float reduce_min(const Vec2fa& v) { return sycl::fmin(v.x,v.y); }
__forceinline float reduce_max(const Vec2fa& v) { return sycl::fmax(v.x,v.y); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
// Whole-vector (in)equality over both components.
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return a.x == b.x && a.y == b.y; }
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return a.x != b.x || a.y != b.y; }
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
return reduce_add(a*b);
}
// 90-degree counter-clockwise rotation (the 2D analogue of a cross product).
__forceinline Vec2fa cross ( const Vec2fa& a ) {
return Vec2fa(-a.y,a.x);
}
// Scalar length helpers; rcp/rsqrt resolve to the scalar overloads defined
// above / in the shared math header.
__forceinline float sqr_length ( const Vec2fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec2fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec2fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec2fa& a ) { return sqrt(dot(a,a)); }
__forceinline Vec2fa normalize( const Vec2fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec2fa& a, const Vec2fa& b ) { return length(a-b); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
// Whole-vector select via per-component ternaries.
__forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f ) {
return Vec2fa(s ? t.x : f.x, s ? t.y : f.y);
}
// Linear interpolation: v0 at t==0, v1 at t==1.
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
// Index of the component with the largest magnitude (0 = x, 1 = y);
// ties resolve to 1.
__forceinline int maxDim ( const Vec2fa& a )
{
const Vec2fa b = abs(a);
if (b.x > b.y) return 0;
else return 1;
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
// Component-wise rounding via the SYCL math library. Unlike the host SSE
// variant, trunc is available here.
__forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(sycl::trunc(a.x),sycl::trunc(a.y)); }
__forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(sycl::floor(a.x),sycl::floor(a.y)); }
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(sycl::ceil (a.x),sycl::ceil (a.y)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
// Streams as "(x, y)".
inline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a) {
return cout << "(" << a.x << ", " << a.y << ")";
}
// Disabled sub-group scatter of a Vec2fa into a 4-wide SIMD float; kept as
// reference for a possible future conversion path.
/*template<>
__forceinline vfloat_impl<4>::vfloat_impl(const Vec2fa& a)
{
v = 0;
const unsigned int lid = get_sub_group_local_id();
if (lid == 0) v = a.x;
if (lid == 1) v = a.y;
}*/
typedef Vec2fa Vec2fa_t; // legacy alias
}

View file

@ -0,0 +1,357 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
namespace embree
{
struct Vec3fa;
////////////////////////////////////////////////////////////////////////////////
/// Generic 3D vector Class
////////////////////////////////////////////////////////////////////////////////
/// Generic fixed-size 3-component vector over an arbitrary scalar type T.
template<typename T> struct Vec3
{
enum { N = 3 };  // number of components
union {
struct {
T x, y, z;
};
#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
T components[N];  // aliased array view of x/y/z, used by operator[]
#endif
};
typedef T Scalar;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3( ) {}
// Broadcast constructor: all three components set to a.
__forceinline explicit Vec3( const T& a ) : x(a), y(a), z(a) {}
__forceinline Vec3( const T& x, const T& y, const T& z ) : x(x), y(y), z(z) {}
__forceinline Vec3( const Vec3& other ) { x = other.x; y = other.y; z = other.z; }
// Declared here; specializations are defined after Vec3fa is complete.
__forceinline Vec3( const Vec3fa& other );
// Converting constructor/assignment from a Vec3 with a different scalar type.
template<typename T1> __forceinline Vec3( const Vec3<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)) {}
template<typename T1> __forceinline Vec3& operator =(const Vec3<T1>& other) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline Vec3& operator =(const Vec3& other) { x = other.x; y = other.y; z = other.z; return *this; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3( ZeroTy ) : x(zero), y(zero), z(zero) {}
__forceinline Vec3( OneTy ) : x(one), y(one), z(one) {}
__forceinline Vec3( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf) {}
__forceinline Vec3( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf) {}
// Array access; the VS2013 path indexes off &x since the components[] union
// member is absent under that compiler.
#if defined(__WIN32__) && (_MSC_VER == 1800) // workaround for older VS 2013 compiler
__forceinline const T& operator []( const size_t axis ) const { assert(axis < 3); return (&x)[axis]; }
__forceinline T& operator []( const size_t axis ) { assert(axis < 3); return (&x)[axis]; }
#else
__forceinline const T& operator [](const size_t axis) const { assert(axis < 3); return components[axis]; }
__forceinline T& operator [](const size_t axis) { assert(axis < 3); return components[axis]; }
#endif
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
// All unary operators below apply componentwise.
template<typename T> __forceinline Vec3<T> operator +( const Vec3<T>& a ) { return Vec3<T>(+a.x, +a.y, +a.z); }
template<typename T> __forceinline Vec3<T> operator -( const Vec3<T>& a ) { return Vec3<T>(-a.x, -a.y, -a.z); }
template<typename T> __forceinline Vec3<T> abs ( const Vec3<T>& a ) { return Vec3<T>(abs (a.x), abs (a.y), abs (a.z)); }
template<typename T> __forceinline Vec3<T> rcp ( const Vec3<T>& a ) { return Vec3<T>(rcp (a.x), rcp (a.y), rcp (a.z)); }
template<typename T> __forceinline Vec3<T> rsqrt ( const Vec3<T>& a ) { return Vec3<T>(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z)); }
template<typename T> __forceinline Vec3<T> sqrt ( const Vec3<T>& a ) { return Vec3<T>(sqrt (a.x), sqrt (a.y), sqrt (a.z)); }
// Replaces components with magnitude below min_rcp_input by min_rcp_input,
// so a subsequent rcp() cannot blow up on (near) zero input.
template<typename T> __forceinline Vec3<T> zero_fix( const Vec3<T>& a )
{
return Vec3<T>(select(abs(a.x)<min_rcp_input,T(min_rcp_input),a.x),
select(abs(a.y)<min_rcp_input,T(min_rcp_input),a.y),
select(abs(a.z)<min_rcp_input,T(min_rcp_input),a.z));
}
// Reciprocal that is safe for (near) zero components.
template<typename T> __forceinline Vec3<T> rcp_safe(const Vec3<T>& a) { return rcp(zero_fix(a)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// Componentwise arithmetic; the scalar overloads apply the scalar to each lane.
template<typename T> __forceinline Vec3<T> operator +( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x + b.x, a.y + b.y, a.z + b.z); }
template<typename T> __forceinline Vec3<T> operator -( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x - b.x, a.y - b.y, a.z - b.z); }
template<typename T> __forceinline Vec3<T> operator *( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x * b.x, a.y * b.y, a.z * b.z); }
template<typename T> __forceinline Vec3<T> operator *( const T& a, const Vec3<T>& b ) { return Vec3<T>(a * b.x, a * b.y, a * b.z); }
template<typename T> __forceinline Vec3<T> operator *( const Vec3<T>& a, const T& b ) { return Vec3<T>(a.x * b , a.y * b , a.z * b ); }
template<typename T> __forceinline Vec3<T> operator /( const Vec3<T>& a, const T& b ) { return Vec3<T>(a.x / b , a.y / b , a.z / b ); }
template<typename T> __forceinline Vec3<T> operator /( const T& a, const Vec3<T>& b ) { return Vec3<T>(a / b.x, a / b.y, a / b.z); }
template<typename T> __forceinline Vec3<T> operator /( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x / b.x, a.y / b.y, a.z / b.z); }
template<typename T> __forceinline Vec3<T> min(const Vec3<T>& a, const Vec3<T>& b) { return Vec3<T>(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); }
template<typename T> __forceinline Vec3<T> max(const Vec3<T>& a, const Vec3<T>& b) { return Vec3<T>(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); }
// Componentwise shifts by a common shift amount (integer scalar types).
template<typename T> __forceinline Vec3<T> operator >>( const Vec3<T>& a, const int b ) { return Vec3<T>(a.x >> b, a.y >> b, a.z >> b); }
template<typename T> __forceinline Vec3<T> operator <<( const Vec3<T>& a, const int b ) { return Vec3<T>(a.x << b, a.y << b, a.z << b); }
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
// Componentwise fused multiply-add family, forwarding to the scalar versions:
// madd = a*b+c, msub = a*b-c, nmadd = -a*b+c, nmsub = -a*b-c.
// The scalar-first overloads multiply every component of b by the same a.
template<typename T> __forceinline Vec3<T> madd ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> msub ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> nmadd ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z));}
template<typename T> __forceinline Vec3<T> nmsub ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> madd ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> msub ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> nmadd ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z));}
template<typename T> __forceinline Vec3<T> nmsub ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
// In-place componentwise updates; the scalar += adds b to every component.
template<typename T> __forceinline Vec3<T>& operator +=( Vec3<T>& a, const T b ) { a.x += b; a.y += b; a.z += b; return a; }
template<typename T> __forceinline Vec3<T>& operator +=( Vec3<T>& a, const Vec3<T>& b ) { a.x += b.x; a.y += b.y; a.z += b.z; return a; }
template<typename T> __forceinline Vec3<T>& operator -=( Vec3<T>& a, const Vec3<T>& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; return a; }
template<typename T> __forceinline Vec3<T>& operator *=( Vec3<T>& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; return a; }
template<typename T> __forceinline Vec3<T>& operator /=( Vec3<T>& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; return a; }
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operators
////////////////////////////////////////////////////////////////////////////////
// Horizontal reductions over the three components.
template<typename T> __forceinline T reduce_add( const Vec3<T>& a ) { return a.x + a.y + a.z; }
template<typename T> __forceinline T reduce_mul( const Vec3<T>& a ) { return a.x * a.y * a.z; }
template<typename T> __forceinline T reduce_min( const Vec3<T>& a ) { return min(a.x, a.y, a.z); }
template<typename T> __forceinline T reduce_max( const Vec3<T>& a ) { return max(a.x, a.y, a.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const Vec3<T>& a, const Vec3<T>& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
template<typename T> __forceinline bool operator !=( const Vec3<T>& a, const Vec3<T>& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
// Lexicographic ordering (x first, then y, then z), e.g. for ordered containers.
template<typename T> __forceinline bool operator < ( const Vec3<T>& a, const Vec3<T>& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Shift Operators
////////////////////////////////////////////////////////////////////////////////
// Applies the scalar shift_right_1 to each component.
template<typename T> __forceinline Vec3<T> shift_right_1( const Vec3<T>& a ) {
return Vec3<T>(shift_right_1(a.x),shift_right_1(a.y),shift_right_1(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
// Select with a single bool applied to all components.
template<typename T> __forceinline Vec3<T> select ( bool s, const Vec3<T>& t, const Vec3<T>& f ) {
return Vec3<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z));
}
// Select with a per-component bool mask.
template<typename T> __forceinline Vec3<T> select ( const Vec3<bool>& s, const Vec3<T>& t, const Vec3<T>& f ) {
return Vec3<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y),select(s.z,t.z,f.z));
}
// Select with the scalar type's own mask type (T::Bool, e.g. SIMD lane masks).
template<typename T> __forceinline Vec3<T> select ( const typename T::Bool& s, const Vec3<T>& t, const Vec3<T>& f ) {
return Vec3<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z));
}
// Linear interpolation (1-t)*v0 + t*v1, fused via madd.
template<typename T>
__forceinline Vec3<T> lerp(const Vec3<T>& v0, const Vec3<T>& v1, const T& t) {
return madd(Vec3<T>(T(1.0f)-t),v0,t*v1);
}
// Index (0=x, 1=y, 2=z) of the component with the largest absolute value.
template<typename T> __forceinline int maxDim ( const Vec3<T>& a )
{
const Vec3<T> b = abs(a);
if (b.x > b.y) {
if (b.x > b.z) return 0; else return 2;
} else {
if (b.y > b.z) return 1; else return 2;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
// Componentwise comparisons returning a Vec3<bool> mask (usable with select).
template<typename T> __forceinline Vec3<bool> eq_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x==b.x,a.y==b.y,a.z==b.z); }
template<typename T> __forceinline Vec3<bool> neq_mask(const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x!=b.x,a.y!=b.y,a.z!=b.z); }
template<typename T> __forceinline Vec3<bool> lt_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x< b.x,a.y< b.y,a.z< b.z); }
template<typename T> __forceinline Vec3<bool> le_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x<=b.x,a.y<=b.y,a.z<=b.z); }
template<typename T> __forceinline Vec3<bool> gt_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x> b.x,a.y> b.y,a.z> b.z); }
template<typename T> __forceinline Vec3<bool> ge_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x>=b.x,a.y>=b.y,a.z>=b.z); }
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
// sqr is the squared length (dot of a with itself).
template<typename T> __forceinline T sqr ( const Vec3<T>& a ) { return dot(a,a); }
template<typename T> __forceinline T dot ( const Vec3<T>& a, const Vec3<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,a.z*b.z)); }
template<typename T> __forceinline T length ( const Vec3<T>& a ) { return sqrt(sqr(a)); }
template<typename T> __forceinline T rcp_length( const Vec3<T>& a ) { return rsqrt(sqr(a)); }
template<typename T> __forceinline Vec3<T> normalize( const Vec3<T>& a ) { return a*rsqrt(sqr(a)); }
template<typename T> __forceinline T distance ( const Vec3<T>& a, const Vec3<T>& b ) { return length(a-b); }
template<typename T> __forceinline Vec3<T> cross ( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x)); }
// Triangle normal computed from two edge cross products (a x b and b x c);
// per component it selects the one whose subtracted term has the smaller
// magnitude, which reduces cancellation error.
template<typename T> __forceinline Vec3<T> stable_triangle_normal( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c )
{
const T ab_x = a.z*b.y, ab_y = a.x*b.z, ab_z = a.y*b.x;
const T bc_x = b.z*c.y, bc_y = b.x*c.z, bc_z = b.y*c.x;
const Vec3<T> cross_ab(msub(a.y,b.z,ab_x), msub(a.z,b.x,ab_y), msub(a.x,b.y,ab_z));
const Vec3<T> cross_bc(msub(b.y,c.z,bc_x), msub(b.z,c.x,bc_y), msub(b.x,c.y,bc_z));
const auto sx = abs(ab_x) < abs(bc_x);
const auto sy = abs(ab_y) < abs(bc_y);
const auto sz = abs(ab_z) < abs(bc_z);
return Vec3<T>(select(sx,cross_ab.x,cross_bc.x),
select(sy,cross_ab.y,cross_bc.y),
select(sz,cross_ab.z,cross_bc.z));
}
template<typename T> __forceinline T sum ( const Vec3<T>& a ) { return a.x+a.y+a.z; }
// Half the surface area of a box with extents d: d.x*(d.y+d.z) + d.y*d.z.
template<typename T> __forceinline T halfArea ( const Vec3<T>& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
template<typename T> __forceinline T area ( const Vec3<T>& d ) { return 2.0f*halfArea(d); }
// Normalize, but return a unchanged where its squared length equals zero.
template<typename T> __forceinline Vec3<T> normalize_safe( const Vec3<T>& a ) {
const T d = dot(a,a); return select(d == T( zero ), a , a*rsqrt(d) );
}
// Squared distance from point P to the infinite line through Q0 and Q1:
// |N|^2 / |D|^2 with N = (P-Q0) x (Q1-Q0) and D = Q1-Q0.
template<typename T> __forceinline T sqr_point_to_line_distance(const Vec3<T>& P, const Vec3<T>& Q0, const Vec3<T>& Q1)
{
const Vec3<T> N = cross(P-Q0,Q1-Q0);
const Vec3<T> D = Q1-Q0;
return dot(N,N)*rcp(dot(D,D));
}
// Same as above with precomputed differences PmQ0 = P-Q0 and Q1mQ0 = Q1-Q0.
template<typename T> __forceinline T sqr_point_to_line_distance(const Vec3<T>& PmQ0, const Vec3<T>& Q1mQ0)
{
const Vec3<T> N = cross(PmQ0,Q1mQ0);
const Vec3<T> D = Q1mQ0;
return dot(N,N)*rcp(dot(D,D));
}
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
/// Streams the vector as "(x, y, z)".
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3<T>& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
// Common scalar instantiations.
typedef Vec3<bool > Vec3b;
typedef Vec3<int > Vec3i;
typedef Vec3<float> Vec3f;
}
#include "vec3ba.h"
#include "vec3ia.h"
#include "vec3fa.h"
////////////////////////////////////////////////////////////////////////////////
/// SSE / AVX / MIC specializations
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE__) || defined(__ARM_NEON)
#include "../simd/sse.h"
#endif
#if defined(__AVX__)
#include "../simd/avx.h"
#endif
#if defined(__AVX512F__)
#include "../simd/avx512.h"
#endif
namespace embree
{
// Broadcasts lane k of each component of a Vec3<In> into a Vec3<Out>.
template<typename Out, typename In>
__forceinline Vec3<Out> broadcast(const Vec3<In>& a, const size_t k) {
return Vec3<Out>(Out(a.x[k]), Out(a.y[k]), Out(a.z[k]));
}
// Scalar specialization of the Vec3(Vec3fa) constructor declared in Vec3.
template<> __forceinline Vec3<float>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; }
#if !defined(__SYCL_DEVICE_ONLY__)
#if defined(__AVX__)
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#elif defined(__SSE__) || defined(__ARM_NEON)
// SSE/NEON path: splat each lane of the Vec3fa register via shuffles.
template<>
__forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v);
}
#endif
#if defined(__SSE__) || defined(__ARM_NEON)
template<>
__forceinline Vec3<vfloat4> broadcast<vfloat4,vfloat4>(const Vec3<vfloat4>& a, const size_t k) {
return Vec3<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k]));
}
// Applies the same 4-lane shuffle to all three components.
template<int i0, int i1, int i2, int i3>
__forceinline Vec3<vfloat4> shuffle(const Vec3<vfloat4>& b) {
return Vec3<vfloat4>(shuffle<i0,i1,i2,i3>(b.x), shuffle<i0,i1,i2,i3>(b.y), shuffle<i0,i1,i2,i3>(b.z));
}
#endif
#if defined(__AVX__)
template<>
__forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
template<>
__forceinline Vec3<vfloat8> broadcast<vfloat8,vfloat4>(const Vec3<vfloat4>& a, const size_t k) {
return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]));
}
template<>
__forceinline Vec3<vfloat8> broadcast<vfloat8,vfloat8>(const Vec3<vfloat8>& a, const size_t k) {
return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]));
}
// Applies the same 4-lane shuffle pattern to all three vfloat8 components.
template<int i0, int i1, int i2, int i3>
__forceinline Vec3<vfloat8> shuffle(const Vec3<vfloat8>& b) {
return Vec3<vfloat8>(shuffle<i0,i1,i2,i3>(b.x), shuffle<i0,i1,i2,i3>(b.y), shuffle<i0,i1,i2,i3>(b.z));
}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) : x(a.x), y(a.y), z(a.z) {}
#endif
#else
// SYCL device compilation: plain componentwise copies, no shuffles.
#if defined(__SSE__)
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#endif
#if defined(__AVX__)
template<> __forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#endif
#endif
}

View file

@ -0,0 +1,127 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec3ba_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ba Type
////////////////////////////////////////////////////////////////////////////////
/// 3-wide boolean mask held in a 16-byte SSE register; the fourth lane
/// exists only as padding.
struct __aligned(16) Vec3ba
{
ALIGNED_STRUCT_(16);
union {
__m128 m128;
struct { int x,y,z; };  // integer view of the three mask lanes
};
typedef int Scalar;
enum { N = 3 };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( ) {}
__forceinline Vec3ba( const __m128 input ) : m128(input) {}
__forceinline Vec3ba( const Vec3ba& other ) : m128(other.m128) {}
__forceinline Vec3ba& operator =(const Vec3ba& other) { m128 = other.m128; return *this; }
// Broadcast: sets all four lanes (including the padding lane) from a,
// via the 16-entry mask lookup table.
__forceinline explicit Vec3ba( bool a )
: m128(mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}
// Per-lane construction via the mask lookup table (bit order z,y,x).
__forceinline Vec3ba( bool a, bool b, bool c)
: m128(mm_lookupmask_ps[(size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {}
__forceinline operator const __m128&() const { return m128; }
__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( FalseTy ) : m128(_mm_setzero_ps()) {}
// All-ones mask built from an integer self-comparison.
__forceinline Vec3ba( TrueTy ) : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
// Logical not: xor against an all-true mask.
__forceinline Vec3ba operator !( const Vec3ba& a ) { return _mm_xor_ps(a.m128, Vec3ba(embree::True)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return _mm_and_ps(a.m128, b.m128); }
__forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return _mm_or_ps (a.m128, b.m128); }
__forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return _mm_xor_ps(a.m128, b.m128); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; }
__forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; }
__forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators + Select
////////////////////////////////////////////////////////////////////////////////
// Equality tests only the low three lanes (movemask & 7); padding is ignored.
__forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) {
return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) == 7;
}
__forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) {
return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) != 7;
}
// Lexicographic ordering on the integer lane values (for ordered containers).
__forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////
// Reductions inspect the sign bits of the low three lanes only (0x7 mask).
__forceinline bool reduce_and( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) == 0x7; }
__forceinline bool reduce_or ( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) != 0x0; }
__forceinline bool all ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x7; }
__forceinline bool any ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) != 0x0; }
__forceinline bool none ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x0; }
__forceinline size_t movemask(const Vec3ba& a) { return _mm_movemask_ps(a) & 0x7; }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
/// Streams the mask as "(x, y, z)" with 0/1 per lane.
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) {
return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? "1" : "0") << ")";
}
}
#endif

View file

@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ba Type
////////////////////////////////////////////////////////////////////////////////
/// SYCL-device variant of Vec3ba: three plain bools instead of an SSE
/// register; the __m128-based members of the SSE version are commented out.
struct __aligned(16) Vec3ba
{
//ALIGNED_STRUCT_(16);
struct { bool x,y,z; };
typedef bool Scalar;
enum { N = 3 };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( ) {}
//__forceinline Vec3ba( const __m128 input ) : m128(input) {}
__forceinline Vec3ba( const Vec3ba& other ) : x(other.x), y(other.y), z(other.z) {}
__forceinline Vec3ba& operator =(const Vec3ba& other) { x = other.x; y = other.y; z = other.z; return *this; }
// Broadcast: all three components set to a.
__forceinline explicit Vec3ba( bool a ) : x(a), y(a), z(a) {}
__forceinline Vec3ba( bool a, bool b, bool c) : x(a), y(b), z(c) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( FalseTy ) : x(false), y(false), z(false) {}
__forceinline Vec3ba( TrueTy ) : x(true), y(true), z(true) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
//__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
//__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
// Componentwise logical not.
__forceinline Vec3ba operator !( const Vec3ba& a ) { return Vec3ba(!a.x,!a.y,!a.z); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x & b.x, a.y & b.y, a.z & b.z); }
__forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x | b.x, a.y | b.y, a.z | b.z); }
// xor on bools is expressed as inequality.
__forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; }
__forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; }
__forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators + Select
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) {
return a.x == b.x && a.y == b.y && a.z == b.z;
}
__forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) {
return a.x != b.x || a.y != b.y || a.z != b.z;
}
/*
__forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
*/
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////
// Horizontal and/or over the three components.
__forceinline bool reduce_and( const Vec3ba& a ) { return a.x & a.y & a.z; }
__forceinline bool reduce_or ( const Vec3ba& a ) { return a.x | a.y | a.z; }
__forceinline bool all ( const Vec3ba& b ) { return reduce_and(b); }
__forceinline bool any ( const Vec3ba& b ) { return reduce_or(b); }
__forceinline bool none ( const Vec3ba& b ) { return !reduce_or(b); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
/// Streams the mask as "(x, y, z)" with 0/1 per lane. Previously this
/// returned the stream untouched, printing nothing — inconsistent with the
/// SSE Vec3ba and the SYCL Vec2fa stream operators, which both print the
/// component values.
inline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) {
return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? "1" : "0") << ")";
}
}

View file

@ -0,0 +1,791 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec3fa_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fa Type
////////////////////////////////////////////////////////////////////////////////
/// 3-component float vector stored in a 16-byte SSE register; the fourth
/// lane is padding (set to 0 by the componentwise constructors).
struct __aligned(16) Vec3fa
{
ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
union {
__m128 m128;
struct { float x,y,z; };
};
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa( ) {}
__forceinline Vec3fa( const __m128 a ) : m128(a) {}
// Packs a scalar Vec3<float>, zeroing the padding lane.
__forceinline Vec3fa ( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); }
//__forceinline Vec3fa& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
__forceinline Vec3fa ( const Vec3fa& other ) { m128 = other.m128; }
__forceinline Vec3fa& operator =( const Vec3fa& other ) { m128 = other.m128; return *this; }
// Broadcast: all four lanes set to a.
__forceinline explicit Vec3fa( const float a ) : m128(_mm_set1_ps(a)) {}
__forceinline Vec3fa( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {}
// Conversions to/from integer lanes use _mm_cvtepi32_ps / _mm_cvtps_epi32.
__forceinline explicit Vec3fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
__forceinline explicit operator const vfloat4() const { return vfloat4(m128); }
__forceinline explicit operator const vint4() const { return vint4(_mm_cvtps_epi32(m128)); }
__forceinline explicit operator const Vec2fa() const { return Vec2fa(m128); }
__forceinline explicit operator const Vec3ia() const { return Vec3ia(_mm_cvtps_epi32(m128)); }
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
// Aligned load; clears the fourth lane so padding never leaks into results.
static __forceinline Vec3fa load( const void* const a ) {
#if defined(__aarch64__)
__m128 t = _mm_load_ps((float*)a);
t[3] = 0.0f;
return Vec3fa(t);
#else
return Vec3fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))));
#endif
}
// Unaligned load; note the fourth lane is loaded as-is (not cleared).
static __forceinline Vec3fa loadu( const void* const a ) {
return Vec3fa(_mm_loadu_ps((float*)a));
}
// Unaligned store of all four lanes (writes 16 bytes).
static __forceinline void storeu ( void* ptr, const Vec3fa& v ) {
_mm_storeu_ps((float*)ptr,v.m128);
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa( ZeroTy ) : m128(_mm_setzero_ps()) {}
__forceinline Vec3fa( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Vec3fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Vec3fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
// Negation: NEON negates directly, SSE flips the sign bits with xor.
__forceinline Vec3fa operator -( const Vec3fa& a ) {
#if defined(__aarch64__)
return vnegq_f32(a.m128);
#else
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return _mm_xor_ps(a.m128, mask);
#endif
}
// Absolute value: clears the sign bits.
__forceinline Vec3fa abs ( const Vec3fa& a ) {
#if defined(__aarch64__)
return _mm_abs_ps(a.m128);
#else
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return _mm_and_ps(a.m128, mask);
#endif
}
// Componentwise sign: -1 where a < 0, +1 elsewhere.
__forceinline Vec3fa sign ( const Vec3fa& a ) {
return blendv_ps(Vec3fa(one).m128, (-Vec3fa(one)).m128, _mm_cmplt_ps (a.m128,Vec3fa(zero).m128));
}
// Reciprocal: exact division on aarch64; elsewhere a hardware estimate
// refined with one Newton-Raphson step (r + r*(1 - a*r)).
__forceinline Vec3fa rcp ( const Vec3fa& a )
{
#if defined(__aarch64__)
return vdivq_f32(vdupq_n_f32(1.0f),a.m128);
#else
#if defined(__AVX512VL__)
const Vec3fa r = _mm_rcp14_ps(a.m128);
#else
const Vec3fa r = _mm_rcp_ps(a.m128);
#endif
#if defined(__AVX2__)
const Vec3fa h_n = _mm_fnmadd_ps(a.m128, r.m128, vfloat4(1.0)); // First, compute 1 - a * r (which will be very close to 0)
const Vec3fa res = _mm_fmadd_ps(r.m128, h_n.m128, r.m128); // Then compute r + r * h_n
#else
const Vec3fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a.m128, r.m128)); // First, compute 1 - a * r (which will be very close to 0)
const Vec3fa res = _mm_add_ps(r.m128,_mm_mul_ps(r.m128, h_n.m128)); // Then compute r + r * h_n
#endif
return res;
#endif //defined(__aarch64__)
}
__forceinline Vec3fa sqrt ( const Vec3fa& a ) { return _mm_sqrt_ps(a.m128); }
__forceinline Vec3fa sqr ( const Vec3fa& a ) { return _mm_mul_ps(a.m128,a.m128); }
// Reciprocal square root: hardware estimate refined with Newton-Raphson
// (two vrsqrts steps on NEON, one explicit step elsewhere).
__forceinline Vec3fa rsqrt( const Vec3fa& a )
{
#if defined(__aarch64__)
__m128 r = _mm_rsqrt_ps(a.m128);
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
return r;
#else
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ps(a.m128);
#else
__m128 r = _mm_rsqrt_ps(a.m128);
#endif
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
#endif
}
// Clamps tiny-magnitude components up to min_rcp_input so rcp() stays finite.
__forceinline Vec3fa zero_fix(const Vec3fa& a) {
return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
}
__forceinline Vec3fa rcp_safe(const Vec3fa& a) {
return rcp(zero_fix(a));
}
// Componentwise natural log/exp via scalar libm calls (not vectorized).
__forceinline Vec3fa log ( const Vec3fa& a ) {
return Vec3fa(logf(a.x),logf(a.y),logf(a.z));
}
__forceinline Vec3fa exp ( const Vec3fa& a ) {
return Vec3fa(expf(a.x),expf(a.y),expf(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// componentwise arithmetic; scalar operands are broadcast to all lanes
__forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return _mm_add_ps(a.m128, b.m128); }
__forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
__forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
__forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
__forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
__forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return _mm_div_ps(a.m128,b.m128); }
__forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
__forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
// componentwise min/max
__forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) { return _mm_min_ps(a.m128,b.m128); }
__forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) { return _mm_max_ps(a.m128,b.m128); }
#if defined(__aarch64__) || defined(__SSE4_1__)
// NOTE(review): mini/maxi compare the raw float bit patterns as signed 32-bit
// integers; that ordering matches float ordering only for non-negative inputs.
// Presumably callers guarantee non-negative operands -- confirm before using
// these with values that may be negative.
__forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a.m128);
const vint4 bi = _mm_castps_si128(b.m128);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a.m128);
const vint4 bi = _mm_castps_si128(b.m128);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
// componentwise power via scalar powf
__forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) {
return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
// fused multiply-add family: madd = a*b+c, msub = a*b-c, nmadd = -a*b+c,
// nmsub = -a*b-c; uses hardware FMA when available, otherwise the unfused form
#if defined(__AVX2__) || defined(__ARM_NEON)
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
#else
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b+c; }
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b+c;}
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b-c; }
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; }
#endif
// scalar-first overloads broadcast the scalar and forward to the vector form
__forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
__forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
__forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
__forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
// compound assignments implemented in terms of the binary operators above
__forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
__forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
// horizontal reductions over the three active components; the padding lane is
// explicitly neutralized (zeroed for add, duplicated from z for min/max)
#if defined(__aarch64__)
__forceinline float reduce_add(const Vec3fa& v) {
float32x4_t t = v.m128;
t[3] = 0.0f;
return vaddvq_f32(t);
}
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fa& v) {
float32x4_t t = v.m128;
t[3] = t[2];
return vminvq_f32(t);
}
__forceinline float reduce_max(const Vec3fa& v) {
float32x4_t t = v.m128;
t[3] = t[2];
return vmaxvq_f32(t);
}
#else
// x + y + z via two single-lane broadcasting shuffles
__forceinline float reduce_add(const Vec3fa& v) {
const vfloat4 a(v.m128);
const vfloat4 b = shuffle<1>(a);
const vfloat4 c = shuffle<2>(a);
return _mm_cvtss_f32(a+b+c);
}
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fa& v) { return min(v.x,v.y,v.z); }
__forceinline float reduce_max(const Vec3fa& v) { return max(v.x,v.y,v.z); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
// equality considers only the three active lanes (movemask is masked with 7)
__forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
__forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
// lane-mask comparisons returning a Vec3ba selection mask
__forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
__forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
__forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
__forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmple_ps (a.m128, b.m128); }
#if defined(__aarch64__)
__forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpgt_ps (a.m128, b.m128); }
__forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpge_ps (a.m128, b.m128); }
#else
// not-less-or-equal / not-less-than: differ from cmpgt/cmpge in NaN handling
__forceinline Vec3ba gt_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnle_ps(a.m128, b.m128); }
__forceinline Vec3ba ge_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnlt_ps(a.m128, b.m128); }
#endif
// true when all three components lie strictly inside (-FLT_LARGE, +FLT_LARGE)
__forceinline bool isvalid ( const Vec3fa& v ) {
return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
}
// true when all three components are finite (within [-FLT_MAX, +FLT_MAX])
__forceinline bool is_finite ( const Vec3fa& a ) {
return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
}
// 4-lane variants that also require the padding lane to pass the check
__forceinline bool isvalid4 ( const Vec3fa& v ) {
return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
}
__forceinline bool is_finite4 ( const Vec3fa& a ) {
return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE4_1__)
// 3-component dot product; dp_ps mask 0x7F multiplies lanes 0..2 and
// broadcasts the sum into all lanes, of which lane 0 is extracted
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
}
#else
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
return reduce_add(a*b);
}
#endif
// cross product via the usual shuffle trick:
// (a.yzx*b - a*b.yzx).yzx == a x b
__forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b )
{
vfloat4 a0 = vfloat4(a.m128);
vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128));
vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128));
vfloat4 b1 = vfloat4(b.m128);
return Vec3fa(shuffle<1,2,0,3>(msub(a0,b0,a1*b1)));
}
// length helpers built on dot(); rcp_length/normalize use the fast
// (approximate, Newton-refined) rsqrt above
__forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); }
__forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
// half surface area of a box with extents d: d.x*(d.y+d.z) + d.y*d.z
__forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); }
// returns the zero vector unchanged instead of producing NaNs
__forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
// scalar-condition select: builds an all-ones/all-zeros mask and blends
__forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
__m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
return blendv_ps(f.m128, t.m128, mask);
}
// per-lane select: takes t where the mask lane is set, f otherwise
__forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
return blendv_ps(f.m128, t.m128, s);
}
// linear interpolation: (1-t)*v0 + t*v1
__forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
/*! returns the index (0=x, 1=y, 2=z) of the component with the largest
 *  absolute value; on ties the later axis wins (strict > comparisons) */
__forceinline int maxDim ( const Vec3fa& a )
{
const Vec3fa m = abs(a);
if (m.x > m.y)
return (m.x > m.z) ? 0 : 2;
return (m.y > m.z) ? 1 : 2;
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
// componentwise rounding; all three paths agree: floor rounds toward -inf,
// ceil toward +inf, trunc toward zero
#if defined(__aarch64__)
__forceinline Vec3fa floor(const Vec3fa& a) { return vrndmq_f32(a.m128); }
__forceinline Vec3fa ceil (const Vec3fa& a) { return vrndpq_f32(a.m128); }
__forceinline Vec3fa trunc(const Vec3fa& a) { return vrndq_f32(a.m128); }
#elif defined (__SSE4_1__)
// BUGFIX: trunc previously used _MM_FROUND_TO_NEAREST_INT, which rounds to the
// nearest integer instead of truncating. That disagreed with both the aarch64
// path (vrndq_f32 rounds toward zero) and the scalar fallback (truncf).
__forceinline Vec3fa trunc( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_ZERO); }
__forceinline Vec3fa floor( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); }
__forceinline Vec3fa ceil ( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); }
#else
__forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
__forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(floorf(a.x),floorf(a.y),floorf(a.z)); }
__forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
// prints the three active components as "(x, y, z)"; padding lane is omitted
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
typedef Vec3fa Vec3fa_t;
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fx Type
////////////////////////////////////////////////////////////////////////////////
// 3-float vector with an extra, user-visible payload in the 4th SSE lane,
// accessible as int (a), unsigned (u), or float (w)
struct __aligned(16) Vec3fx
{
ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
union {
__m128 m128;
// x,y,z are the vector components; the 4th lane carries auxiliary data
struct { float x,y,z; union { int a; unsigned u; float w; }; };
};
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx( ) {}
__forceinline Vec3fx( const __m128 a ) : m128(a) {}
// conversion from Vec3fa keeps the raw 4th lane bits as-is
__forceinline explicit Vec3fx(const Vec3fa& v) : m128(v.m128) {}
__forceinline operator Vec3fa () const { return Vec3fa(m128); }
// conversion from plain Vec3<float> zeroes the 4th lane
__forceinline explicit Vec3fx ( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); }
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
__forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; }
__forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; }
// broadcast constructor fills all four lanes (including the payload lane)
__forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {}
__forceinline Vec3fx( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {}
// constructors that set the payload lane explicitly as int / unsigned / float
__forceinline Vec3fx( const Vec3fa& other, const int a1) { m128 = other.m128; a = a1; }
__forceinline Vec3fx( const Vec3fa& other, const unsigned a1) { m128 = other.m128; u = a1; }
__forceinline Vec3fx( const Vec3fa& other, const float w1) {
#if defined (__aarch64__)
m128 = other.m128; m128[3] = w1;
#elif defined (__SSE4_1__)
m128 = _mm_insert_ps(other.m128, _mm_set_ss(w1),3 << 4);
#else
const vint4 mask(-1,-1,-1,0);
m128 = select(vboolf4(_mm_castsi128_ps(mask)),vfloat4(other.m128),vfloat4(w1));
#endif
}
//__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
//__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
__forceinline Vec3fx( const float x, const float y, const float z, const float w) : m128(_mm_set_ps(w, z, y, x)) {}
//__forceinline explicit Vec3fx( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
__forceinline explicit operator const vfloat4() const { return vfloat4(m128); }
// integer conversions round with the current rounding mode (cvtps, not cvttps)
__forceinline explicit operator const vint4() const { return vint4(_mm_cvtps_epi32(m128)); }
__forceinline explicit operator const Vec2fa() const { return Vec2fa(m128); }
__forceinline explicit operator const Vec3ia() const { return Vec3ia(_mm_cvtps_epi32(m128)); }
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
// aligned load of 4 floats with the 4th lane masked to zero
// NOTE(review): reads 16 bytes, so the source must be at least 4 floats wide
// and 16-byte aligned -- confirm at call sites
static __forceinline Vec3fx load( const void* const a ) {
return Vec3fx(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))));
}
// unaligned load of all 4 floats, payload lane included
static __forceinline Vec3fx loadu( const void* const a ) {
return Vec3fx(_mm_loadu_ps((float*)a));
}
// unaligned store of all 4 floats, payload lane included
static __forceinline void storeu ( void* ptr, const Vec3fx& v ) {
_mm_storeu_ps((float*)ptr,v.m128);
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx( ZeroTy ) : m128(_mm_setzero_ps()) {}
__forceinline Vec3fx( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Vec3fx( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Vec3fx( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
// only x/y/z (indices 0..2) are addressable; the payload lane is not
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
// unary plus: identity
__forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
// unary minus: flip the sign bit of every lane (payload lane included)
__forceinline Vec3fx operator -( const Vec3fx& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return _mm_xor_ps(a.m128, mask);
}
// absolute value: clear the sign bit of every lane
__forceinline Vec3fx abs ( const Vec3fx& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return _mm_and_ps(a.m128, mask);
}
// componentwise sign: -1 where a < 0, +1 otherwise
__forceinline Vec3fx sign ( const Vec3fx& a ) {
return blendv_ps(Vec3fx(one).m128, (-Vec3fx(one)).m128, _mm_cmplt_ps (a.m128,Vec3fx(zero).m128));
}
// componentwise reciprocal: approximate rcp refined with one Newton-Raphson
// step in the r*(2 - r*a) form
__forceinline Vec3fx rcp ( const Vec3fx& a )
{
#if defined(__AVX512VL__)
const Vec3fx r = _mm_rcp14_ps(a.m128);
#else
const Vec3fx r = _mm_rcp_ps(a.m128);
#endif
#if defined(__AVX2__)
const Vec3fx res = _mm_mul_ps(r.m128,_mm_fnmadd_ps(r.m128, a.m128, vfloat4(2.0f)));
#else
const Vec3fx res = _mm_mul_ps(r.m128,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r.m128, a.m128)));
//return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
#endif
return res;
}
// componentwise square root / square
__forceinline Vec3fx sqrt ( const Vec3fx& a ) { return _mm_sqrt_ps(a.m128); }
__forceinline Vec3fx sqr ( const Vec3fx& a ) { return _mm_mul_ps(a.m128,a.m128); }
// componentwise reciprocal square root: approximate rsqrt plus one
// Newton-Raphson refinement step
__forceinline Vec3fx rsqrt( const Vec3fx& a )
{
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ps(a.m128);
#else
__m128 r = _mm_rsqrt_ps(a.m128);
#endif
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
}
// clamps near-zero components up to min_rcp_input so rcp() stays finite
__forceinline Vec3fx zero_fix(const Vec3fx& a) {
return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
}
// reciprocal that is safe for (near-)zero components
__forceinline Vec3fx rcp_safe(const Vec3fx& a) {
return rcp(zero_fix(a));
}
// componentwise natural logarithm / exponential via scalar libm calls
__forceinline Vec3fx log ( const Vec3fx& a ) {
return Vec3fx(logf(a.x),logf(a.y),logf(a.z));
}
__forceinline Vec3fx exp ( const Vec3fx& a ) {
return Vec3fx(expf(a.x),expf(a.y),expf(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// componentwise arithmetic; scalar operands are broadcast to all lanes
__forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return _mm_add_ps(a.m128, b.m128); }
__forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return _mm_sub_ps(a.m128, b.m128); }
__forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return _mm_mul_ps(a.m128, b.m128); }
__forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
__forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
__forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return _mm_div_ps(a.m128,b.m128); }
__forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
__forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
// componentwise min/max
__forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) { return _mm_min_ps(a.m128,b.m128); }
__forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) { return _mm_max_ps(a.m128,b.m128); }
#if defined(__SSE4_1__) || defined(__aarch64__)
// NOTE(review): mini/maxi compare raw float bit patterns as signed ints;
// correct only for non-negative inputs -- same caveat as the Vec3fa versions
__forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a.m128);
const vint4 bi = _mm_castps_si128(b.m128);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__) || defined(__aarch64__)
__forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a.m128);
const vint4 bi = _mm_castps_si128(b.m128);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
// componentwise power via scalar powf
__forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
// fused multiply-add family (madd = a*b+c, etc.)
// NOTE(review): guard is __AVX2__ only, whereas the Vec3fa versions also
// accept __ARM_NEON -- possibly an oversight; confirm against upstream
#if defined(__AVX2__)
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
#else
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b+c; }
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b-c; }
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b+c;}
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b-c; }
#endif
// scalar-first overloads broadcast the scalar and forward to the vector form
__forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
__forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
__forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
__forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
// compound assignments implemented in terms of the binary operators above
__forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
__forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
// horizontal reductions over the three active components only
__forceinline float reduce_add(const Vec3fx& v) {
const vfloat4 a(v.m128);
const vfloat4 b = shuffle<1>(a);
const vfloat4 c = shuffle<2>(a);
return _mm_cvtss_f32(a+b+c);
}
__forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fx& v) { return min(v.x,v.y,v.z); }
__forceinline float reduce_max(const Vec3fx& v) { return max(v.x,v.y,v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
// equality considers only the three active lanes (movemask is masked with 7);
// the payload lane never participates
__forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
__forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
// lane-mask comparisons returning a Vec3ba selection mask
__forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
__forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
__forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
__forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmple_ps (a.m128, b.m128); }
__forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnle_ps(a.m128, b.m128); }
__forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); }
// true when all three components lie strictly inside (-FLT_LARGE, +FLT_LARGE)
__forceinline bool isvalid ( const Vec3fx& v ) {
return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
}
// true when all three components are finite
__forceinline bool is_finite ( const Vec3fx& a ) {
return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
}
// 4-lane variants that also check the payload lane
__forceinline bool isvalid4 ( const Vec3fx& v ) {
return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
}
__forceinline bool is_finite4 ( const Vec3fx& a ) {
return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE4_1__)
// 3-component dot product; dp_ps mask 0x7F multiplies lanes 0..2 only,
// so the payload lane never contaminates the result
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
}
#else
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
return reduce_add(a*b);
}
#endif
// cross product via the usual shuffle trick: (a.yzx*b - a*b.yzx).yzx == a x b
__forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b )
{
vfloat4 a0 = vfloat4(a.m128);
vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128));
vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128));
vfloat4 b1 = vfloat4(b.m128);
return Vec3fx(shuffle<1,2,0,3>(msub(a0,b0,a1*b1)));
}
// length helpers built on dot(); rcp_length/normalize use the fast rsqrt
__forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
__forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
// half surface area of a box with extents d: d.x*(d.y+d.z) + d.y*d.z
__forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
// returns the zero vector unchanged instead of producing NaNs
__forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
// scalar-condition select: builds an all-ones/all-zeros mask and blends
__forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
__m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
return blendv_ps(f.m128, t.m128, mask);
}
// per-lane select: takes t where the mask lane is set, f otherwise
__forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
return blendv_ps(f.m128, t.m128, s);
}
// linear interpolation: (1-t)*v0 + t*v1
__forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
/*! returns the index (0=x, 1=y, 2=z) of the component with the largest
 *  absolute value; on ties the later axis wins (strict > comparisons) */
__forceinline int maxDim ( const Vec3fx& a )
{
const Vec3fx m = abs(a);
if (m.x > m.y)
return (m.x > m.z) ? 0 : 2;
return (m.y > m.z) ? 1 : 2;
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
// componentwise rounding; all three paths agree: floor rounds toward -inf,
// ceil toward +inf, trunc toward zero
#if defined(__aarch64__)
__forceinline Vec3fx trunc(const Vec3fx& a) { return vrndq_f32(a.m128); }
__forceinline Vec3fx floor(const Vec3fx& a) { return vrndmq_f32(a.m128); }
__forceinline Vec3fx ceil (const Vec3fx& a) { return vrndpq_f32(a.m128); }
#elif defined (__SSE4_1__)
// BUGFIX: trunc previously used _MM_FROUND_TO_NEAREST_INT, which rounds to the
// nearest integer instead of truncating. That disagreed with both the aarch64
// path (vrndq_f32 rounds toward zero) and the scalar fallback (truncf).
__forceinline Vec3fx trunc( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_ZERO); }
__forceinline Vec3fx floor( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); }
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); }
#else
__forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(truncf(a.x),truncf(a.y),truncf(a.z)); }
__forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(floorf(a.x),floorf(a.y),floorf(a.z)); }
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
// prints the three active components as "(x, y, z)"; payload lane is omitted
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
typedef Vec3fx Vec3ff;
}
#endif

View file

@ -0,0 +1,617 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fa Type
////////////////////////////////////////////////////////////////////////////////
// scalar (non-SIMD) Vec3fa used for the SYCL device build: three floats plus
// an unused padding member to keep the 16-byte layout of the SSE variant
struct __aligned(16) Vec3fa
{
//ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
// do_not_use is layout padding only and is never read or written here
struct { float x,y,z, do_not_use; };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
// default constructor leaves all members uninitialized
__forceinline Vec3fa( ) {}
//__forceinline Vec3fa( const __m128 a ) : m128(a) {}
//__forceinline explicit Vec3fa(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]) {}
__forceinline Vec3fa ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
//__forceinline Vec3fa& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
// copy operations transfer x/y/z only; the padding member is not copied
__forceinline Vec3fa ( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; }
__forceinline Vec3fa& operator =( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; return *this; }
// broadcast constructor
__forceinline explicit Vec3fa( const float a ) : x(a), y(a), z(a) {}
__forceinline Vec3fa( const float x, const float y, const float z) : x(x), y(y), z(z) {}
__forceinline explicit Vec3fa( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
// widening conversion zeroes the 4th lane
__forceinline operator vfloat4() const { return vfloat4(x,y,z,0.0f); } // FIXME: we should not need this!!
//friend __forceinline Vec3fa copy_a( const Vec3fa& a, const Vec3fa& b ) { Vec3fa c = a; c.a = b.a; return c; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
// load/loadu read exactly 3 floats (unlike the SSE variant, no over-read)
static __forceinline Vec3fa load( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fa(ptr[0],ptr[1],ptr[2]);
}
static __forceinline Vec3fa loadu( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fa(ptr[0],ptr[1],ptr[2]);
}
// writes exactly 3 floats
static __forceinline void storeu ( void* a, const Vec3fa& v ) {
float* ptr = (float*)a;
ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f) {}
__forceinline Vec3fa( OneTy ) : x(1.0f), y(1.0f), z(1.0f) {}
__forceinline Vec3fa( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY) {}
__forceinline Vec3fa( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
// component access limited to x/y/z (indices 0..2)
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
// scalar implementations of the unary operators using the SYCL math library
__forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
__forceinline Vec3fa operator -( const Vec3fa& a ) { return Vec3fa(-a.x,-a.y,-a.z); }
__forceinline Vec3fa abs ( const Vec3fa& a ) { return Vec3fa(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z)); }
__forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); }
//__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
// componentwise reciprocal via the fast native-precision device intrinsic
__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z)); }
__forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); }
__forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); }
__forceinline Vec3fa rsqrt( const Vec3fa& a ) { return Vec3fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z)); }
// clamps components with magnitude below min_rcp_input up to min_rcp_input,
// so a following rcp() cannot produce infinities
__forceinline Vec3fa zero_fix(const Vec3fa& a) {
const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
return Vec3fa(x,y,z);
}
// reciprocal that is safe for (near-)zero components
__forceinline Vec3fa rcp_safe(const Vec3fa& a) {
return rcp(zero_fix(a));
}
// componentwise natural logarithm / exponential
__forceinline Vec3fa log ( const Vec3fa& a ) {
return Vec3fa(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
}
__forceinline Vec3fa exp ( const Vec3fa& a ) {
return Vec3fa(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// Componentwise arithmetic; the scalar overloads splat the scalar into a vector.
__forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x+b.x, a.y+b.y, a.z+b.z); }
__forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x-b.x, a.y-b.y, a.z-b.z); }
__forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x*b.x, a.y*b.y, a.z*b.z); }
__forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
__forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
__forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x/b.x, a.y/b.y, a.z/b.z); }
__forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return Vec3fa(a.x/b, a.y/b, a.z/b); }
__forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return Vec3fa(a/b.x, a/b.y, a/b.z); }
// fmin/fmax semantics: if exactly one operand is NaN, the non-NaN value wins.
__forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) {
return Vec3fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z));
}
__forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) {
return Vec3fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z));
}
/*
#if defined(__SSE4_1__)
__forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__)
__forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
*/
// Componentwise power with a shared scalar exponent.
__forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) {
return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
// Fused multiply-add family, forwarded to the scalar madd/msub/nmadd/nmsub
// helpers defined elsewhere in the framework (semantics follow those helpers).
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); }
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); }
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z)); }
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z)); }
// Scalar-first overloads splat 'a' into all three lanes.
__forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
__forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
__forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
__forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
// Compound assignment implemented on top of the binary operators above.
__forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
__forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
// Horizontal reductions over the three components.
__forceinline float reduce_add(const Vec3fa& v) { return v.x+v.y+v.z; }
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fa& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
__forceinline float reduce_max(const Vec3fa& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
// Equality is all-components-equal; inequality is any-component-different.
__forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
__forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
// The *_mask variants return a per-component boolean mask instead of one bool.
__forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
__forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
__forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
__forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
__forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
__forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
// True iff every component lies strictly inside (-FLT_LARGE, +FLT_LARGE);
// this also rejects NaN, since NaN fails both comparisons.
__forceinline bool isvalid ( const Vec3fa& v ) {
return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
}
// True iff every component is within [-FLT_MAX, +FLT_MAX] (no inf/NaN).
__forceinline bool is_finite ( const Vec3fa& a ) {
return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidian Space Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
return reduce_add(a*b);
}
// Cross product; the msub form evaluates a.y*b.z - a.z*b.y etc., which may
// fuse into FMA instructions.
__forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b ) {
return Vec3fa(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
}
__forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); }
// Normalization uses rsqrt (fast reciprocal square root), not sqrt + divide.
__forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
// Half the surface area of a box with extents d: d.x*(d.y+d.z) + d.y*d.z.
__forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); }
// Returns the input unchanged when its squared length is exactly zero.
__forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
// Uniform select: one bool chooses t or f for all three components.
__forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
return Vec3fa(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z);
}
// Per-component select driven by a boolean mask.
__forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
return Vec3fa(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
}
// Linear interpolation (1-t)*v0 + t*v1, expressed as a single madd.
__forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
// Returns the index (0 = x, 1 = y, 2 = z) of the component of 'a' with the
// largest absolute value. Ties resolve exactly as the pairwise strict
// comparisons dictate: on equality the later axis wins (e.g. |x| == |z| -> 2).
__forceinline int maxDim ( const Vec3fa& a )
{
  const Vec3fa m = abs(a);
  if (m.x > m.y)
    return (m.x > m.z) ? 0 : 2;
  else
    return (m.y > m.z) ? 1 : 2;
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
// Componentwise rounding via SYCL builtins.
__forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z)); }
__forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z)); }
__forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
// Prints "(x, y, z)".
inline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
// Out-of-line converting constructors declared in the Vec2fa/Vec3ia headers;
// the int conversion truncates toward zero (C-style float->int cast).
__forceinline Vec2fa::Vec2fa(const Vec3fa& a)
: x(a.x), y(a.y) {}
__forceinline Vec3ia::Vec3ia( const Vec3fa& a )
: x((int)a.x), y((int)a.y), z((int)a.z) {}
typedef Vec3fa Vec3fa_t;
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fx Type
////////////////////////////////////////////////////////////////////////////////
// 3-component float vector with a 4th lane that can be viewed as int (a),
// unsigned (u) or float (w) — used to smuggle a payload alongside a Vec3.
struct __aligned(16) Vec3fx
{
//ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
// x,y,z plus an anonymous union for the 4th lane; all views alias 4 bytes.
struct { float x,y,z; union { int a; unsigned u; float w; }; };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx( ) {}
//__forceinline Vec3fx( const __m128 a ) : m128(a) {}
// All four lanes taken from the SIMD register (w = a[3]).
__forceinline explicit Vec3fx(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
// From a plain Vec3fa the w lane is zero-initialized.
__forceinline explicit Vec3fx(const Vec3fa& v) : x(v.x), y(v.y), z(v.z), w(0.0f) {}
// Narrowing conversion back to Vec3fa drops the 4th lane.
__forceinline operator Vec3fa() const { return Vec3fa(x,y,z); }
// NOTE(review): this constructor leaves the 4th lane uninitialized — confirm
// callers never read w/a/u after constructing from Vec3<float>.
__forceinline explicit Vec3fx ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
//__forceinline Vec3fx ( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; }
//__forceinline Vec3fx& operator =( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; return *this; }
// Splat constructor fills all four lanes (including w) with 'a'.
__forceinline explicit Vec3fx( const float a ) : x(a), y(a), z(a), w(a) {}
// NOTE(review): the 3-float constructor sets w from z rather than 0 —
// presumably deliberate (w is "don't care" here); confirm before relying on w.
__forceinline Vec3fx( const float x, const float y, const float z) : x(x), y(y), z(z), w(z) {}
// These three pair a Vec3fa with an explicit payload in the 4th lane,
// choosing the int / unsigned / float view of the union respectively.
__forceinline Vec3fx( const Vec3fa& other, const int a1) : x(other.x), y(other.y), z(other.z), a(a1) {}
__forceinline Vec3fx( const Vec3fa& other, const unsigned a1) : x(other.x), y(other.y), z(other.z), u(a1) {}
__forceinline Vec3fx( const Vec3fa& other, const float w1) : x(other.x), y(other.y), z(other.z), w(w1) {}
//__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
//__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
__forceinline Vec3fx( const float x, const float y, const float z, const float w) : x(x), y(y), z(z), w(w) {}
// Int->float conversion; w is zero-initialized.
__forceinline explicit Vec3fx( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z), w(0.0f) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
// Full 4-lane conversion to the SIMD float type (w is preserved).
__forceinline operator vfloat4() const { return vfloat4(x,y,z,w); }
//friend __forceinline Vec3fx copy_a( const Vec3fx& a, const Vec3fx& b ) { Vec3fx c = a; c.a = b.a; return c; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
// Unlike Vec3fa::load/storeu, these move FOUR floats (the w lane included).
static __forceinline Vec3fx load( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
}
static __forceinline Vec3fx loadu( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
}
static __forceinline void storeu ( void* a, const Vec3fx& v ) {
float* ptr = (float*)a;
ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z; ptr[3] = v.w;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
// Tag-dispatched constants; all four lanes are set, w included.
__forceinline Vec3fx( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {}
__forceinline Vec3fx( OneTy ) : x(1.0f), y(1.0f), z(1.0f), w(1.0f) {}
__forceinline Vec3fx( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY), w(+INFINITY) {}
__forceinline Vec3fx( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY), w(-INFINITY) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
// Only indices 0..2 are addressable; the 4th lane is not exposed here.
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
// These operate on all FOUR lanes (w included), unlike the Vec3fa versions.
__forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
__forceinline Vec3fx operator -( const Vec3fx& a ) { return Vec3fx(-a.x,-a.y,-a.z,-a.w); }
__forceinline Vec3fx abs ( const Vec3fx& a ) { return Vec3fx(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z),sycl::fabs(a.w)); }
// Componentwise sign over all four lanes. Fix: the w lane previously reused
// a.z (copy-paste slip); every sibling 4-lane op (abs, sqrt, rsqrt, rcp) uses a.w.
__forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.w)); }
//__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
// Native (fast, reduced-precision) reciprocal on all four lanes.
__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z),__sycl_std::__invoke_native_recip<float>(a.w)); }
__forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); }
__forceinline Vec3fx sqr ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); }
__forceinline Vec3fx rsqrt( const Vec3fx& a ) { return Vec3fx(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z),sycl::rsqrt(a.w)); }
// Clamps tiny-magnitude x,y,z up to min_rcp_input so rcp() stays finite.
// NOTE(review): returns via the 3-float constructor, so a.w is discarded
// (w becomes z per that constructor) — confirm callers only use rcp_safe on xyz.
__forceinline Vec3fx zero_fix(const Vec3fx& a) {
const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
return Vec3fx(x,y,z);
}
// Division-safe reciprocal; w lane semantics inherit from zero_fix above.
__forceinline Vec3fx rcp_safe(const Vec3fx& a) {
return rcp(zero_fix(a));
}
// log/exp use the 3-float constructor: only x,y,z are transformed, a.w is
// not carried through (the result's w lane follows that constructor).
__forceinline Vec3fx log ( const Vec3fx& a ) {
return Vec3fx(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
}
__forceinline Vec3fx exp ( const Vec3fx& a ) {
return Vec3fx(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// Full 4-lane componentwise arithmetic; scalar overloads splat into all lanes.
__forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); }
__forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); }
__forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); }
__forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
__forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
__forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); }
__forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return Vec3fx(a.x/b, a.y/b, a.z/b, a.w/b); }
__forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return Vec3fx(a/b.x, a/b.y, a/b.z, a/b.w); }
// fmin/fmax semantics: if exactly one operand is NaN, the non-NaN value wins.
__forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) {
return Vec3fx(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z), sycl::fmin(a.w,b.w));
}
__forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) {
return Vec3fx(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z), sycl::fmax(a.w,b.w));
}
/*
#if defined(__SSE4_1__)
__forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__)
__forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
__forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
*/
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
// 4-lane fused multiply-add family, forwarded to the scalar helpers.
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z), nmadd(a.w,b.w,c.w)); }
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z), nmsub(a.w,b.w,c.w)); }
// Scalar-first overloads splat 'a' into all four lanes.
__forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
__forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
__forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
__forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
// Compound assignment in terms of the binary operators above (4 lanes).
__forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
__forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
// Horizontal reductions over x,y,z only — the w lane is intentionally ignored.
__forceinline float reduce_add(const Vec3fx& v) { return v.x+v.y+v.z; }
__forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fx& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
__forceinline float reduce_max(const Vec3fx& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
// Equality/masks compare x,y,z only; the w lane does not participate.
__forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
__forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
__forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
__forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
__forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
__forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
__forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
__forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
// 3-lane validity: strictly inside (-FLT_LARGE, +FLT_LARGE); rejects NaN.
__forceinline bool isvalid ( const Vec3fx& v ) {
return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
}
// 3-lane finiteness: within [-FLT_MAX, +FLT_MAX].
__forceinline bool is_finite ( const Vec3fx& a ) {
return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
}
// 4-lane variants below also check the w lane; bitwise & on bools is used
// deliberately to keep the evaluation branchless.
__forceinline bool isvalid4 ( const Vec3fx& v ) {
const bool valid_x = v.x >= -FLT_LARGE & v.x <= +FLT_LARGE;
const bool valid_y = v.y >= -FLT_LARGE & v.y <= +FLT_LARGE;
const bool valid_z = v.z >= -FLT_LARGE & v.z <= +FLT_LARGE;
const bool valid_w = v.w >= -FLT_LARGE & v.w <= +FLT_LARGE;
return valid_x & valid_y & valid_z & valid_w;
}
__forceinline bool is_finite4 ( const Vec3fx& v ) {
const bool finite_x = v.x >= -FLT_MAX & v.x <= +FLT_MAX;
const bool finite_y = v.y >= -FLT_MAX & v.y <= +FLT_MAX;
const bool finite_z = v.z >= -FLT_MAX & v.z <= +FLT_MAX;
const bool finite_w = v.w >= -FLT_MAX & v.w <= +FLT_MAX;
return finite_x & finite_y & finite_z & finite_w;
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidian Space Operators
////////////////////////////////////////////////////////////////////////////////
// 3D dot product: reduce_add only sums x,y,z, so the w lane does not contribute.
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
return reduce_add(a*b);
}
// Cross product; msub form may fuse into FMA instructions.
__forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b ) {
return Vec3fx(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
}
__forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
// Normalizes the xyz direction; the w lane is scaled along with it.
__forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
// Half the surface area of a box with extents d: d.x*(d.y+d.z) + d.y*d.z.
__forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
// Returns the input unchanged when its squared length is exactly zero.
__forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
// Uniform select over all four lanes (w is selected too).
__forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
return Vec3fx(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z, s ? t.w : f.w);
}
// Per-component select; Vec3ba has only 3 lanes, so this builds the result
// with the 3-float constructor (the result's w follows that constructor,
// it is NOT taken from t.w/f.w).
__forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
return Vec3fx(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
}
// Linear interpolation (1-t)*v0 + t*v1 across all four lanes via madd.
__forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
// Returns the index (0 = x, 1 = y, 2 = z) of the component of 'a' with the
// largest absolute value; the w lane is never considered. Ties resolve exactly
// as the pairwise strict comparisons dictate (later axis wins on equality).
__forceinline int maxDim ( const Vec3fx& a )
{
  const Vec3fx m = abs(a);
  if (m.x > m.y)
    return (m.x > m.z) ? 0 : 2;
  else
    return (m.y > m.z) ? 1 : 2;
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
// Componentwise rounding over all four lanes (w included).
__forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z),sycl::trunc(a.w)); }
__forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z),sycl::floor(a.w)); }
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z),sycl::ceil (a.w)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
// Prints "(x, y, z, w)". Fix: the separator before a.w was "," without the
// space, inconsistent with the other separators here and with the Vec3fa printer.
inline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ", " << a.w << ")";
}
typedef Vec3fx Vec3ff;
//__forceinline Vec2fa::Vec2fa(const Vec3fx& a)
// : x(a.x), y(a.y) {}
//__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
// : x((int)a.x), y((int)a.y), z((int)a.z) {}
}

View file

@ -0,0 +1,203 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec3ia_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ia Type
////////////////////////////////////////////////////////////////////////////////
// 3-component int vector backed by a full SSE __m128i register; the 4th lane
// exists in the register but is not exposed as a named member.
struct __aligned(16) Vec3ia
{
ALIGNED_STRUCT_(16);
union {
__m128i m128;
struct { int x,y,z; };
};
typedef int Scalar;
enum { N = 3 };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia( ) {}
__forceinline Vec3ia( const __m128i a ) : m128(a) {}
__forceinline Vec3ia( const Vec3ia& other ) : m128(other.m128) {}
__forceinline Vec3ia& operator =(const Vec3ia& other) { m128 = other.m128; return *this; }
// Splat constructor: all four register lanes get 'a'.
__forceinline explicit Vec3ia( const int a ) : m128(_mm_set1_epi32(a)) {}
// _mm_set_epi32 takes arguments high-to-low, so the hidden 4th lane
// duplicates z (lanes are: w=z, z, y, x).
__forceinline Vec3ia( const int x, const int y, const int z) : m128(_mm_set_epi32(z, z, y, x)) {}
// Float->int conversion via cvtps (rounds per the SSE conversion rules,
// round-to-nearest-even in the default MXCSR state).
__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}
// Implicit raw-register access, used by the intrinsic-based operators below.
__forceinline operator const __m128i&() const { return m128; }
__forceinline operator __m128i&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
// pos_inf/neg_inf here are the framework's integer extremes (from emath.h).
__forceinline Vec3ia( ZeroTy ) : m128(_mm_setzero_si128()) {}
__forceinline Vec3ia( OneTy ) : m128(_mm_set1_epi32(1)) {}
__forceinline Vec3ia( PosInfTy ) : m128(_mm_set1_epi32(pos_inf)) {}
__forceinline Vec3ia( NegInfTy ) : m128(_mm_set1_epi32(neg_inf)) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
// Index 0..2 maps onto the x,y,z view of the register.
__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia operator +( const Vec3ia& a ) { return a; }
__forceinline Vec3ia operator -( const Vec3ia& a ) { return _mm_sub_epi32(_mm_setzero_si128(), a.m128); }
// abs() is only provided on AArch64 or with SSSE3 — there is no scalar
// fallback branch here, so plain-SSE2 builds have no Vec3ia abs.
#if (defined(__aarch64__))
__forceinline Vec3ia abs ( const Vec3ia& a ) { return vabsq_s32(a.m128); }
#elif defined(__SSSE3__)
__forceinline Vec3ia abs ( const Vec3ia& a ) { return _mm_abs_epi32(a.m128); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
// All operations act on the full 128-bit register (the hidden 4th lane too);
// scalar overloads splat the int into every lane first.
__forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return _mm_add_epi32(a.m128, b.m128); }
__forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); }
__forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; }
__forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return _mm_sub_epi32(a.m128, b.m128); }
__forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); }
__forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; }
// 32-bit multiply needs SSE4.1 (_mm_mullo_epi32) or NEON; absent otherwise.
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return _mm_mullo_epi32(a.m128, b.m128); }
__forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); }
__forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; }
#endif
__forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return _mm_and_si128(a.m128, b.m128); }
__forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); }
__forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; }
__forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return _mm_or_si128(a.m128, b.m128); }
__forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); }
__forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; }
__forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return _mm_xor_si128(a.m128, b.m128); }
__forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); }
__forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; }
// operator>> is an ARITHMETIC shift (srai, sign-extending); use srl for logical.
__forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return _mm_slli_epi32(a.m128, n); }
__forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return _mm_srai_epi32(a.m128, n); }
// Named shift variants: shift-left, shift-right-arithmetic, shift-right-logical.
__forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return _mm_slli_epi32(a.m128, b); }
__forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return _mm_srai_epi32(a.m128, b); }
__forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return _mm_srli_epi32(a.m128, b); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////

// Compound assignments, all implemented on top of the binary operators above,
// so their availability mirrors the binary forms (e.g. *= needs SSE4.1).
__forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; }
__forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; }

__forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
__forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; }

#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
__forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; }
#endif

__forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; }
__forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; }

__forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
__forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; }

// NOTE(review): shift-assignment is compiled out on ARM NEON builds —
// presumably provided elsewhere for that target; confirm before relying on it.
#if !defined(__ARM_NEON)
__forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
__forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////

// Per-lane select: returns t in lanes where mask m is set and f elsewhere.
// SSE4.1/aarch64 use a single blendv (mask interpreted per-lane via the float
// cast); the SSE2 fallback composes the classic (m & t) | (~m & f) pattern.
__forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
#if defined(__aarch64__) || defined(__SSE4_1__)
  return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m));
#else
  return _mm_or_si128(_mm_and_si128(_mm_castps_si128(m), t), _mm_andnot_si128(_mm_castps_si128(m), f));
#endif
}
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////

// Horizontal reductions over the three active lanes. On aarch64 the NEON
// across-vector ops (vaddvq_s32 / vminvq_s32 / vmaxvq_s32) reduce all four
// lanes, so select() first overwrites the unused w lane with the reduction's
// neutral element: 0 for add, INT_MAX (0x7FFFFFFF) for min, INT_MIN
// (0x80000000) for max. The scalar fallback reads x/y/z directly.
#if defined(__aarch64__)
__forceinline int reduce_add(const Vec3ia& v) { return vaddvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0))); }
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
__forceinline int reduce_min(const Vec3ia& v) { return vminvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0x7FFFFFFF))); }
__forceinline int reduce_max(const Vec3ia& v) { return vmaxvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0x80000000))); }
#else
__forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
__forceinline int reduce_min(const Vec3ia& v) { return min(v.x,v.y,v.z); }
__forceinline int reduce_max(const Vec3ia& v) { return max(v.x,v.y,v.z); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////

// Whole-vector (in)equality: compare all four SSE lanes, movemask the result,
// then test only bits 0..2 (& 7) so the unused w lane cannot influence the
// outcome. a == b iff all three active lanes match; a != b is its negation.
__forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) == 7; }
__forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) != 7; }

// Strict lexicographic ordering on (x,y,z) — suitable as an ordering
// predicate for sorted containers.
__forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
  if (a.x != b.x) return a.x < b.x;
  if (a.y != b.y) return a.y < b.y;
  if (a.z != b.z) return a.z < b.z;
  return false;
}

// Per-lane comparisons returning a boolean mask (all-ones / all-zeros lanes).
__forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpeq_epi32 (a.m128, b.m128)); }
__forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmplt_epi32 (a.m128, b.m128)); }
__forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpgt_epi32 (a.m128, b.m128)); }

// Componentwise min/max: single instruction on SSE4.1/aarch64, otherwise
// synthesized from a comparison mask plus select.
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return _mm_min_epi32(a.m128,b.m128); }
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return _mm_max_epi32(a.m128,b.m128); }
#else
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return select(lt_mask(a,b),a,b); }
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return select(gt_mask(a,b),a,b); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////

// Prints the vector as "(x, y, z)" for debugging/logging.
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
  return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
}
#endif

View file

@ -0,0 +1,178 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ia Type
////////////////////////////////////////////////////////////////////////////////

// 16-byte-aligned 3-component integer vector for the SYCL path. Unlike the
// SSE variant, storage is three plain ints (x,y,z) with no __m128i member —
// the commented-out __m128i constructors/casts are intentionally disabled.
struct __aligned(16) Vec3ia
{
  ALIGNED_STRUCT_(16);

  struct { int x,y,z; };

  typedef int Scalar; // element type
  enum { N = 3 };     // logical component count

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3ia( ) {} // uninitialized
  //__forceinline Vec3ia( const __m128i a ) : m128(a) {}
  __forceinline Vec3ia( const Vec3ia& other ) : x(other.x), y(other.y), z(other.z) {}
  __forceinline Vec3ia& operator =(const Vec3ia& other) { x = other.x; y = other.y; z = other.z; return *this; }

  // Broadcast a scalar into all three components (explicit to avoid
  // accidental int -> Vec3ia conversions).
  __forceinline explicit Vec3ia( const int a ) : x(a), y(a), z(a) {}
  __forceinline Vec3ia( const int x, const int y, const int z) : x(x), y(y), z(z) {}

  //__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}
  // Takes the first three lanes of a vint4; the fourth lane is dropped.
  __forceinline explicit Vec3ia(const vint4& a) : x(a[0]), y(a[1]), z(a[2]) {}

  // Conversion from Vec3fa — defined out-of-line elsewhere.
  __forceinline explicit Vec3ia( const Vec3fa& a );

  //__forceinline operator const __m128i&() const { return m128; }
  //__forceinline operator __m128i&() { return m128; }
  // Widening to vint4 replicates z into the w lane.
  __forceinline operator vint4() const { return vint4(x,y,z,z); }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3ia( ZeroTy ) : x(0), y(0), z(0) {}
  __forceinline Vec3ia( OneTy ) : x(1), y(1), z(1) {}
  // "Infinities" for int: INT_MAX / INT_MIN bit patterns.
  __forceinline Vec3ia( PosInfTy ) : x(0x7FFFFFFF), y(0x7FFFFFFF), z(0x7FFFFFFF) {}
  __forceinline Vec3ia( NegInfTy ) : x(0x80000000), y(0x80000000), z(0x80000000) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  // Index 0..2 maps onto x,y,z via pointer arithmetic from &x.
  __forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
  __forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////

// Componentwise unary plus/minus and absolute value (abs uses sycl::abs so it
// is usable in SYCL device code).
__forceinline Vec3ia operator +( const Vec3ia& a ) { return Vec3ia(+a.x,+a.y,+a.z); }
__forceinline Vec3ia operator -( const Vec3ia& a ) { return Vec3ia(-a.x,-a.y,-a.z); }
__forceinline Vec3ia abs ( const Vec3ia& a ) { return Vec3ia(sycl::abs(a.x),sycl::abs(a.y),sycl::abs(a.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////

// Scalar (plain int) componentwise arithmetic; int operands broadcast via
// the explicit Vec3ia(int) constructor. Unlike the SSE header, operator* is
// always available here — no SSE4.1 guard is needed.
__forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x+b.x, a.y+b.y, a.z+b.z); }
__forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); }
__forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; }

__forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x-b.x, a.y-b.y, a.z-b.z); }
__forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); }
__forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; }

__forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x*b.x, a.y*b.y, a.z*b.z); }
__forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); }
__forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; }

// Componentwise bitwise AND / OR / XOR.
__forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x&b.x, a.y&b.y, a.z&b.z); }
__forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); }
__forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; }
__forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x|b.x, a.y|b.y, a.z|b.z); }
__forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); }
__forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; }
__forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x^b.x, a.y^b.y, a.z^b.z); }
__forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); }
__forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; }

// Uniform-count shifts: operator<< / sll are left shifts, operator>> / sra
// are arithmetic right shifts on signed int, and srl casts to unsigned first
// so the right shift is logical (zero-filling).
__forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return Vec3ia(a.x<<n, a.y<<n, a.z<<n); }
__forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return Vec3ia(a.x>>n, a.y>>n, a.z>>n); }
__forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return Vec3ia(a.x<<b, a.y<<b, a.z<<b); }
__forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return Vec3ia(a.x>>b, a.y>>b, a.z>>b); }
__forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return Vec3ia(unsigned(a.x)>>b, unsigned(a.y)>>b, unsigned(a.z)>>b); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////

// Compound assignments, all delegating to the binary operators above. The
// shift-assign forms are unconditional here (no __ARM_NEON guard as in the
// SSE header) because the scalar shifts always exist.
__forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; }
__forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; }

__forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
__forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; }

__forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
__forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; }

__forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; }
__forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; }

__forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
__forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; }

__forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
__forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////

// Horizontal reductions over x, y, z; min/max use the sycl:: scalar builtins
// so they remain valid in device code.
__forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
__forceinline int reduce_min(const Vec3ia& v) { return sycl::min(sycl::min(v.x,v.y),v.z); }
__forceinline int reduce_max(const Vec3ia& v) { return sycl::max(sycl::max(v.x,v.y),v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////

// Whole-vector equality: true only when every component matches. Logical &&
// replaces the previous bitwise & — same truth table for bool operands, but
// short-circuiting and clearer intent.
__forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }

// Negation of operator==: true when ANY component differs.
// BUGFIX: the previous form `a.x != b.x & a.y != b.y & a.z != b.z` was true
// only when *all* components differed, so e.g. (1,2,3) and (1,2,4) compared
// neither equal nor unequal. It also disagreed with the SSE variant, whose
// operator!= is `(movemask(cmpeq) & 7) != 7` (i.e. not-all-equal).
__forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }

// operator< is intentionally not provided on this (SYCL) path; kept here
// disabled for parity with the SSE header.
/*
__forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
  if (a.x != b.x) return a.x < b.x;
  if (a.y != b.y) return a.y < b.y;
  if (a.z != b.z) return a.z < b.z;
  return false;
}
*/

// Per-lane comparison results packed into a boolean mask.
__forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
__forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
__forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////

// Per-lane select: takes t's component where the mask lane is true, f's
// component otherwise.
__forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
  const int x = m.x ? t.x : f.x;
  const int y = m.y ? t.y : f.y;
  const int z = m.z ? t.z : f.z;
  return Vec3ia(x,y,z);
}

// Componentwise min/max via the sycl:: scalar builtins (device-code safe).
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::min(a.x,b.x), sycl::min(a.y,b.y), sycl::min(a.z,b.z)); }
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::max(a.x,b.x), sycl::max(a.y,b.y), sycl::max(a.z,b.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): deliberately prints nothing and returns the stream unchanged —
// presumably because formatted ostream output is unavailable on the SYCL
// device path (the SSE variant prints "(x, y, z)"). Confirm before relying on
// this operator for diagnostics.
inline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
  return cout;
}
}

View file

@ -0,0 +1,266 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
#include "vec3.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// Generic 4D vector Class
////////////////////////////////////////////////////////////////////////////////

// Generic 4-component vector over an arbitrary scalar (or SIMD-scalar) type T.
// Storage is a union of named fields (x,y,z,w) and an array view `components`;
// the array member is omitted for VS2013, which cannot handle this union
// layout, and operator[] falls back to pointer arithmetic from &x there.
template<typename T> struct Vec4
{
  enum { N = 4 }; // component count

  union {
    struct { T x, y, z, w; };
#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
    T components[N];
#endif
  };

  typedef T Scalar; // element type

  ////////////////////////////////////////////////////////////////////////////////
  /// Construction
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec4( ) {} // uninitialized
  // Broadcast one scalar into all four components (explicit by design).
  __forceinline explicit Vec4( const T& a ) : x(a), y(a), z(a), w(a) {}
  __forceinline Vec4( const T& x, const T& y, const T& z, const T& w ) : x(x), y(y), z(z), w(w) {}
  // Append a w component to an existing 3-vector.
  __forceinline Vec4( const Vec3<T>& xyz, const T& w ) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {}

  __forceinline Vec4( const Vec4& other ) { x = other.x; y = other.y; z = other.z; w = other.w; }
  // Conversion from Vec3fx — specialized out-of-line at the end of this file.
  __forceinline Vec4( const Vec3fx& other );

  // Converting copy/assignment from a Vec4 over a different scalar type.
  template<typename T1> __forceinline Vec4( const Vec4<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)), w(T(a.w)) {}
  template<typename T1> __forceinline Vec4& operator =(const Vec4<T1>& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; }

  __forceinline Vec4& operator =(const Vec4& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; }

  // Narrowing conversion that drops w.
  __forceinline operator Vec3<T> () const { return Vec3<T>(x,y,z); }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec4( ZeroTy ) : x(zero), y(zero), z(zero), w(zero) {}
  __forceinline Vec4( OneTy ) : x(one), y(one), z(one), w(one) {}
  __forceinline Vec4( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf), w(pos_inf) {}
  __forceinline Vec4( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf), w(neg_inf) {}

#if defined(__WIN32__) && (_MSC_VER == 1800) // workaround for older VS 2013 compiler
  __forceinline const T& operator [](const size_t axis) const { assert(axis < 4); return (&x)[axis]; }
  __forceinline T& operator [](const size_t axis) { assert(axis < 4); return (&x)[axis]; }
#else
  __forceinline const T& operator [](const size_t axis ) const { assert(axis < 4); return components[axis]; }
  __forceinline T& operator [](const size_t axis) { assert(axis < 4); return components[axis]; }
#endif

  ////////////////////////////////////////////////////////////////////////////////
  /// Swizzles
  ////////////////////////////////////////////////////////////////////////////////

  // First three components as a Vec3.
  __forceinline Vec3<T> xyz() const { return Vec3<T>(x, y, z); }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////

// Componentwise unary ops, each delegating to the scalar function of the same
// name (abs, rcp = reciprocal, rsqrt = reciprocal square root, sqrt).
template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a ) { return Vec4<T>(+a.x, +a.y, +a.z, +a.w); }
template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a ) { return Vec4<T>(-a.x, -a.y, -a.z, -a.w); }
template<typename T> __forceinline Vec4<T> abs ( const Vec4<T>& a ) { return Vec4<T>(abs (a.x), abs (a.y), abs (a.z), abs (a.w)); }
template<typename T> __forceinline Vec4<T> rcp ( const Vec4<T>& a ) { return Vec4<T>(rcp (a.x), rcp (a.y), rcp (a.z), rcp (a.w)); }
template<typename T> __forceinline Vec4<T> rsqrt ( const Vec4<T>& a ) { return Vec4<T>(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z), rsqrt(a.w)); }
template<typename T> __forceinline Vec4<T> sqrt ( const Vec4<T>& a ) { return Vec4<T>(sqrt (a.x), sqrt (a.y), sqrt (a.z), sqrt (a.w)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////

// Componentwise arithmetic; the scalar overloads apply the scalar to each
// component (note: no scalar+vector or scalar-vector overloads exist here,
// only * and / have mixed forms).
template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
template<typename T> __forceinline Vec4<T> operator *( const T& a, const Vec4<T>& b ) { return Vec4<T>(a * b.x, a * b.y, a * b.z, a * b.w); }
template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x * b , a.y * b , a.z * b , a.w * b ); }
template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); }
template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x / b , a.y / b , a.z / b , a.w / b ); }
template<typename T> __forceinline Vec4<T> operator /( const T& a, const Vec4<T>& b ) { return Vec4<T>(a / b.x, a / b.y, a / b.z, a / b.w); }

// Componentwise min/max, delegating to the scalar min/max.
template<typename T> __forceinline Vec4<T> min(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); }
template<typename T> __forceinline Vec4<T> max(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); }
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////

// Componentwise multiply-add helpers delegating to the scalar
// madd/msub/nmadd/nmsub functions; the second group broadcasts a scalar first
// operand across all components.
template<typename T> __forceinline Vec4<T> madd  ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> msub  ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> nmadd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z),nmadd(a.w,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> nmsub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z),nmsub(a.w,b.w,c.w)); }

template<typename T> __forceinline Vec4<T> madd  ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z), madd(a,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> msub  ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z), msub(a,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> nmadd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z),nmadd(a,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> nmsub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z),nmsub(a,b.w,c.w)); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////

// In-place componentwise updates. Note the asymmetry: += / -= take a vector
// right-hand side, while *= / /= take a scalar one.
template<typename T> __forceinline Vec4<T>& operator +=( Vec4<T>& a, const Vec4<T>& b ) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; }
template<typename T> __forceinline Vec4<T>& operator -=( Vec4<T>& a, const Vec4<T>& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; return a; }
template<typename T> __forceinline Vec4<T>& operator *=( Vec4<T>& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; a.w *= b ; return a; }
template<typename T> __forceinline Vec4<T>& operator /=( Vec4<T>& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; a.w /= b ; return a; }
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operators
////////////////////////////////////////////////////////////////////////////////

// Horizontal reductions over all four components. reduce_min/reduce_max rely
// on a 4-argument min/max overload being in scope for T.
template<typename T> __forceinline T reduce_add( const Vec4<T>& a ) { return a.x + a.y + a.z + a.w; }
template<typename T> __forceinline T reduce_mul( const Vec4<T>& a ) { return a.x * a.y * a.z * a.w; }
template<typename T> __forceinline T reduce_min( const Vec4<T>& a ) { return min(a.x, a.y, a.z, a.w); }
template<typename T> __forceinline T reduce_max( const Vec4<T>& a ) { return max(a.x, a.y, a.z, a.w); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////

// Whole-vector equality: == requires every component to match; != is its
// exact logical negation (any component differs).
template<typename T> __forceinline bool operator ==( const Vec4<T>& a, const Vec4<T>& b ) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; }
template<typename T> __forceinline bool operator !=( const Vec4<T>& a, const Vec4<T>& b ) { return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; }
// Strict lexicographic ordering over (x, y, z, w): the first unequal
// component decides the result; fully equal vectors compare not-less-than,
// making this usable as an ordering predicate for sorted containers.
template<typename T> __forceinline bool operator < ( const Vec4<T>& a, const Vec4<T>& b ) {
  for (size_t axis = 0; axis < 4; axis++) {
    if (a[axis] != b[axis]) return a[axis] < b[axis];
  }
  return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Shift Operators
////////////////////////////////////////////////////////////////////////////////

// Componentwise delegation to the scalar shift_right_1 helper.
template<typename T> __forceinline Vec4<T> shift_right_1( const Vec4<T>& a ) {
  return Vec4<T>(shift_right_1(a.x),shift_right_1(a.y),shift_right_1(a.z),shift_right_1(a.w));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////

// 4D dot product built from nested madd calls; length/normalize/distance are
// the usual derived quantities (normalize scales by rsqrt rather than
// dividing by length).
template<typename T> __forceinline T       dot      ( const Vec4<T>& a, const Vec4<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,madd(a.z,b.z,a.w*b.w))); }
template<typename T> __forceinline T       length   ( const Vec4<T>& a )                   { return sqrt(dot(a,a)); }
template<typename T> __forceinline Vec4<T> normalize( const Vec4<T>& a )                   { return a*rsqrt(dot(a,a)); }
template<typename T> __forceinline T       distance ( const Vec4<T>& a, const Vec4<T>& b ) { return length(a-b); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////

// Select with a single bool: same branch taken for every component.
template<typename T> __forceinline Vec4<T> select ( bool s, const Vec4<T>& t, const Vec4<T>& f ) {
  return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w));
}

// Select with a per-component bool vector.
template<typename T> __forceinline Vec4<T> select ( const Vec4<bool>& s, const Vec4<T>& t, const Vec4<T>& f ) {
  return Vec4<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y),select(s.z,t.z,f.z),select(s.w,t.w,f.w));
}

// Select with T's associated SIMD mask type (T::Bool), one mask for all
// components.
template<typename T> __forceinline Vec4<T> select ( const typename T::Bool& s, const Vec4<T>& t, const Vec4<T>& f ) {
  return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w));
}

// Linear interpolation: (1-t)*v0 + t*v1, expressed as a single madd.
template<typename T>
__forceinline Vec4<T> lerp(const Vec4<T>& v0, const Vec4<T>& v1, const T& t) {
  return madd(Vec4<T>(T(1.0f)-t),v0,t*v1);
}
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////

// Prints the vector as "(x, y, z, w)" for debugging/logging.
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec4<T>& a) {
  return cout << "(" << a.x << ", " << a.y << ", " << a.z << ", " << a.w << ")";
}
////////////////////////////////////////////////////////////////////////////////
/// Default template instantiations
////////////////////////////////////////////////////////////////////////////////

// Common concrete instantiations: bool, byte, int and float component types.
typedef Vec4<bool         > Vec4b;
typedef Vec4<unsigned char> Vec4uc;
typedef Vec4<int          > Vec4i;
typedef Vec4<float        > Vec4f;
}
#include "vec3ba.h"
#include "vec3ia.h"
#include "vec3fa.h"
////////////////////////////////////////////////////////////////////////////////
/// SSE / AVX / MIC specializations
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE__) || defined(__ARM_NEON)
#include "../simd/sse.h"
#endif
#if defined __AVX__
#include "../simd/avx.h"
#endif
#if defined __AVX512F__
#include "../simd/avx512.h"
#endif
namespace embree
{
  // Specializations of the Vec4(const Vec3fx&) constructor declared above.
  // The scalar case simply copies the four fields.
  template<> __forceinline Vec4<float>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; }

#if !defined(__SYCL_DEVICE_ONLY__)
  // Host-compiled paths.

#if defined(__AVX__)
  template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
    x = a.x; y = a.y; z = a.z; w = a.w;
  }
#elif defined(__SSE__) || defined(__ARM_NEON)
  // SSE/NEON-only path: load the Vec3fx's __m128 once and broadcast each lane
  // into its own vfloat4 via shuffle, rather than reading scalar fields.
  template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
    const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v);
  }
#endif

#if defined(__AVX__)
  template<> __forceinline Vec4<vfloat8>::Vec4( const Vec3fx& a ) {
    x = a.x; y = a.y; z = a.z; w = a.w;
  }
#endif

#if defined(__AVX512F__)
  template<> __forceinline Vec4<vfloat16>::Vec4( const Vec3fx& a ) : x(a.x), y(a.y), z(a.z), w(a.w) {}
#endif

#else
  // SYCL device-compiled paths: always field-by-field copies (no __m128
  // shuffling in device code).

#if defined(__SSE__)
  template<> __forceinline Vec4<vfloat4>::Vec4(const Vec3fx& a) {
    x = a.x; y = a.y; z = a.z; w = a.w;
  }
#endif
#if defined(__AVX__)
  template<> __forceinline Vec4<vfloat8>::Vec4(const Vec3fx& a) {
    x = a.x; y = a.y; z = a.z; w = a.w;
  }
#endif
#if defined(__AVX512F__)
  template<> __forceinline Vec4<vfloat16>::Vec4(const Vec3fx& a) {
    x = a.x; y = a.y; z = a.z; w = a.w;
  }
#endif
#endif
}

Some files were not shown because too many files have changed in this diff Show more