125 lines
		
	
	
	
		
			3.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			125 lines
		
	
	
	
		
			3.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2009-2021 Intel Corporation
 | |
| // SPDX-License-Identifier: Apache-2.0
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include "default.h"
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
|   /*! An item on the stack holds the node ID and distance of that node. */
 | |
|   template<typename T>
 | |
|   struct __aligned(16) StackItemT
 | |
|   {
 | |
|     /*! assert that the xchg function works */
 | |
|     static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed");
 | |
| 
 | |
|     __forceinline StackItemT() {}
 | |
| 
 | |
|     __forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {}
 | |
| 
 | |
|     /*! use SSE instructions to swap stack items */
 | |
|     __forceinline static void xchg(StackItemT& a, StackItemT& b) 
 | |
|     { 
 | |
|       const vfloat4 sse_a = vfloat4::load((float*)&a); 
 | |
|       const vfloat4 sse_b = vfloat4::load((float*)&b);
 | |
|       vfloat4::store(&a,sse_b);
 | |
|       vfloat4::store(&b,sse_a);
 | |
|     }
 | |
| 
 | |
|     /*! Sort 2 stack items. */
 | |
|     __forceinline friend void sort(StackItemT& s1, StackItemT& s2) {
 | |
|       if (s2.dist < s1.dist) xchg(s2,s1);
 | |
|     }
 | |
|     
 | |
|     /*! Sort 3 stack items. */
 | |
|     __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3)
 | |
|     {
 | |
|       if (s2.dist < s1.dist) xchg(s2,s1);
 | |
|       if (s3.dist < s2.dist) xchg(s3,s2);
 | |
|       if (s2.dist < s1.dist) xchg(s2,s1);
 | |
|     }
 | |
|     
 | |
|     /*! Sort 4 stack items. */
 | |
|     __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4)
 | |
|     {
 | |
|       if (s2.dist < s1.dist) xchg(s2,s1);
 | |
|       if (s4.dist < s3.dist) xchg(s4,s3);
 | |
|       if (s3.dist < s1.dist) xchg(s3,s1);
 | |
|       if (s4.dist < s2.dist) xchg(s4,s2);
 | |
|       if (s3.dist < s2.dist) xchg(s3,s2);
 | |
|     }
 | |
| 
 | |
|     /*! use SSE instructions to swap stack items */
 | |
|     __forceinline static void cmp_xchg(vint4& a, vint4& b) 
 | |
|     { 
 | |
| #if defined(__AVX512VL__)
 | |
|       const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a));
 | |
| #else
 | |
|       const vboolf4 mask0(b < a);
 | |
|       const vboolf4 mask(shuffle<2,2,2,2>(mask0));
 | |
| #endif
 | |
|       const vint4 c = select(mask,b,a);
 | |
|       const vint4 d = select(mask,a,b);
 | |
|       a = c;
 | |
|       b = d;
 | |
|     }
 | |
|     
 | |
|     /*! Sort 3 stack items. */
 | |
|     __forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3)
 | |
|     {
 | |
|       cmp_xchg(s2,s1);
 | |
|       cmp_xchg(s3,s2);
 | |
|       cmp_xchg(s2,s1);
 | |
|     }
 | |
|     
 | |
|     /*! Sort 4 stack items. */
 | |
|     __forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4)
 | |
|     {
 | |
|       cmp_xchg(s2,s1);
 | |
|       cmp_xchg(s4,s3);
 | |
|       cmp_xchg(s3,s1);
 | |
|       cmp_xchg(s4,s2);
 | |
|       cmp_xchg(s3,s2);
 | |
|     }
 | |
| 
 | |
| 
 | |
|     /*! Sort N stack items. */
 | |
|     __forceinline friend void sort(StackItemT* begin, StackItemT* end)
 | |
|     {
 | |
|       for (StackItemT* i = begin+1; i != end; ++i)
 | |
|       {
 | |
|         const vfloat4 item = vfloat4::load((float*)i);
 | |
|         const unsigned dist = i->dist;
 | |
|         StackItemT* j = i;
 | |
| 
 | |
|         while ((j != begin) && ((j-1)->dist < dist))
 | |
|         {
 | |
|           vfloat4::store(j, vfloat4::load((float*)(j-1)));
 | |
|           --j;
 | |
|         }
 | |
| 
 | |
|         vfloat4::store(j, item);
 | |
|       }
 | |
|     }
 | |
|     
 | |
|   public:
 | |
|     T ptr;
 | |
|     unsigned dist;
 | |
|   };
 | |
| 
 | |
|   /*! An item on the stack holds the node ID and active ray mask. */
 | |
|   template<typename T>
 | |
|   struct __aligned(8) StackItemMaskT
 | |
|   {
 | |
|     T ptr;
 | |
|     size_t mask;
 | |
|   };
 | |
| 
 | |
|   struct __aligned(8) StackItemMaskCoherent
 | |
|   {
 | |
|     size_t mask;
 | |
|     size_t parent;
 | |
|     size_t child;
 | |
|   };
 | |
| }
 |